1 package net.sf.flock.parser;
2
3 import java.util.Date;
4 import java.util.Iterator;
5 import java.util.List;
6
7 import net.sf.flock.FeedFactoryI;
8 import net.sf.flock.FeedI;
9 import net.sf.flock.FlockResourceException;
10 import net.sf.flock.SubscriptionInfoI;
11
12 import org.apache.log4j.LogManager;
13 import org.apache.log4j.Logger;
14 import org.jdom.Attribute;
15 import org.jdom.Document;
16 import org.jdom.Element;
17 import org.jdom.Namespace;
18 import org.jdom.output.XMLOutputter;
19
20 public class Rss10Parser implements FeedParserI {
21
22 private final static Logger LOGGER = LogManager.getLogger(Rss10Parser.class);
23
24 /***
25 * @see net.sf.flock.parser.FeedParserI#isSuitable(Document)
26 */
27 public boolean isSuitable(Document doc) {
28 Element root = doc.getRootElement();
29
30 LOGGER.debug("root element "+root.getName());
31
32 Namespace defNS = ParserUtil.getDefaultNS(root);
33 if (defNS == null) {
34 Namespace rss10 = ParserUtil.getNamespaceFromURI(root,"http://purl.org/rss/1.0/");
35 if (rss10==null)
36 return false;
37 }
38 if ("RDF".equals(root.getName())) {
39 return true;
40 }
41 return false;
42 }
43
44 /***
45 * @see net.sf.flock.parser.FeedParserI#parse(FeedFactoryI, Document)
46 */
47 public FeedI parse(SubscriptionInfoI subscriptionInfoI,FeedFactoryI feedFactory, Document doc) throws FlockResourceException {
48
49 LOGGER.debug("start parsing.");
50
51 // Get the root element (must be rss)
52 Element root = doc.getRootElement();
53 Namespace defNS = ParserUtil.getDefaultNS(root);
54 if (defNS == null) {
55 defNS = ParserUtil.getNamespaceFromURI(root,"http://purl.org/rss/1.0/");
56 if (defNS==null)
57 throw new FlockResourceException("No default namespace found.");
58 }
59 Namespace dcNS = ParserUtil.getNamespace(root, "dc");
60 // fall back to default name space
61 if (dcNS == null) {
62 dcNS = defNS;
63 }
64
65 FeedI feed = feedFactory.createFeed(subscriptionInfoI);
66
67 // Get the channel element (only one occurs)
68 Element channel = root.getChild("channel", defNS);
69
70 // title element
71 feed.setTitle( HTMLUtil.unescape( channel.getChildTextTrim("title", defNS) ) );
72
73 // description element
74 // !!! feed.setDescription(channel.getChildTextTrim("description", defNS));
75
76 // link element
77 String link = channel.getChildTextTrim("link", defNS);
78 if ((link==null) || (link.length()==0)) {
79
80 for (Iterator iterator = channel.getAttributes().iterator();iterator.hasNext();) {
81 Attribute attr = (Attribute) iterator.next();
82 if ("about".equals(attr.getName())) {
83 LOGGER.debug("channel.about attribute found:"+attr.getNamespace()+":"+attr.getName()+" = "+attr.getValue());
84 link = attr.getValue();
85 break;
86 }
87 LOGGER.debug("channel attribute :"+attr.getNamespace()+":"+attr.getName()+" = "+attr.getValue());
88 }
89 }
90 feed.setSite(ParserUtil.getURL(link));
91
92 /*
93 // !!!
94
95 // creator element
96 Element creator = channel.getChild("creator", dcNS);
97 if (creator != null) {
98 feed.setCreator(creator.getTextTrim());
99 }
100
101 // publisher element
102 Element publisher = channel.getChild("publisher", dcNS);
103 if (publisher != null) {
104 feed.setPublisher(publisher.getTextTrim());
105 }
106
107 // language element
108 Element language = channel.getChild("language", dcNS);
109 if (language != null) {
110 feed.setLanguage(language.getTextTrim());
111 }
112
113 // rights element
114 Element copyright = channel.getChild("copyright", defNS);
115 if (copyright != null) {
116 feed.setCopyright(copyright.getTextTrim());
117 }
118 */
119
120 long parseDate = System.currentTimeMillis();
121
122 // item elements
123 List items = root.getChildren("item", defNS);
124 Iterator i = items.iterator();
125 while (i.hasNext()) {
126 Element item = (Element) i.next();
127 // get title element
128 Element elTitle = item.getChild("title", defNS);
129 String strTitle = "<No Title>";
130 if (elTitle != null) {
131 strTitle = HTMLUtil.unescape( elTitle.getTextTrim() );
132 }
133
134 LOGGER.debug("Item element found (" + strTitle + ").");
135
136 // get link element
137 Element elLink = item.getChild("link", defNS);
138 String strLink = "";
139 if (elLink != null) {
140 strLink = elLink.getTextTrim();
141 }
142
143 // get description element
144 Element elDesc = item.getChild("description", dcNS);
145 if (elDesc==null) {
146 // try to get it w/o namespace
147 elDesc = item.getChild("description", defNS);
148 LOGGER.debug("description without namespace is "+ParserUtil.elementValue(elDesc,20));
149 } else {
150 LOGGER.debug("description with namespace is not null");
151 }
152
153 if (elDesc==null) {
154 elDesc = item.getChild("subtitle", defNS);
155 LOGGER.debug("subtitle is "+ParserUtil.elementValue(elDesc,20));
156 }
157 String strDesc = "";
158 if (elDesc != null) {
159
160 strDesc = elDesc.getTextTrim();
161
162 if (strDesc.length()==0) {
163 LOGGER.debug("text empty, use XMLOutputter");
164 XMLOutputter output = new XMLOutputter("",false);
165 strDesc = output.outputString(elDesc);
166 }
167 }
168
169 // generate new RSS item (link to article)
170
171 /*
172 article.setFound(dateParsed);
173
174 // get creator element
175 Element elCreator = item.getChild("creator", dcNS);
176 if (elCreator != null) {
177 article.setCreator(elCreator.getTextTrim());
178 }
179 // get subject element
180 Element elSubject = item.getChild("subject", dcNS);
181 if (elSubject != null) {
182 // !!! Mulitple subject elements not handled currently
183 article.setSubject(elSubject.getTextTrim());
184 }
185 */
186
187 // get date element
188 Date creationTime = ParserUtil.parseDate( item.getChild("date", dcNS) );
189
190 // TODO: this is bullshit :)
191 if (creationTime==null) {
192 creationTime = new Date(parseDate--);
193 }
194
195 feed.newItem(creationTime, strTitle, strDesc, ParserUtil.getURL(strLink));
196 }
197
198 /*
199 // image element
200 Element image = root.getChild("image", defNS);
201 if (image != null) {
202 ImageIF rssImage =
203 cBuilder.makeImage(image.getChildTextTrim("title", defNS),
204 getURL(image.getChildTextTrim("url", defNS)));
205 Element imgWidth = image.getChild("width", defNS);
206 if (imgWidth != null) {
207 try {
208 rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
209 } catch (NumberFormatException e) {
210 logger.warn(e);
211 }
212 }
213 Element imgHeight = image.getChild("height", defNS);
214 if (imgHeight != null) {
215 try {
216 rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
217 } catch (NumberFormatException e) {
218 logger.warn(e);
219 }
220 }
221 Element imgDescr = image.getChild("description", dcNS);
222 if (imgDescr != null) {
223 rssImage.setDescription(imgDescr.getTextTrim());
224 }
225 feed.setImage(rssImage);
226 }
227
228 // textinput element
229 Element txtinp = root.getChild("textinput", defNS);
230 if (txtinp != null) {
231 String tiTitle = null;
232 if (txtinp.getChild("title", defNS) != null) {
233 tiTitle = txtinp.getChild("title", defNS).getTextTrim();
234 }
235 String tiDescr = null;
236 if (txtinp.getChild("description", dcNS) != null) {
237 tiDescr = txtinp.getChild("description", dcNS).getTextTrim();
238 }
239 URL tiLink = null;
240 if (txtinp.getChild("link", defNS) != null) {
241 tiLink = getURL(txtinp.getChild("link", defNS).getTextTrim());
242 }
243 TextInputIF rssTextInput =
244 cBuilder.makeTextInput(tiTitle, tiDescr, tiLink);
245 feed.setTextInput(rssTextInput);
246 }
247 */
248
249 return feed;
250 }
251
252 }
// This page was automatically generated by Maven.