1 package net.sf.flock.parser;
2
3 import java.util.Date;
4 import java.util.Iterator;
5 import java.util.List;
6
7 import net.sf.flock.FeedFactoryI;
8 import net.sf.flock.FeedI;
9 import net.sf.flock.FlockResourceException;
10 import net.sf.flock.SubscriptionInfoI;
11
12 import org.apache.log4j.LogManager;
13 import org.apache.log4j.Logger;
14 import org.jdom.Attribute;
15 import org.jdom.Document;
16 import org.jdom.Element;
17
18 public class Rss091Parser implements FeedParserI {
19
20 private final static Logger LOGGER = LogManager.getLogger(Rss091Parser.class);
21
22 /***
23 * @see net.sf.flock.parser.FeedParserI#isSuitable(Document)
24 */
25 public boolean isSuitable(Document doc) {
26 Element root = doc.getRootElement();
27 if (!"rss".equals(root.getName())) {
28 return false;
29 }
30 Attribute rssVersion = root.getAttribute("version");
31 if (rssVersion==null || rssVersion.getValue()==null) {
32 return false;
33 }
34 LOGGER.debug("Found RSS version " + rssVersion.getValue());
35 return rssVersion.getValue().startsWith("0.9");
36 }
37
38 /***
39 * @see net.sf.flock.parser.FeedParserI#parse(FeedFactoryI, Document)
40 */
41 public FeedI parse(SubscriptionInfoI subscriptionInfoI,FeedFactoryI feedFactory, Document doc) throws FlockResourceException {
42
43 Element root = doc.getRootElement();
44
45 Element channel = root.getChild("channel");
46
47 // 1 title element
48 FeedI feed = feedFactory.createFeed(subscriptionInfoI);
49
50 feed.setTitle( HTMLUtil.unescape(channel.getChildTextTrim("title")) );
51
52 // 1 description element
53 // !!! feed.setDescription(channel.getChildTextTrim("description"));
54
55 // 1 link element
56 feed.setSite(ParserUtil. getURL(channel.getChildTextTrim("link")));
57
58 // 1 language element
59 // !!! feed.setLanguage(channel.getChildTextTrim("language"));
60
61 long parseDate = System.currentTimeMillis();
62
63 // 1..n item elements
64 List items = channel.getChildren("item");
65 Iterator i = items.iterator();
66 while (i.hasNext()) {
67 Element item = (Element) i.next();
68 // get title element
69 Element elTitle = item.getChild("title");
70 String strTitle = "<No Title>";
71 if (elTitle != null) {
72 strTitle = HTMLUtil.unescape( elTitle.getTextTrim() );
73 }
74
75 LOGGER.debug("Item element found (" + strTitle + ").");
76
77 // get link element
78 Element elLink = item.getChild("link");
79 String strLink = "";
80 if (elLink != null) {
81 strLink = elLink.getTextTrim();
82 }
83
84 // get description element
85 Element elDesc = item.getChild("description");
86 String strDesc = "";
87 if (elDesc != null) {
88 strDesc = elDesc.getTextTrim();
89 }
90
91 // generate new RSS item (link to article)
92
93 feed.newItem( new Date(parseDate--), strTitle, strDesc, ParserUtil.getURL(strLink));
94 }
95
96 // 0..1 rating element
97
98 // 0..1 image element
99 /*
100 Element image = channel.getChild("image");
101 if (image != null) {
102 ImageIF rssImage =
103 cBuilder.makeImage(image.getChildTextTrim("title"),
104 getURL(image.getChildTextTrim("url")));
105 Element imgWidth = image.getChild("width");
106 if (imgWidth != null) {
107 try {
108 rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
109 } catch (NumberFormatException e) {
110 logger.warn(e);
111 }
112 }
113 Element imgHeight = image.getChild("height");
114 if (imgHeight != null) {
115 try {
116 rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
117 } catch (NumberFormatException e) {
118 logger.warn(e);
119 }
120 }
121 Element imgDescr = image.getChild("description");
122 if (imgDescr != null) {
123 rssImage.setDescription(imgDescr.getTextTrim());
124 }
125 chnl.setImage(rssImage);
126 }
127
128 // 0..1 textinput element
129 Element txtinp = channel.getChild("textinput");
130 if (txtinp != null) {
131 TextInputIF rssTextInput =
132 cBuilder.makeTextInput(txtinp.getChild("title").getTextTrim(),
133 txtinp.getChild("description").getTextTrim(),
134 getURL(txtinp.getChild("link").getTextTrim()));
135 chnl.setTextInput(rssTextInput);
136 }
137
138 // 0..1 copyright element
139 Element copyright = channel.getChild("copyright");
140 if (copyright != null) {
141 chnl.setCopyright(copyright.getTextTrim());
142 }
143
144 // 0..1 pubDate element
145 Element pubDate = channel.getChild("pubDate");
146 // if (pubDate != null) {
147 // chnl.setPubDate(pubDate.getTextTrim());
148 // }
149
150 // 0..1 lastBuildDate element
151 Element lastBuildDate = channel.getChild("lastBuildDate");
152 // if (lastBuildDate != null) {
153 // chnl.setLastBuildDate(lastBuildDate.getTextTrim());
154 // }
155
156 // 0..1 docs element
157
158 // 0..1 managingEditor element
159 Element managingEditor = channel.getChild("managingEditor");
160 if (managingEditor != null) {
161 chnl.setCreator(managingEditor.getTextTrim());
162 }
163
164 // 0..1 webMaster element
165 Element webMaster = channel.getChild("webMaster");
166 if (webMaster != null) {
167 chnl.setPublisher(webMaster.getTextTrim());
168 }
169
170 // 0..1 skipHours element
171 // 0..1 skipDays element
172 */
173 return feed;
174 }
175
176 }
// This page was automatically generated by Maven