View Javadoc
package net.sf.flock.parser;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import net.sf.flock.FeedFactoryI;
import net.sf.flock.FeedI;
import net.sf.flock.FlockResourceException;
import net.sf.flock.SubscriptionInfoI;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;

/**
 * {@link FeedParserI} implementation for RSS 2.0 documents.
 *
 * <p>A document is accepted when its root element is <code>rss</code> and its
 * <code>version</code> attribute starts with <code>2.0</code>. Parsing copies the
 * channel title and link onto a new feed and adds one feed item per
 * <code>item</code> element of the channel.</p>
 *
 * <p>The class keeps no mutable instance state: the date formatter used for
 * fallback item titles is created per {@link #parse} call, because
 * {@link SimpleDateFormat} must not be shared between threads.</p>
 */
public class Rss20Parser implements FeedParserI {

    private final static Logger LOGGER = LogManager.getLogger(Rss20Parser.class);

    /** Pattern of the timestamp appended to the feed title when an item has no title of its own. */
    private final static String TIME_FORMAT_PATTERN = "yyyy.MM.dd H:mm";

    /**
     * Tells whether the document is an RSS 2.0 feed.
     *
     * @param doc the parsed XML document to inspect
     * @return <code>true</code> if the root element is named <code>rss</code> and has a
     *         <code>version</code> attribute whose value starts with <code>2.0</code>;
     *         <code>false</code> otherwise
     * @see net.sf.flock.parser.FeedParserI#isSuitable(Document)
     */
    public boolean isSuitable(Document doc) {
        Element root = doc.getRootElement();
        if (!"rss".equals(root.getName())) {
            return false;
        }
        Attribute rssVersion = root.getAttribute("version");
        if (rssVersion==null || rssVersion.getValue()==null) {
            return false;
        }
        LOGGER.debug("Found RSS version " + rssVersion.getValue());
        return rssVersion.getValue().startsWith("2.0");
    }

    /**
     * Builds a feed from an RSS 2.0 document.
     *
     * <p>The feed is created through <code>feedFactory</code>; its title is the unescaped
     * channel <code>title</code> and its site is derived from the channel <code>link</code>.
     * For every <code>item</code> child of the channel a feed item is added with:</p>
     * <ul>
     *   <li>creation time: the item's <code>pubDate</code>, else its <code>dc:date</code>,
     *       else the time this method was invoked;</li>
     *   <li>title: the unescaped item <code>title</code>, else the feed title followed by
     *       the formatted creation time;</li>
     *   <li>description: the trimmed <code>description</code> text, or an empty string;</li>
     *   <li>link: the trimmed <code>link</code> text, else the <code>guid</code> text,
     *       else an empty string, converted via <code>ParserUtil.getURL</code>.</li>
     * </ul>
     *
     * <p>NOTE(review): the <code>channel</code> element is dereferenced without a null
     * check, so a document lacking it fails with a <code>NullPointerException</code>
     * after the feed has already been created - confirm whether callers rely on
     * {@link #isSuitable(Document)} alone to rule this out.</p>
     *
     * @param subscriptionInfoI subscription the new feed is created for
     * @param feedFactory factory used to create the feed
     * @param doc the RSS 2.0 document to read
     * @return the populated feed
     * @throws FlockResourceException declared by the parser contract; this method does
     *         not throw it directly, but the collaborators it calls may
     */
    public FeedI parse(SubscriptionInfoI subscriptionInfoI,FeedFactoryI feedFactory, Document doc) throws FlockResourceException {

        // SimpleDateFormat is not thread-safe, so a fresh formatter is used per call
        // instead of a shared instance field.
        SimpleDateFormat timeFormat = new SimpleDateFormat(TIME_FORMAT_PATTERN);

        Date dateParsed = new Date();
        LOGGER.debug("start parsing.");

        Element root = doc.getRootElement();

        // Get the channel element (only one occurs)
        Element channel = root.getChild("channel");

        FeedI feed = feedFactory.createFeed(subscriptionInfoI);

        // 1 title element
        feed.setTitle( HTMLUtil.unescape( channel.getChildTextTrim("title") ) );

        // 1 link element
        feed.setSite(ParserUtil.getURL(channel.getChildTextTrim("link")));

        // TODO: channel elements not yet mapped onto the feed:
        //   description  (feed.setDescription(channel.getChildTextTrim("description")))
        //   language     (feed.setLanguage(channel.getChildTextTrim("language")))
        //   rating, image, textinput, copyright, pubDate, lastBuildDate, docs,
        //   managingEditor, webMaster, skipHours, skipDays

        // 1..n item elements
        List items = channel.getChildren("item");
        Iterator i = items.iterator();
        while (i.hasNext()) {
            Element item = (Element) i.next();

            // get description element
            Element elDesc = item.getChild("description");
            String strDesc = "";
            if (elDesc != null) {
                strDesc = elDesc.getTextTrim();
            }

            String strLink = findLink(item);

            Date pubDate = findPublicationDate(root, item);
            LOGGER.debug("date :"+pubDate);

            // Items without any usable date are stamped with the parse time.
            Date creationTime = pubDate!=null ? pubDate : dateParsed;
            // TODO: record dateParsed as the item's "found" time once the item API supports it

            Element elTitle = item.getChild("title");
            LOGGER.debug("title '"+(elTitle==null ? "null" : elTitle.getTextTrim()) +"'");
            String title =
                elTitle!=null ?
                    HTMLUtil.unescape( elTitle.getTextTrim() ) :
                    feed.getTitle() + " " + timeFormat.format(creationTime);

            feed.newItem(creationTime, title, strDesc, ParserUtil.getURL(strLink));
        }

        return feed;
    }

    /**
     * Returns the trimmed text of the item's <code>link</code> element, falling back to
     * its <code>guid</code> element (some feeds, e.g. http://radio.weblogs.com/0112015/rss.xml,
     * only provide that), or an empty string when neither element is present.
     */
    private String findLink(Element item) {
        Element elLink = item.getChild("link");
        if (elLink != null) {
            String strLink = elLink.getTextTrim();
            LOGGER.debug("found link '"+strLink+"'");
            return strLink;
        }

        Element elGuid = item.getChild("guid");
        if (elGuid != null) {
            String strLink = elGuid.getTextTrim();
            LOGGER.debug("found guid '"+strLink+"'");
            return strLink;
        }

        LOGGER.debug("no link/guid");
        return "";
    }

    /**
     * Returns the item's publication date taken from <code>pubDate</code>, or, when that
     * yields nothing, from <code>date</code> in the namespace registered under the
     * <code>dc</code> prefix on the root element; <code>null</code> if neither yields a date.
     */
    private Date findPublicationDate(Element root, Element item) {
        Date pubDate = ParserUtil.parseDate(item.getChild("pubDate"));
        if (pubDate==null) {
            // try dc namespace
            Namespace dcNS = ParserUtil.getNamespace(root, "dc");
            if (dcNS!=null) {
                pubDate = ParserUtil.parseDate( item.getChild("date", dcNS) );
            }
        }
        return pubDate;
    }

}

This page was automatically generated by Maven