View Javadoc
package net.sf.flock.parser;

import java.util.Date;
import java.util.Iterator;
import java.util.List;

import net.sf.flock.FeedFactoryI;
import net.sf.flock.FeedI;
import net.sf.flock.FlockResourceException;
import net.sf.flock.SubscriptionInfoI;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.output.XMLOutputter;

/**
 * Feed parser for documents whose root element is named <code>RDF</code>
 * (the RSS 1.0 document shape): one <code>channel</code> child plus any
 * number of <code>item</code> children directly below the root.
 */
public class Rss10Parser implements FeedParserI {

    private final static Logger LOGGER = LogManager.getLogger(Rss10Parser.class);

    /** Namespace URI looked up when the root element declares no default namespace. */
    private final static String RSS10_NS_URI = "http://purl.org/rss/1.0/";

    /**
     * Decides whether this parser can handle the given document.
     * <p>
     * The document is accepted when its root element is named <code>RDF</code>
     * and the root either has a default namespace or declares the RSS 1.0
     * namespace under some prefix.
     *
     * @param doc the parsed XML document
     * @return <code>true</code> if {@link #parse} should be tried on it
     * @see net.sf.flock.parser.FeedParserI#isSuitable(Document)
     */
    public boolean isSuitable(Document doc) {
        Element root = doc.getRootElement();

        LOGGER.debug("root element "+root.getName());

        // Without a default namespace the RSS 1.0 namespace must at least be declared.
        Namespace defNS = ParserUtil.getDefaultNS(root);
        if (defNS == null && ParserUtil.getNamespaceFromURI(root, RSS10_NS_URI) == null) {
            return false;
        }
        return "RDF".equals(root.getName());
    }

    /**
     * Builds a feed from the document: title and site URL come from the
     * <code>channel</code> element, and one feed item is created per
     * <code>item</code> child of the root.
     *
     * @param subscriptionInfoI subscription the new feed is created for
     * @param feedFactory       factory used to create the feed object
     * @param doc               the parsed XML document
     * @return the populated feed
     * @throws FlockResourceException if no usable namespace is found or the
     *         document has no <code>channel</code> element
     * @see net.sf.flock.parser.FeedParserI#parse(SubscriptionInfoI, FeedFactoryI, Document)
     */
    public FeedI parse(SubscriptionInfoI subscriptionInfoI,FeedFactoryI feedFactory, Document doc) throws FlockResourceException {

        LOGGER.debug("start parsing.");

        // Get the root element (named RDF, see isSuitable)
        Element root = doc.getRootElement();
        Namespace defNS = ParserUtil.getDefaultNS(root);
        if (defNS == null) {
            defNS = ParserUtil.getNamespaceFromURI(root, RSS10_NS_URI);
            if (defNS == null) {
                throw new FlockResourceException("No default namespace found.");
            }
        }
        Namespace dcNS = ParserUtil.getNamespace(root, "dc");
        // fall back to default name space
        if (dcNS == null) {
            dcNS = defNS;
        }

        FeedI feed = feedFactory.createFeed(subscriptionInfoI);

        // Get the channel element (only one occurs)
        Element channel = root.getChild("channel", defNS);
        if (channel == null) {
            // getChild returns null for a missing child; report it through the
            // declared exception instead of failing with a NullPointerException.
            throw new FlockResourceException("No channel element found.");
        }

        // title element
        feed.setTitle( HTMLUtil.unescape( channel.getChildTextTrim("title", defNS) ) );

        // description element
        // !!! feed.setDescription(channel.getChildTextTrim("description", defNS));

        // link element
        feed.setSite(ParserUtil.getURL(resolveChannelLink(channel, defNS)));

        /*
        // !!!

        // creator element
        Element creator = channel.getChild("creator", dcNS);
        if (creator != null) {
            feed.setCreator(creator.getTextTrim());
        }

        // publisher element
        Element publisher = channel.getChild("publisher", dcNS);
        if (publisher != null) {
            feed.setPublisher(publisher.getTextTrim());
        }

        // language element
        Element language = channel.getChild("language", dcNS);
        if (language != null) {
            feed.setLanguage(language.getTextTrim());
        }

        // rights element
        Element copyright = channel.getChild("copyright", defNS);
        if (copyright != null) {
            feed.setCopyright(copyright.getTextTrim());
        }
        */

        // Base for the synthetic timestamps handed to items that carry no date.
        long parseDate = System.currentTimeMillis();

        // item elements
        List items = root.getChildren("item", defNS);
        Iterator i = items.iterator();
        while (i.hasNext()) {
            Element item = (Element) i.next();

            // get title element
            Element elTitle = item.getChild("title", defNS);
            String strTitle = "<No Title>";
            if (elTitle != null) {
                strTitle = HTMLUtil.unescape( elTitle.getTextTrim() );
            }

            LOGGER.debug("Item element found (" + strTitle + ").");

            // get link element
            Element elLink = item.getChild("link", defNS);
            String strLink = "";
            if (elLink != null) {
                strLink = elLink.getTextTrim();
            }

            // get description element
            String strDesc = readDescription(item, defNS, dcNS);

            // generate new RSS item (link to article)

            /*
            article.setFound(dateParsed);

            // get creator element
            Element elCreator = item.getChild("creator", dcNS);
            if (elCreator != null) {
                article.setCreator(elCreator.getTextTrim());
            }
            // get subject element
            Element elSubject = item.getChild("subject", dcNS);
            if (elSubject != null) {
                // !!! Multiple subject elements not handled currently
                article.setSubject(elSubject.getTextTrim());
            }
            */

            // get date element
            Date creationTime = ParserUtil.parseDate( item.getChild("date", dcNS) );

            // TODO: find a better fallback. Items without a parsable date get
            // the parse time, lowered by one millisecond for each such item, so
            // every undated item receives a distinct, strictly decreasing stamp.
            if (creationTime == null) {
                creationTime = new Date(parseDate--);
            }

            feed.newItem(creationTime, strTitle, strDesc, ParserUtil.getURL(strLink));
        }

        /*
        // image element
        Element image = root.getChild("image", defNS);
        if (image != null) {
            ImageIF rssImage =
                cBuilder.makeImage(image.getChildTextTrim("title", defNS),
                                   getURL(image.getChildTextTrim("url", defNS)));
            Element imgWidth = image.getChild("width", defNS);
            if (imgWidth != null) {
                try {
                    rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
                } catch (NumberFormatException e) {
                    logger.warn(e);
                }
            }
            Element imgHeight = image.getChild("height", defNS);
            if (imgHeight != null) {
                try {
                    rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
                } catch (NumberFormatException e) {
                    logger.warn(e);
                }
            }
            Element imgDescr = image.getChild("description", dcNS);
            if (imgDescr != null) {
                rssImage.setDescription(imgDescr.getTextTrim());
            }
            feed.setImage(rssImage);
        }

        // textinput element
        Element txtinp = root.getChild("textinput", defNS);
        if (txtinp != null) {
            String tiTitle = null;
            if (txtinp.getChild("title", defNS) != null) {
                tiTitle = txtinp.getChild("title", defNS).getTextTrim();
            }
            String tiDescr = null;
            if (txtinp.getChild("description", dcNS) != null) {
                tiDescr = txtinp.getChild("description", dcNS).getTextTrim();
            }
            URL tiLink = null;
            if (txtinp.getChild("link", defNS) != null) {
                tiLink = getURL(txtinp.getChild("link", defNS).getTextTrim());
            }
            TextInputIF rssTextInput =
                cBuilder.makeTextInput(tiTitle, tiDescr, tiLink);
            feed.setTextInput(rssTextInput);
        }
        */

        return feed;
    }

    /**
     * Returns the channel's link text, or, when that is missing or empty, the
     * value of the first channel attribute named <code>about</code> (in any
     * namespace). May return <code>null</code> or an empty string when neither
     * is available.
     */
    private static String resolveChannelLink(Element channel, Namespace defNS) {
        String link = channel.getChildTextTrim("link", defNS);
        if ((link != null) && (link.length() != 0)) {
            return link;
        }

        for (Iterator iterator = channel.getAttributes().iterator(); iterator.hasNext();) {
            Attribute attr = (Attribute) iterator.next();
            if ("about".equals(attr.getName())) {
                LOGGER.debug("channel.about attribute found:"+attr.getNamespace()+":"+attr.getName()+" = "+attr.getValue());
                return attr.getValue();
            }
            LOGGER.debug("channel attribute :"+attr.getNamespace()+":"+attr.getName()+" = "+attr.getValue());
        }
        return link;
    }

    /**
     * Extracts the description text of an item. Lookup order is
     * <code>description</code> in the dc namespace, <code>description</code>
     * in the default namespace, then <code>subtitle</code> in the default
     * namespace. If the chosen element has no direct text, the element is
     * serialized with {@link XMLOutputter} instead. Returns an empty string
     * when no candidate element exists.
     */
    private static String readDescription(Element item, Namespace defNS, Namespace dcNS) {
        Element elDesc = item.getChild("description", dcNS);
        if (elDesc == null) {
            // try the default namespace instead
            elDesc = item.getChild("description", defNS);
            LOGGER.debug("description without namespace is "+ParserUtil.elementValue(elDesc,20));
        } else {
            LOGGER.debug("description with namespace is not null");
        }

        if (elDesc == null) {
            elDesc = item.getChild("subtitle", defNS);
            LOGGER.debug("subtitle is "+ParserUtil.elementValue(elDesc,20));
        }

        if (elDesc == null) {
            return "";
        }

        String strDesc = elDesc.getTextTrim();
        if (strDesc.length() == 0) {
            // No direct text: the content is presumably nested markup, so keep
            // it by serializing the whole element.
            LOGGER.debug("text empty, use XMLOutputter");
            XMLOutputter output = new XMLOutputter("",false);
            strDesc = output.outputString(elDesc);
        }
        return strDesc;
    }

}

This page was automatically generated by Maven