summaryrefslogtreecommitdiffstats
path: root/toolkit/components/feeds/FeedProcessor.js
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/feeds/FeedProcessor.js')
-rw-r--r--toolkit/components/feeds/FeedProcessor.js1792
1 files changed, 1792 insertions, 0 deletions
diff --git a/toolkit/components/feeds/FeedProcessor.js b/toolkit/components/feeds/FeedProcessor.js
new file mode 100644
index 000000000..88d0ad6ed
--- /dev/null
+++ b/toolkit/components/feeds/FeedProcessor.js
@@ -0,0 +1,1792 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function LOG(str) {
+ dump("*** " + str + "\n");
+}
+
+const Ci = Components.interfaces;
+const Cc = Components.classes;
+const Cr = Components.results;
+Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");
+
+const FP_CONTRACTID = "@mozilla.org/feed-processor;1";
+const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}");
+const FP_CLASSNAME = "Feed Processor";
+const FR_CONTRACTID = "@mozilla.org/feed-result;1";
+const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}");
+const FR_CLASSNAME = "Feed Result";
+const FEED_CONTRACTID = "@mozilla.org/feed;1";
+const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}");
+const FEED_CLASSNAME = "Feed";
+const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1";
+const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}");
+const ENTRY_CLASSNAME = "Feed Entry";
+const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1";
+const TEXTCONSTRUCT_CLASSID =
+ Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}");
+const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct";
+const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1";
+const GENERATOR_CLASSID =
+ Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}");
+const GENERATOR_CLASSNAME = "Feed Generator";
+const PERSON_CONTRACTID = "@mozilla.org/feed-person;1";
+const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}");
+const PERSON_CLASSNAME = "Feed Person";
+
+const IO_CONTRACTID = "@mozilla.org/network/io-service;1"
+const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1"
+const ARRAY_CONTRACTID = "@mozilla.org/array;1";
+const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
+const PARSERUTILS_CONTRACTID = "@mozilla.org/parserutils;1";
+
+const gMimeService = Cc["@mozilla.org/mime;1"].getService(Ci.nsIMIMEService);
+var gIoService = null;
+
+const XMLNS = "http://www.w3.org/XML/1998/namespace";
+const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
+
+/** *** Some general utils *****/
+function strToURI(link, base) {
+ base = base || null;
+ if (!gIoService)
+ gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
+ try {
+ return gIoService.newURI(link, null, base);
+ }
+ catch (e) {
+ return null;
+ }
+}
+
+function isArray(a) {
+ return isObject(a) && a.constructor == Array;
+}
+
+function isObject(a) {
+ return (a && typeof a == "object") || isFunction(a);
+}
+
+function isFunction(a) {
+ return typeof a == "function";
+}
+
+function isIID(a, iid) {
+ var rv = false;
+ try {
+ a.QueryInterface(iid);
+ rv = true;
+ }
+ catch (e) {
+ }
+ return rv;
+}
+
+function isIArray(a) {
+ return isIID(a, Ci.nsIArray);
+}
+
+function isIFeedContainer(a) {
+ return isIID(a, Ci.nsIFeedContainer);
+}
+
+function stripTags(someHTML) {
+ return someHTML.replace(/<[^>]+>/g, "");
+}
+
+/**
+ * Searches through an array of links and returns a JS array
+ * of matching property bags.
+ */
+const IANA_URI = "http://www.iana.org/assignments/relation/";
+function findAtomLinks(rel, links) {
+ var rvLinks = [];
+ for (var i = 0; i < links.length; ++i) {
+ var linkElement = links.queryElementAt(i, Ci.nsIPropertyBag2);
+ // atom:link MUST have @href
+ if (bagHasKey(linkElement, "href")) {
+ var relAttribute = null;
+ if (bagHasKey(linkElement, "rel"))
+ relAttribute = linkElement.getPropertyAsAString("rel")
+ if ((!relAttribute && rel == "alternate") || relAttribute == rel) {
+ rvLinks.push(linkElement);
+ continue;
+ }
+ // catch relations specified by IANA URI
+ if (relAttribute == IANA_URI + rel) {
+ rvLinks.push(linkElement);
+ }
+ }
+ }
+ return rvLinks;
+}
+
+function xmlEscape(s) {
+ s = s.replace(/&/g, "&amp;");
+ s = s.replace(/>/g, "&gt;");
+ s = s.replace(/</g, "&lt;");
+ s = s.replace(/"/g, "&quot;");
+ s = s.replace(/'/g, "&apos;");
+ return s;
+}
+
+function arrayContains(array, element) {
+ for (var i = 0; i < array.length; ++i) {
+ if (array[i] == element) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// XXX add hasKey to nsIPropertyBag
+function bagHasKey(bag, key) {
+ try {
+ bag.getProperty(key);
+ return true;
+ }
+ catch (e) {
+ return false;
+ }
+}
+
+function makePropGetter(key) {
+ return function FeedPropGetter(bag) {
+ try {
+ return value = bag.getProperty(key);
+ }
+ catch (e) {
+ }
+ return null;
+ }
+}
+
+const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+// namespace map
+var gNamespaces = {
+ "http://webns.net/mvcb/":"admin",
+ "http://backend.userland.com/rss":"",
+ "http://blogs.law.harvard.edu/tech/rss":"",
+ "http://www.w3.org/2005/Atom":"atom",
+ "http://purl.org/atom/ns#":"atom03",
+ "http://purl.org/rss/1.0/modules/content/":"content",
+ "http://purl.org/dc/elements/1.1/":"dc",
+ "http://purl.org/dc/terms/":"dcterms",
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#":"rdf",
+ "http://purl.org/rss/1.0/":"rss1",
+ "http://my.netscape.com/rdf/simple/0.9/":"rss1",
+ "http://wellformedweb.org/CommentAPI/":"wfw",
+ "http://purl.org/rss/1.0/modules/wiki/":"wiki",
+ "http://www.w3.org/XML/1998/namespace":"xml",
+ "http://search.yahoo.com/mrss/":"media",
+ "http://search.yahoo.com/mrss":"media"
+}
+
+// We allow a very small set of namespaces in XHTML content,
+// for attributes only
+var gAllowedXHTMLNamespaces = {
+ "http://www.w3.org/XML/1998/namespace":"xml",
+ // if someone ns qualifies XHTML, we have to prefix it to avoid an
+ // attribute collision.
+ "http://www.w3.org/1999/xhtml":"xhtml"
+}
+
+function FeedResult() {}
+FeedResult.prototype = {
+ bozo: false,
+ doc: null,
+ version: null,
+ headers: null,
+ uri: null,
+ stylesheet: null,
+
+ registerExtensionPrefix: function FR_registerExtensionPrefix(ns, prefix) {
+ throw Cr.NS_ERROR_NOT_IMPLEMENTED;
+ },
+
+ // XPCOM stuff
+ classID: FR_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedResult])
+}
+
+function Feed() {
+ this.subtitle = null;
+ this.title = null;
+ this.items = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+ this.link = null;
+ this.id = null;
+ this.generator = null;
+ this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+ this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+ this.baseURI = null;
+ this.enclosureCount = 0;
+ this.type = Ci.nsIFeed.TYPE_FEED;
+}
+
+Feed.prototype = {
+ searchLists: {
+ title: ["title", "rss1:title", "atom03:title", "atom:title"],
+ subtitle: ["description", "dc:description", "rss1:description",
+ "atom03:tagline", "atom:subtitle"],
+ items: ["items", "atom03_entries", "entries"],
+ id: ["atom:id", "rdf:about"],
+ generator: ["generator"],
+ authors : ["authors"],
+ contributors: ["contributors"],
+ link: [["link", strToURI], ["rss1:link", strToURI]],
+ categories: ["categories", "dc:subject"],
+ rights: ["atom03:rights", "atom:rights"],
+ cloud: ["cloud"],
+ image: ["image", "rss1:image", "atom:logo"],
+ textInput: ["textInput", "rss1:textinput"],
+ skipDays: ["skipDays"],
+ skipHours: ["skipHours"],
+ updated: ["pubDate", "lastBuildDate", "atom03:modified", "dc:date",
+ "dcterms:modified", "atom:updated"]
+ },
+
+ normalize: function Feed_normalize() {
+ fieldsToObj(this, this.searchLists);
+ if (this.skipDays)
+ this.skipDays = this.skipDays.getProperty("days");
+ if (this.skipHours)
+ this.skipHours = this.skipHours.getProperty("hours");
+
+ if (this.updated)
+ this.updated = dateParse(this.updated);
+
+ // Assign Atom link if needed
+ if (bagHasKey(this.fields, "links"))
+ this._atomLinksToURI();
+
+ this._calcEnclosureCountAndFeedType();
+
+ // Resolve relative image links
+ if (this.image && bagHasKey(this.image, "url"))
+ this._resolveImageLink();
+
+ this._resetBagMembersToRawText([this.searchLists.subtitle,
+ this.searchLists.title]);
+ },
+
+ _calcEnclosureCountAndFeedType: function Feed_calcEnclosureCountAndFeedType() {
+ var entries_with_enclosures = 0;
+ var audio_count = 0;
+ var image_count = 0;
+ var video_count = 0;
+ var other_count = 0;
+
+ for (var i = 0; i < this.items.length; ++i) {
+ var entry = this.items.queryElementAt(i, Ci.nsIFeedEntry);
+ entry.QueryInterface(Ci.nsIFeedContainer);
+
+ if (entry.enclosures && entry.enclosures.length > 0) {
+ ++entries_with_enclosures;
+
+ for (var e = 0; e < entry.enclosures.length; ++e) {
+ var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2);
+ if (enc.hasKey("type")) {
+ var enctype = enc.get("type");
+
+ if (/^audio/.test(enctype)) {
+ ++audio_count;
+ } else if (/^image/.test(enctype)) {
+ ++image_count;
+ } else if (/^video/.test(enctype)) {
+ ++video_count;
+ } else {
+ ++other_count;
+ }
+ } else {
+ ++other_count;
+ }
+ }
+ }
+ }
+
+ var feedtype = Ci.nsIFeed.TYPE_FEED;
+
+ // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE,
+ // we enforce two things:
+ //
+ // 1. all entries must have at least one enclosure
+ // 2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image
+ // for TYPE_IMAGE
+ //
+ // Otherwise it's a TYPE_FEED.
+ if (entries_with_enclosures == this.items.length && other_count == 0) {
+ if (audio_count > 0 && !video_count && !image_count) {
+ feedtype = Ci.nsIFeed.TYPE_AUDIO;
+
+ } else if (image_count > 0 && !audio_count && !video_count) {
+ feedtype = Ci.nsIFeed.TYPE_IMAGE;
+
+ } else if (video_count > 0 && !audio_count && !image_count) {
+ feedtype = Ci.nsIFeed.TYPE_VIDEO;
+ }
+ }
+
+ this.type = feedtype;
+ this.enclosureCount = other_count + video_count + audio_count + image_count;
+ },
+
+ _atomLinksToURI: function Feed_linkToURI() {
+ var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
+ var alternates = findAtomLinks("alternate", links);
+ if (alternates.length > 0) {
+ var href = alternates[0].getPropertyAsAString("href");
+ var base;
+ if (bagHasKey(alternates[0], "xml:base"))
+ base = alternates[0].getPropertyAsAString("xml:base");
+ this.link = this._resolveURI(href, base);
+ }
+ },
+
+ _resolveImageLink: function Feed_resolveImageLink() {
+ var base;
+ if (bagHasKey(this.image, "xml:base"))
+ base = this.image.getPropertyAsAString("xml:base");
+ var url = this._resolveURI(this.image.getPropertyAsAString("url"), base);
+ if (url)
+ this.image.setPropertyAsAString("url", url.spec);
+ },
+
+ _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) {
+ var uri = null;
+ try {
+ var base = baseSpec ? strToURI(baseSpec, this.baseURI) : this.baseURI;
+ uri = strToURI(linkSpec, base);
+ }
+ catch (e) {
+ LOG(e);
+ }
+
+ return uri;
+ },
+
+ // reset the bag to raw contents, not text constructs
+ _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) {
+ for (var i=0; i<fieldLists.length; i++) {
+ for (var j=0; j<fieldLists[i].length; j++) {
+ if (bagHasKey(this.fields, fieldLists[i][j])) {
+ var textConstruct = this.fields.getProperty(fieldLists[i][j]);
+ this.fields.setPropertyAsAString(fieldLists[i][j],
+ textConstruct.text);
+ }
+ }
+ }
+ },
+
+ // XPCOM stuff
+ classID: FEED_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer])
+}
+
+function Entry() {
+ this.summary = null;
+ this.content = null;
+ this.title = null;
+ this.fields = Cc["@mozilla.org/hash-property-bag;1"].
+ createInstance(Ci.nsIWritablePropertyBag2);
+ this.link = null;
+ this.id = null;
+ this.baseURI = null;
+ this.updated = null;
+ this.published = null;
+ this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+ this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+}
+
+Entry.prototype = {
+ fields: null,
+ enclosures: null,
+ mediaContent: null,
+
+ searchLists: {
+ title: ["title", "rss1:title", "atom03:title", "atom:title"],
+ link: [["link", strToURI], ["rss1:link", strToURI]],
+ id: [["guid", makePropGetter("guid")], "rdf:about",
+ "atom03:id", "atom:id"],
+ authors : ["authors"],
+ contributors: ["contributors"],
+ summary: ["description", "rss1:description", "dc:description",
+ "atom03:summary", "atom:summary"],
+ content: ["content:encoded", "atom03:content", "atom:content"],
+ rights: ["atom03:rights", "atom:rights"],
+ published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"],
+ updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified",
+ "atom:updated"]
+ },
+
+ normalize: function Entry_normalize() {
+ fieldsToObj(this, this.searchLists);
+
+ // Assign Atom link if needed
+ if (bagHasKey(this.fields, "links"))
+ this._atomLinksToURI();
+
+ // Populate enclosures array
+ this._populateEnclosures();
+
+ // The link might be a guid w/ permalink=true
+ if (!this.link && bagHasKey(this.fields, "guid")) {
+ var guid = this.fields.getProperty("guid");
+ var isPermaLink = true;
+
+ if (bagHasKey(guid, "isPermaLink"))
+ isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false";
+
+ if (guid && isPermaLink)
+ this.link = strToURI(guid.getProperty("guid"));
+ }
+
+ if (this.updated)
+ this.updated = dateParse(this.updated);
+ if (this.published)
+ this.published = dateParse(this.published);
+
+ this._resetBagMembersToRawText([this.searchLists.content,
+ this.searchLists.summary,
+ this.searchLists.title]);
+ },
+
+ _populateEnclosures: function Entry_populateEnclosures() {
+ if (bagHasKey(this.fields, "links"))
+ this._atomLinksToEnclosures();
+
+ // Add RSS2 enclosure to enclosures
+ if (bagHasKey(this.fields, "enclosure"))
+ this._enclosureToEnclosures();
+
+ // Add media:content to enclosures
+ if (bagHasKey(this.fields, "mediacontent"))
+ this._mediaToEnclosures("mediacontent");
+
+ // Add media:thumbnail to enclosures
+ if (bagHasKey(this.fields, "mediathumbnail"))
+ this._mediaToEnclosures("mediathumbnail");
+
+ // Add media:content in media:group to enclosures
+ if (bagHasKey(this.fields, "mediagroup"))
+ this._mediaToEnclosures("mediagroup", "mediacontent");
+ },
+
+ __enclosure_map: null,
+
+ _addToEnclosures: function Entry_addToEnclosures(new_enc) {
+ // items we add to the enclosures array get displayed in the FeedWriter and
+ // they must have non-empty urls.
+ if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "")
+ return;
+
+ if (this.__enclosure_map == null)
+ this.__enclosure_map = {};
+
+ var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")];
+
+ if (previous_enc != undefined) {
+ previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2);
+
+ if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type")) {
+ previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type"));
+ try {
+ let handlerInfoWrapper = gMimeService.getFromTypeAndExtension(new_enc.getPropertyAsAString("type"), null);
+ if (handlerInfoWrapper && handlerInfoWrapper.description) {
+ previous_enc.setPropertyAsAString("typeDesc", handlerInfoWrapper.description);
+ }
+ } catch (ext) {}
+ }
+
+ if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length"))
+ previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length"));
+
+ return;
+ }
+
+ if (this.enclosures == null) {
+ this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
+ this.enclosures.QueryInterface(Ci.nsIMutableArray);
+ }
+
+ this.enclosures.appendElement(new_enc, false);
+ this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc;
+ },
+
+ _atomLinksToEnclosures: function Entry_linkToEnclosure() {
+ var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
+ var enc_links = findAtomLinks("enclosure", links);
+ if (enc_links.length == 0)
+ return;
+
+ for (var i = 0; i < enc_links.length; ++i) {
+ var link = enc_links[i];
+
+ // an enclosure must have an href
+ if (!(link.getProperty("href")))
+ return;
+
+ var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
+
+ // copy Atom bits over to equivalent enclosure bits
+ enc.setPropertyAsAString("url", link.getPropertyAsAString("href"));
+ if (bagHasKey(link, "type"))
+ enc.setPropertyAsAString("type", link.getPropertyAsAString("type"));
+ if (bagHasKey(link, "length"))
+ enc.setPropertyAsAString("length", link.getPropertyAsAString("length"));
+
+ this._addToEnclosures(enc);
+ }
+ },
+
+ _enclosureToEnclosures: function Entry_enclosureToEnclosures() {
+ var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2);
+
+ if (!(enc.getProperty("url")))
+ return;
+
+ this._addToEnclosures(enc);
+ },
+
+ _mediaToEnclosures: function Entry_mediaToEnclosures(mediaType, contentType) {
+ var content;
+
+ // If a contentType is specified, the mediaType is a simple propertybag,
+ // and the contentType is an array inside it.
+ if (contentType) {
+ var group = this.fields.getPropertyAsInterface(mediaType, Ci.nsIPropertyBag2);
+ content = group.getPropertyAsInterface(contentType, Ci.nsIArray);
+ } else {
+ content = this.fields.getPropertyAsInterface(mediaType, Ci.nsIArray);
+ }
+
+ for (var i = 0; i < content.length; ++i) {
+ var contentElement = content.queryElementAt(i, Ci.nsIWritablePropertyBag2);
+
+ // media:content don't require url, but if it's not there, we should
+ // skip it.
+ if (!bagHasKey(contentElement, "url"))
+ continue;
+
+ var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
+
+ // copy media:content bits over to equivalent enclosure bits
+ enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url"));
+ if (bagHasKey(contentElement, "type")) {
+ enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type"));
+ } else if (mediaType == "mediathumbnail") {
+ // thumbnails won't have a type, but default to image types
+ enc.setPropertyAsAString("type", "image/*");
+ enc.setPropertyAsBool("thumbnail", true);
+ }
+
+ if (bagHasKey(contentElement, "fileSize")) {
+ enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize"));
+ }
+
+ this._addToEnclosures(enc);
+ }
+ },
+
+ // XPCOM stuff
+ classID: ENTRY_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI(
+ [Ci.nsIFeedEntry, Ci.nsIFeedContainer]
+ )
+}
+
+Entry.prototype._atomLinksToURI = Feed.prototype._atomLinksToURI;
+Entry.prototype._resolveURI = Feed.prototype._resolveURI;
+Entry.prototype._resetBagMembersToRawText =
+ Feed.prototype._resetBagMembersToRawText;
+
+// TextConstruct represents and element that could contain (X)HTML
+function TextConstruct() {
+ this.lang = null;
+ this.base = null;
+ this.type = "text";
+ this.text = null;
+ this.parserUtils = Cc[PARSERUTILS_CONTRACTID].getService(Ci.nsIParserUtils);
+}
+
+TextConstruct.prototype = {
+ plainText: function TC_plainText() {
+ if (this.type != "text") {
+ return this.parserUtils.convertToPlainText(stripTags(this.text),
+ Ci.nsIDocumentEncoder.OutputSelectionOnly |
+ Ci.nsIDocumentEncoder.OutputAbsoluteLinks,
+ 0);
+ }
+ return this.text;
+ },
+
+ createDocumentFragment: function TC_createDocumentFragment(element) {
+ if (this.type == "text") {
+ var doc = element.ownerDocument;
+ var docFragment = doc.createDocumentFragment();
+ var node = doc.createTextNode(this.text);
+ docFragment.appendChild(node);
+ return docFragment;
+ }
+ var isXML;
+ if (this.type == "xhtml")
+ isXML = true
+ else if (this.type == "html")
+ isXML = false;
+ else
+ return null;
+
+ return this.parserUtils.parseFragment(this.text, 0, isXML,
+ this.base, element);
+ },
+
+ // XPCOM stuff
+ classID: TEXTCONSTRUCT_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedTextConstruct])
+}
+
+// Generator represents the software that produced the feed
+function Generator() {
+ this.lang = null;
+ this.agent = null;
+ this.version = null;
+ this.uri = null;
+
+ // nsIFeedElementBase
+ this._attributes = null;
+ this.baseURI = null;
+}
+
+Generator.prototype = {
+
+ get attributes() {
+ return this._attributes;
+ },
+
+ set attributes(value) {
+ this._attributes = value;
+ this.version = this._attributes.getValueFromName("", "version");
+ var uriAttribute = this._attributes.getValueFromName("", "uri") ||
+ this._attributes.getValueFromName("", "url");
+ this.uri = strToURI(uriAttribute, this.baseURI);
+
+ // RSS1
+ uriAttribute = this._attributes.getValueFromName(RDF_NS, "resource");
+ if (uriAttribute) {
+ this.agent = uriAttribute;
+ this.uri = strToURI(uriAttribute, this.baseURI);
+ }
+ },
+
+ // XPCOM stuff
+ classID: GENERATOR_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI(
+ [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase]
+ )
+}
+
+function Person() {
+ this.name = null;
+ this.uri = null;
+ this.email = null;
+
+ // nsIFeedElementBase
+ this.attributes = null;
+ this.baseURI = null;
+}
+
+Person.prototype = {
+ // XPCOM stuff
+ classID: PERSON_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI(
+ [Ci.nsIFeedPerson, Ci.nsIFeedElementBase]
+ )
+}
+
+/**
+ * Map a list of fields into properties on a container.
+ *
+ * @param container An nsIFeedContainer
+ * @param fields A list of fields to search for. List members can
+ * be a list, in which case the second member is
+ * transformation function (like parseInt).
+ */
+function fieldsToObj(container, fields) {
+ var props, prop, field, searchList;
+ for (var key in fields) {
+ searchList = fields[key];
+ for (var i=0; i < searchList.length; ++i) {
+ props = searchList[i];
+ prop = null;
+ field = isArray(props) ? props[0] : props;
+ try {
+ prop = container.fields.getProperty(field);
+ }
+ catch (e) {
+ }
+ if (prop) {
+ prop = isArray(props) ? props[1](prop) : prop;
+ container[key] = prop;
+ }
+ }
+ }
+}
+
+/**
+ * Lower cases an element's localName property
+ * @param element A DOM element.
+ *
+ * @returns The lower case localName property of the specified element
+ */
+function LC(element) {
+ return element.localName.toLowerCase();
+}
+
+// TODO move these post-processor functions
+// create a generator element
+function atomGenerator(s, generator) {
+ generator.QueryInterface(Ci.nsIFeedGenerator);
+ generator.agent = s.trim();
+ return generator;
+}
+
+// post-process atom:logo to create an RSS2-like structure
+function atomLogo(s, logo) {
+ logo.setPropertyAsAString("url", s.trim());
+}
+
+// post-process an RSS category, map it to the Atom fields.
+function rssCatTerm(s, cat) {
+ // add slash handling?
+ cat.setPropertyAsAString("term", s.trim());
+ return cat;
+}
+
+// post-process a GUID
+function rssGuid(s, guid) {
+ guid.setPropertyAsAString("guid", s.trim());
+ return guid;
+}
+
+// post-process an RSS author element
+//
+// It can contain a field like this:
+//
+// <author>lawyer@boyer.net (Lawyer Boyer)</author>
+//
+// or, delightfully, a field like this:
+//
+// <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
+//
+// We want to split this up and assign it to corresponding Atom
+// fields.
+//
+function rssAuthor(s, author) {
+ author.QueryInterface(Ci.nsIFeedPerson);
+ // check for RSS2 string format
+ var chars = s.trim();
+ var matches = chars.match(/(.*)\((.*)\)/);
+ var emailCheck =
+ /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/;
+ if (matches) {
+ var match1 = matches[1].trim();
+ var match2 = matches[2].trim();
+ if (match2.indexOf("mailto:") == 0)
+ match2 = match2.substring(7);
+ if (emailCheck.test(match1)) {
+ author.email = match1;
+ author.name = match2;
+ }
+ else if (emailCheck.test(match2)) {
+ author.email = match2;
+ author.name = match1;
+ }
+ else {
+ // put it back together
+ author.name = match1 + " (" + match2 + ")";
+ }
+ }
+ else {
+ author.name = chars;
+ if (chars.indexOf('@'))
+ author.email = chars;
+ }
+ return author;
+}
+
+//
+// skipHours and skipDays map to arrays, so we need to change the
+// string to an nsISupports in order to stick it in there.
+//
+function rssArrayElement(s) {
+ var str = Cc["@mozilla.org/supports-string;1"].
+ createInstance(Ci.nsISupportsString);
+ str.data = s;
+ str.QueryInterface(Ci.nsISupportsString);
+ return str;
+}
+
+/**
+ * Tries parsing a string through the JavaScript Date object.
+ * @param aDateString
+ * A string that is supposedly an RFC822 or RFC3339 date.
+ * @return A Date.toUTCString, or null if the string can't be parsed.
+ */
+function dateParse(aDateString) {
+ let dateString = aDateString.trim();
+ // Without bug 682781 fixed, JS won't parse an RFC822 date with a Z for the
+ // timezone, so convert to -00:00 which works for any date format.
+ dateString = dateString.replace(/z$/i, "-00:00");
+ let date = new Date(dateString);
+ if (!isNaN(date)) {
+ return date.toUTCString();
+ }
+ return null;
+}
+
+const XHTML_NS = "http://www.w3.org/1999/xhtml";
+
+// The XHTMLHandler handles inline XHTML found in things like atom:summary
+function XHTMLHandler(processor, isAtom) {
+ this._buf = "";
+ this._processor = processor;
+ this._depth = 0;
+ this._isAtom = isAtom;
+ // a stack of lists tracking in-scope namespaces
+ this._inScopeNS = [];
+}
+
+// The fidelity can be improved here, to allow handling of stuff like
+// SVG and MathML. XXX
+XHTMLHandler.prototype = {
+
+ // look back up at the declared namespaces
+ // we always use the same prefixes for our safe stuff
+ _isInScope: function XH__isInScope(ns) {
+ for (var i in this._inScopeNS) {
+ for (var uri in this._inScopeNS[i]) {
+ if (this._inScopeNS[i][uri] == ns)
+ return true;
+ }
+ }
+ return false;
+ },
+
+ startDocument: function XH_startDocument() {
+ },
+ endDocument: function XH_endDocument() {
+ },
+ startElement: function XH_startElement(namespace, localName, qName, attributes) {
+ ++this._depth;
+ this._inScopeNS.push([]);
+
+ // RFC4287 requires XHTML to be wrapped in a div that is *not* part of
+ // the content. This prevents people from screwing up namespaces, but
+ // we need to skip it here.
+ if (this._isAtom && this._depth == 1 && localName == "div")
+ return;
+
+ // If it's an XHTML element, record it. Otherwise, it's ignored.
+ if (namespace == XHTML_NS) {
+ this._buf += "<" + localName;
+ var uri;
+ for (var i=0; i < attributes.length; ++i) {
+ uri = attributes.getURI(i);
+ // XHTML attributes aren't in a namespace
+ if (uri == "") {
+ this._buf += (" " + attributes.getLocalName(i) + "='" +
+ xmlEscape(attributes.getValue(i)) + "'");
+ } else {
+ // write a small set of allowed attribute namespaces
+ var prefix = gAllowedXHTMLNamespaces[uri];
+ if (prefix != null) {
+ // The attribute value we'll attempt to write
+ var attributeValue = xmlEscape(attributes.getValue(i));
+
+ // it's an allowed attribute NS.
+ // write the attribute
+ this._buf += (" " + prefix + ":" +
+ attributes.getLocalName(i) +
+ "='" + attributeValue + "'");
+
+ // write an xmlns declaration if necessary
+ if (prefix != "xml" && !this._isInScope(uri)) {
+ this._inScopeNS[this._inScopeNS.length - 1].push(uri);
+ this._buf += " xmlns:" + prefix + "='" + uri + "'";
+ }
+ }
+ }
+ }
+ this._buf += ">";
+ }
+ },
+ endElement: function XH_endElement(uri, localName, qName) {
+ --this._depth;
+ this._inScopeNS.pop();
+
+ // We need to skip outer divs in Atom. See comment in startElement.
+ if (this._isAtom && this._depth == 0 && localName == "div")
+ return;
+
+ // When we peek too far, go back to the main processor
+ if (this._depth < 0) {
+ this._processor.returnFromXHTMLHandler(this._buf.trim(),
+ uri, localName, qName);
+ return;
+ }
+ // If it's an XHTML element, record it. Otherwise, it's ignored.
+ if (uri == XHTML_NS) {
+ this._buf += "</" + localName + ">";
+ }
+ },
+ characters: function XH_characters(data) {
+ this._buf += xmlEscape(data);
+ },
+ startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
+ },
+ endPrefixMapping: function FP_endPrefixMapping(prefix) {
+ },
+ processingInstruction: function XH_processingInstruction() {
+ },
+}
+
+/**
+ * The ExtensionHandler deals with elements we haven't explicitly
+ * added to our transition table in the FeedProcessor.
+ */
+function ExtensionHandler(processor) {
+ this._buf = "";
+ this._depth = 0;
+ this._hasChildElements = false;
+
+ // The FeedProcessor
+ this._processor = processor;
+
+ // Fields of the outermost extension element.
+ this._localName = null;
+ this._uri = null;
+ this._qName = null;
+ this._attrs = null;
+}
+
+ExtensionHandler.prototype = {
+ startDocument: function EH_startDocument() {
+ },
+ endDocument: function EH_endDocument() {
+ },
+ startElement: function EH_startElement(uri, localName, qName, attrs) {
+ ++this._depth;
+
+ if (this._depth == 1) {
+ this._uri = uri;
+ this._localName = localName;
+ this._qName = qName;
+ this._attrs = attrs;
+ }
+
+ // if we descend into another element, we won't send text
+ this._hasChildElements = (this._depth > 1);
+
+ },
+ endElement: function EH_endElement(uri, localName, qName) {
+ --this._depth;
+ if (this._depth == 0) {
+ var text = this._hasChildElements ? null : this._buf.trim();
+ this._processor.returnFromExtHandler(this._uri, this._localName,
+ text, this._attrs);
+ }
+ },
+ characters: function EH_characters(data) {
+ if (!this._hasChildElements)
+ this._buf += data;
+ },
+ startPrefixMapping: function EH_startPrefixMapping() {
+ },
+ endPrefixMapping: function EH_endPrefixMapping() {
+ },
+ processingInstruction: function EH_processingInstruction() {
+ },
+};
+
+
+/**
+ * ElementInfo is a simple container object that describes
+ * some characteristics of a feed element. For example, it
+ * says whether an element can be expected to appear more
+ * than once inside a given entry or feed.
+ */
+function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
+ this.fieldName = fieldName;
+ this.containerClass = containerClass;
+ this.closeFunc = closeFunc;
+ this.isArray = isArray;
+ this.isWrapper = false;
+}
+
+/**
+ * FeedElementInfo represents a feed element, usually the root.
+ */
+function FeedElementInfo(fieldName, feedVersion) {
+ this.isWrapper = false;
+ this.fieldName = fieldName;
+ this.feedVersion = feedVersion;
+}
+
+/**
+ * Some feed formats include vestigial wrapper elements that we don't
+ * want to include in our object model, but we do need to keep track
+ * of during parsing.
+ */
+function WrapperElementInfo(fieldName) {
+ this.isWrapper = true;
+ this.fieldName = fieldName;
+}
+
+/** *** The Processor *****/
+function FeedProcessor() {
+ this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
+ this._buf = "";
+ this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
+ this._handlerStack = [];
+ this._xmlBaseStack = []; // sparse array keyed to nesting depth
+ this._depth = 0;
+ this._state = "START";
+ this._result = null;
+ this._extensionHandler = null;
+ this._xhtmlHandler = null;
+ this._haveSentResult = false;
+
+ // The nsIFeedResultListener waiting for the parse results
+ this.listener = null;
+
+ // These elements can contain (X)HTML or plain text.
+ // We keep a table here that contains their default treatment
+ this._textConstructs = {"atom:title":"text",
+ "atom:summary":"text",
+ "atom:rights":"text",
+ "atom:content":"text",
+ "atom:subtitle":"text",
+ "description":"html",
+ "rss1:description":"html",
+ "dc:description":"html",
+ "content:encoded":"html",
+ "title":"text",
+ "rss1:title":"text",
+ "atom03:title":"text",
+ "atom03:tagline":"text",
+ "atom03:summary":"text",
+ "atom03:content":"text"};
+ this._stack = [];
+
+ this._trans = {
+ "START": {
+ // If we hit a root RSS element, treat as RSS2.
+ "rss": new FeedElementInfo("RSS2", "rss2"),
+
+ // If we hit an RDF element, if could be RSS1, but we can't
+ // verify that until we hit a rss1:channel element.
+ "rdf:RDF": new WrapperElementInfo("RDF"),
+
+ // If we hit a Atom 1.0 element, treat as Atom 1.0.
+ "atom:feed": new FeedElementInfo("Atom", "atom"),
+
+ // Treat as Atom 0.3
+ "atom03:feed": new FeedElementInfo("Atom03", "atom03"),
+ },
+
+ /** ******* RSS2 **********/
+ "IN_RSS2": {
+ "channel": new WrapperElementInfo("channel")
+ },
+
+ "IN_CHANNEL": {
+ "item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
+ "managingEditor": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "category": new ElementInfo("categories", null, rssCatTerm, true),
+ "cloud": new ElementInfo("cloud", null, null, false),
+ "image": new ElementInfo("image", null, null, false),
+ "textInput": new ElementInfo("textInput", null, null, false),
+ "skipDays": new ElementInfo("skipDays", null, null, false),
+ "skipHours": new ElementInfo("skipHours", null, null, false),
+ "generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
+ atomGenerator, false),
+ },
+
+ "IN_ITEMS": {
+ "author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "category": new ElementInfo("categories", null, rssCatTerm, true),
+ "enclosure": new ElementInfo("enclosure", null, null, false),
+ "media:content": new ElementInfo("mediacontent", null, null, true),
+ "media:group": new ElementInfo("mediagroup", null, null, false),
+ "media:thumbnail": new ElementInfo("mediathumbnail", null, null, true),
+ "guid": new ElementInfo("guid", null, rssGuid, false)
+ },
+
+ "IN_SKIPDAYS": {
+ "day": new ElementInfo("days", null, rssArrayElement, true)
+ },
+
+ "IN_SKIPHOURS":{
+ "hour": new ElementInfo("hours", null, rssArrayElement, true)
+ },
+
+ "IN_MEDIAGROUP": {
+ "media:content": new ElementInfo("mediacontent", null, null, true),
+ "media:thumbnail": new ElementInfo("mediathumbnail", null, null, true)
+ },
+
+ /** ******* RSS1 **********/
+ "IN_RDF": {
+ // If we hit a rss1:channel, we can verify that we have RSS1
+ "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
+ "rss1:image": new ElementInfo("image", null, null, false),
+ "rss1:textinput": new ElementInfo("textInput", null, null, false),
+ "rss1:item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
+ },
+
+ "IN_RDF_CHANNEL": {
+ "admin:generatorAgent": new ElementInfo("generator",
+ Cc[GENERATOR_CONTRACTID],
+ null, false),
+ "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
+ rssAuthor, true),
+ },
+
+ /** ******* ATOM 1.0 **********/
+ "IN_ATOM": {
+ "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
+ atomGenerator, false),
+ "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom:link": new ElementInfo("links", null, null, true),
+ "atom:logo": new ElementInfo("atom:logo", null, atomLogo, false),
+ "atom:entry": new ElementInfo("entries", Cc[ENTRY_CONTRACTID],
+ null, true)
+ },
+
+ "IN_ENTRIES": {
+ "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom:link": new ElementInfo("links", null, null, true),
+ },
+
+ /** ******* ATOM 0.3 **********/
+ "IN_ATOM03": {
+ "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom03:contributor": new ElementInfo("contributors",
+ Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom03:link": new ElementInfo("links", null, null, true),
+ "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
+ null, true),
+ "atom03:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
+ atomGenerator, false),
+ },
+
+ "IN_ATOM03_ENTRIES": {
+ "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom03:contributor": new ElementInfo("contributors",
+ Cc[PERSON_CONTRACTID],
+ null, true),
+ "atom03:link": new ElementInfo("links", null, null, true),
+ "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
+ null, true)
+ }
+ }
+}
+
+// See startElement for a long description of how feeds are processed.
+FeedProcessor.prototype = {
+
+ // Set ourselves as the SAX handler, and set the base URI
+ _init: function FP_init(uri) {
+ this._reader.contentHandler = this;
+ this._reader.errorHandler = this;
+ this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult);
+ if (uri) {
+ this._result.uri = uri;
+ this._reader.baseURI = uri;
+ this._xmlBaseStack[0] = uri;
+ }
+ },
+
+ // This function is called once we figure out what type of feed
+ // we're dealing with. Some feed types require digging a bit further
+ // than the root.
+ _docVerified: function FP_docVerified(version) {
+ this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed);
+ this._result.doc.baseURI =
+ this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ this._result.doc.fields = this._feed;
+ this._result.version = version;
+ },
+
+ // When we're done with the feed, let the listener know what
+ // happened.
+ _sendResult: function FP_sendResult() {
+ this._haveSentResult = true;
+ try {
+ // Can be null when a non-feed is fed to us
+ if (this._result.doc)
+ this._result.doc.normalize();
+ }
+ catch (e) {
+ LOG("FIXME: " + e);
+ }
+
+ try {
+ if (this.listener != null)
+ this.listener.handleResult(this._result);
+ }
+ finally {
+ this._result = null;
+ }
+ },
+
+ // Parsing functions
+ parseFromStream: function FP_parseFromStream(stream, uri) {
+ this._init(uri);
+ this._reader.parseFromStream(stream, null, stream.available(),
+ "application/xml");
+ this._reader = null;
+ },
+
+ parseFromString: function FP_parseFromString(inputString, uri) {
+ this._init(uri);
+ this._reader.parseFromString(inputString, "application/xml");
+ this._reader = null;
+ },
+
+ parseAsync: function FP_parseAsync(requestObserver, uri) {
+ this._init(uri);
+ this._reader.parseAsync(requestObserver);
+ },
+
+ // nsIStreamListener
+
+ // The XMLReader will throw sensible exceptions if these get called
+ // out of order.
+ onStartRequest: function FP_onStartRequest(request, context) {
+ // this will throw if the request is not a channel, but so will nsParser.
+ var channel = request.QueryInterface(Ci.nsIChannel);
+ channel.contentType = "application/vnd.mozilla.maybe.feed";
+ this._reader.onStartRequest(request, context);
+ },
+
+ onStopRequest: function FP_onStopRequest(request, context, statusCode) {
+ try {
+ this._reader.onStopRequest(request, context, statusCode);
+ }
+ finally {
+ this._reader = null;
+ }
+ },
+
+ onDataAvailable:
+ function FP_onDataAvailable(request, context, inputStream, offset, count) {
+ this._reader.onDataAvailable(request, context, inputStream, offset, count);
+ },
+
+ // nsISAXErrorHandler
+
+ // We only care about fatal errors. When this happens, we may have
+ // parsed through the feed metadata and some number of entries. The
+ // listener can still show some of that data if it wants, and we'll
+ // set the bozo bit to indicate we were unable to parse all the way
+ // through.
+ fatalError: function FP_reportError() {
+ this._result.bozo = true;
+ // XXX need to QI to FeedProgressListener
+ if (!this._haveSentResult)
+ this._sendResult();
+ },
+
+ // nsISAXContentHandler
+
+ startDocument: function FP_startDocument() {
+ // LOG("----------");
+ },
+
+ endDocument: function FP_endDocument() {
+ if (!this._haveSentResult)
+ this._sendResult();
+ },
+
+ // The transitions defined above identify elements that contain more
+ // than just text. For example RSS items contain many fields, and so
+ // do Atom authors. The only commonly used elements that contain
+ // mixed content are Atom Text Constructs of type="xhtml", which we
+ // delegate to another handler for cleaning. That leaves a couple
+ // different types of elements to deal with: those that should occur
+ // only once, such as title elements, and those that can occur
+ // multiple times, such as the RSS category element and the Atom
+ // link element. Most of the RSS1/DC elements can occur multiple
+ // times in theory, but in practice, the only ones that do have
+ // analogues in Atom.
+ //
+ // Some elements are also groups of attributes or sub-elements,
+ // while others are simple text fields. For the most part, we don't
+ // have to pay explicit attention to the simple text elements,
+ // unless we want to post-process the resulting string to transform
+ // it into some richer object like a Date or URI.
+ //
+ // Elements that have more sophisticated content models still end up
+ // being dictionaries, whether they are based on attributes like RSS
+ // cloud, sub-elements like Atom author, or even items and
+ // entries. These elements are treated as "containers". It's
+ // theoretically possible for a container to have an attribute with
+ // the same universal name as a sub-element, but none of the feed
+ // formats allow this by default, and I don't of any extension that
+ // works this way.
+ //
+ startElement: function FP_startElement(uri, localName, qName, attributes) {
+ this._buf = "";
+ ++this._depth;
+ var elementInfo;
+
+ // LOG("<" + localName + ">");
+
+ // Check for xml:base
+ var base = attributes.getValueFromName(XMLNS, "base");
+ if (base) {
+ this._xmlBaseStack[this._depth] =
+ strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);
+ }
+
+ // To identify the element we're dealing with, we look up the
+ // namespace URI in our gNamespaces dictionary, which will give us
+ // a "canonical" prefix for a namespace URI. For example, this
+ // allows Dublin Core "creator" elements to be consistently mapped
+ // to "dc:creator", for easy field access by consumer code. This
+ // strategy also happens to shorten up our state table.
+ var key = this._prefixForNS(uri) + localName;
+
+ // Check to see if we need to hand this off to our XHTML handler.
+ // The elements we're dealing with will look like this:
+ //
+ // <title type="xhtml">
+ // <div xmlns="http://www.w3.org/1999/xhtml">
+ // A title with <b>bold</b> and <i>italics</i>.
+ // </div>
+ // </title>
+ //
+ // When it returns in returnFromXHTMLHandler, the handler should
+ // give us back a string like this:
+ //
+ // "A title with <b>bold</b> and <i>italics</i>."
+ //
+ // The Atom spec explicitly says the div is not part of the content,
+ // and explicitly allows whitespace collapsing.
+ //
+ if ((this._result.version == "atom" || this._result.version == "atom03") &&
+ this._textConstructs[key] != null) {
+ var type = attributes.getValueFromName("", "type");
+ if (type != null && type.indexOf("xhtml") >= 0) {
+ this._xhtmlHandler =
+ new XHTMLHandler(this, (this._result.version == "atom"));
+ this._reader.contentHandler = this._xhtmlHandler;
+ return;
+ }
+ }
+
+ // Check our current state, and see if that state has a defined
+ // transition. For example, this._trans["atom:entry"]["atom:author"]
+ // will have one, and it tells us to add an item to our authors array.
+ if (this._trans[this._state] && this._trans[this._state][key]) {
+ elementInfo = this._trans[this._state][key];
+ }
+ else {
+ // If we don't have a transition, hand off to extension handler
+ this._extensionHandler = new ExtensionHandler(this);
+ this._reader.contentHandler = this._extensionHandler;
+ this._extensionHandler.startElement(uri, localName, qName, attributes);
+ return;
+ }
+
+ // This distinguishes wrappers like 'channel' from elements
+ // we'd actually like to do something with (which will test true).
+ this._handlerStack[this._depth] = elementInfo;
+ if (elementInfo.isWrapper) {
+ this._state = "IN_" + elementInfo.fieldName.toUpperCase();
+ this._stack.push([this._feed, this._state]);
+ }
+ else if (elementInfo.feedVersion) {
+ this._state = "IN_" + elementInfo.fieldName.toUpperCase();
+
+ // Check for the older RSS2 variants
+ if (elementInfo.feedVersion == "rss2")
+ elementInfo.feedVersion = this._findRSSVersion(attributes);
+ else if (uri == RSS090NS)
+ elementInfo.feedVersion = "rss090";
+
+ this._docVerified(elementInfo.feedVersion);
+ this._stack.push([this._feed, this._state]);
+ this._mapAttributes(this._feed, attributes);
+ }
+ else {
+ this._state = this._processComplexElement(elementInfo, attributes);
+ }
+ },
+
+ // In the endElement handler, we decrement the stack and look
+ // for cleanup/transition functions to execute. The second part
+ // of the state transition works as above in startElement, but
+ // the state we're looking for is prefixed with an underscore
+ // to distinguish endElement events from startElement events.
+ endElement: function FP_endElement(uri, localName, qName) {
+ var elementInfo = this._handlerStack[this._depth];
+ // LOG("</" + localName + ">");
+ if (elementInfo && !elementInfo.isWrapper)
+ this._closeComplexElement(elementInfo);
+
+ // cut down xml:base context
+ if (this._xmlBaseStack.length == this._depth + 1)
+ this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth);
+
+ // our new state is whatever is at the top of the stack now
+ if (this._stack.length > 0)
+ this._state = this._stack[this._stack.length - 1][1];
+ this._handlerStack = this._handlerStack.slice(0, this._depth);
+ --this._depth;
+ },
+
+ // Buffer up character data. The buffer is cleared with every
+ // opening element.
+ characters: function FP_characters(data) {
+ this._buf += data;
+ },
+ // TODO: It would be nice to check new prefixes here, and if they
+ // don't conflict with the ones we've defined, throw them in a
+ // dictionary to check.
+ startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
+ },
+
+ endPrefixMapping: function FP_endPrefixMapping(prefix) {
+ },
+
+ processingInstruction: function FP_processingInstruction(target, data) {
+ if (target == "xml-stylesheet") {
+ var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);
+ if (hrefAttribute && hrefAttribute.length == 2)
+ this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri);
+ }
+ },
+
+ // end of nsISAXContentHandler
+
+ // Handle our more complicated elements--those that contain
+ // attributes and child elements.
+ _processComplexElement:
+ function FP__processComplexElement(elementInfo, attributes) {
+ var obj;
+
+ // If the container is an entry/item, it'll need to have its
+ // more esoteric properties put in the 'fields' property bag.
+ if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) {
+ obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry);
+ obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ this._mapAttributes(obj.fields, attributes);
+ }
+ else if (elementInfo.containerClass) {
+ obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase);
+ obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ obj.attributes = attributes; // just set the SAX attributes
+ }
+ else {
+ obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
+ this._mapAttributes(obj, attributes);
+ }
+
+ // We should have a container/propertyBag that's had its
+ // attributes processed. Now we need to attach it to its
+ // container.
+ var newProp;
+
+ // First we'll see what's on top of the stack.
+ var container = this._stack[this._stack.length - 1][0];
+
+ // Check to see if it has the property
+ var prop;
+ try {
+ prop = container.getProperty(elementInfo.fieldName);
+ }
+ catch (e) {
+ }
+
+ if (elementInfo.isArray) {
+ if (!prop) {
+ container.setPropertyAsInterface(elementInfo.fieldName,
+ Cc[ARRAY_CONTRACTID].
+ createInstance(Ci.nsIMutableArray));
+ }
+
+ newProp = container.getProperty(elementInfo.fieldName);
+ // XXX This QI should not be necessary, but XPConnect seems to fly
+ // off the handle in the browser, and loses track of the interface
+ // on large files. Bug 335638.
+ newProp.QueryInterface(Ci.nsIMutableArray);
+ newProp.appendElement(obj, false);
+
+ // If new object is an nsIFeedContainer, we want to deal with
+ // its member nsIPropertyBag instead.
+ if (isIFeedContainer(obj))
+ newProp = obj.fields;
+
+ }
+ else {
+ // If it doesn't, set it.
+ if (!prop) {
+ container.setPropertyAsInterface(elementInfo.fieldName, obj);
+ }
+ newProp = container.getProperty(elementInfo.fieldName);
+ }
+
+ // make our new state name, and push the property onto the stack
+ var newState = "IN_" + elementInfo.fieldName.toUpperCase();
+ this._stack.push([newProp, newState, obj]);
+ return newState;
+ },
+
+ // Sometimes we need reconcile the element content with the object
+ // model for a given feed. We use helper functions to do the
+ // munging, but we need to identify array types here, so the munging
+ // happens only to the last element of an array.
+ _closeComplexElement: function FP__closeComplexElement(elementInfo) {
+ var stateTuple = this._stack.pop();
+ var container = stateTuple[0];
+ var containerParent = stateTuple[2];
+ var element = null;
+ var isArray = isIArray(container);
+
+ // If it's an array and we have to post-process,
+ // grab the last element
+ if (isArray)
+ element = container.queryElementAt(container.length - 1, Ci.nsISupports);
+ else
+ element = container;
+
+ // Run the post-processing function if there is one.
+ if (elementInfo.closeFunc)
+ element = elementInfo.closeFunc(this._buf, element);
+
+ // If an nsIFeedContainer was on top of the stack,
+ // we need to normalize it
+ if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID])
+ containerParent.normalize();
+
+ // If it's an array, re-set the last element
+ if (isArray)
+ container.replaceElementAt(element, container.length - 1, false);
+ },
+
+ _prefixForNS: function FP_prefixForNS(uri) {
+ if (!uri)
+ return "";
+ var prefix = gNamespaces[uri];
+ if (prefix)
+ return prefix + ":";
+ if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0)
+ return "";
+ return null;
+ },
+
+ _mapAttributes: function FP__mapAttributes(bag, attributes) {
+ // Cycle through the attributes, and set our properties using the
+ // prefix:localNames we find in our namespace dictionary.
+ for (var i = 0; i < attributes.length; ++i) {
+ var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i);
+ var val = attributes.getValue(i);
+ bag.setPropertyAsAString(key, val);
+ }
+ },
+
+ // Only for RSS2esque formats
+ _findRSSVersion: function FP__findRSSVersion(attributes) {
+ var versionAttr = attributes.getValueFromName("", "version").trim();
+ var versions = { "0.91":"rss091",
+ "0.92":"rss092",
+ "0.93":"rss093",
+ "0.94":"rss094" }
+ if (versions[versionAttr])
+ return versions[versionAttr];
+ if (versionAttr.substr(0, 2) != "2.")
+ return "rssUnknown";
+ return "rss2";
+ },
+
+ // unknown element values are returned here. See startElement above
+ // for how this works.
+ returnFromExtHandler:
+ function FP_returnExt(uri, localName, chars, attributes) {
+ --this._depth;
+
+ // take control of the SAX events
+ this._reader.contentHandler = this;
+ if (localName == null && chars == null)
+ return;
+
+ // we don't take random elements inside rdf:RDF
+ if (this._state == "IN_RDF")
+ return;
+
+ // Grab the top of the stack
+ var top = this._stack[this._stack.length - 1];
+ if (!top)
+ return;
+
+ var container = top[0];
+ // Grab the last element if it's an array
+ if (isIArray(container)) {
+ var contract = this._handlerStack[this._depth].containerClass;
+ // check if it's something specific, but not an entry
+ if (contract && contract != Cc[ENTRY_CONTRACTID]) {
+ var el = container.queryElementAt(container.length - 1,
+ Ci.nsIFeedElementBase);
+ // XXX there must be a way to flatten these interfaces
+ if (contract == Cc[PERSON_CONTRACTID])
+ el.QueryInterface(Ci.nsIFeedPerson);
+ else
+ return; // don't know about this interface
+
+ let propName = localName;
+ var prefix = gNamespaces[uri];
+
+ // synonyms
+ if ((uri == "" ||
+ prefix &&
+ ((prefix.indexOf("atom") > -1) ||
+ (prefix.indexOf("rss") > -1))) &&
+ (propName == "url" || propName == "href"))
+ propName = "uri";
+
+ try {
+ if (el[propName] !== "undefined") {
+ var propValue = chars;
+ // convert URI-bearing values to an nsIURI
+ if (propName == "uri") {
+ var base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ propValue = strToURI(chars, base);
+ }
+ el[propName] = propValue;
+ }
+ }
+ catch (e) {
+ // ignore XPConnect errors
+ }
+ // the rest of the function deals with entry- and feed-level stuff
+ return;
+ }
+ container = container.queryElementAt(container.length - 1,
+ Ci.nsIWritablePropertyBag2);
+ }
+
+ // Make the buffer our new property
+ var propName = this._prefixForNS(uri) + localName;
+
+ // But, it could be something containing HTML. If so,
+ // we need to know about that.
+ if (this._textConstructs[propName] != null &&
+ this._handlerStack[this._depth].containerClass !== null) {
+ var newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
+ createInstance(Ci.nsIFeedTextConstruct);
+ newProp.text = chars;
+ // Look up the default type in our table
+ var type = this._textConstructs[propName];
+ var typeAttribute = attributes.getValueFromName("", "type");
+ if (this._result.version == "atom" && typeAttribute != null) {
+ type = typeAttribute;
+ }
+ else if (this._result.version == "atom03" && typeAttribute != null) {
+ if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) {
+ type = "xhtml";
+ }
+ else if (typeAttribute.toLowerCase().indexOf("html") >= 0) {
+ type = "html";
+ }
+ else if (typeAttribute.toLowerCase().indexOf("text") >= 0) {
+ type = "text";
+ }
+ }
+
+ // If it's rss feed-level description, it's not supposed to have html
+ if (this._result.version.indexOf("rss") >= 0 &&
+ this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) {
+ type = "text";
+ }
+ newProp.type = type;
+ newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ container.setPropertyAsInterface(propName, newProp);
+ }
+ else {
+ container.setPropertyAsAString(propName, chars);
+ }
+ },
+
+ // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler
+ // (see above) that will scrape out non-XHTML stuff, normalize
+ // namespaces, and remove the wrapper div from Atom 1.0. When the
+ // XHTMLHandler is done, it'll callback here.
+ returnFromXHTMLHandler:
+ function FP_returnFromXHTMLHandler(chars, uri, localName, qName) {
+ // retake control of the SAX content events
+ this._reader.contentHandler = this;
+
+ // Grab the top of the stack
+ var top = this._stack[this._stack.length - 1];
+ if (!top)
+ return;
+ var container = top[0];
+
+ // Assign the property
+ var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
+ createInstance(Ci.nsIFeedTextConstruct);
+ newProp.text = chars;
+ newProp.type = "xhtml";
+ newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
+ container.setPropertyAsInterface(this._prefixForNS(uri) + localName,
+ newProp);
+
+ // XHTML will cause us to peek too far. The XHTML handler will
+ // send us an end element to call. RFC4287-valid feeds allow a
+ // more graceful way to handle this. Unfortunately, we can't count
+ // on compliance at this point.
+ this.endElement(uri, localName, qName);
+ },
+
+ // XPCOM stuff
+ classID: FP_CLASSID,
+ QueryInterface: XPCOMUtils.generateQI(
+ [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler,
+ Ci.nsIStreamListener, Ci.nsIRequestObserver]
+ )
+}
+
+var components = [FeedProcessor, FeedResult, Feed, Entry,
+ TextConstruct, Generator, Person];
+
+this.NSGetFactory = XPCOMUtils.generateNSGetFactory(components);