diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /toolkit/components/reader | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'toolkit/components/reader')
19 files changed, 5480 insertions, 0 deletions
diff --git a/toolkit/components/reader/.eslintrc.js b/toolkit/components/reader/.eslintrc.js new file mode 100644 index 000000000..1c09e0bf7 --- /dev/null +++ b/toolkit/components/reader/.eslintrc.js @@ -0,0 +1,199 @@ +"use strict"; + +module.exports = { + "rules": { + // Braces only needed for multi-line arrow function blocks + // "arrow-body-style": ["error", "as-needed"], + + // Require spacing around => + // "arrow-spacing": "error", + + // Always require spacing around a single line block + // "block-spacing": "warn", + + // No newline before open brace for a block + "brace-style": "error", + + // No space before always a space after a comma + "comma-spacing": ["error", {"before": false, "after": true}], + + // Commas at the end of the line not the start + // "comma-style": "error", + + // Don't require spaces around computed properties + // "computed-property-spacing": ["error", "never"], + + // Functions must always return something or nothing + "consistent-return": "error", + + // Require braces around blocks that start a new line + // Note that this rule is likely to be overridden on a per-directory basis + // very frequently. 
+ // "curly": ["error", "multi-line"], + + // Always require a trailing EOL + "eol-last": "error", + + // Require function* name() + // "generator-star-spacing": ["error", {"before": false, "after": true}], + + // Two space indent + "indent": ["error", 2, { "SwitchCase": 1 }], + + // Space after colon not before in property declarations + "key-spacing": ["error", { "beforeColon": false, "afterColon": true, "mode": "minimum" }], + + // Unix linebreaks + "linebreak-style": ["error", "unix"], + + // Always require parenthesis for new calls + "new-parens": "error", + + // Use [] instead of Array() + // "no-array-constructor": "error", + + // No duplicate arguments in function declarations + "no-dupe-args": "error", + + // No duplicate keys in object declarations + "no-dupe-keys": "error", + + // No duplicate cases in switch statements + "no-duplicate-case": "error", + + // No labels + "no-labels": "error", + + // If an if block ends with a return no need for an else block + "no-else-return": "error", + + // No empty statements + "no-empty": "error", + + // No empty character classes in regex + "no-empty-character-class": "error", + + // Disallow empty destructuring + "no-empty-pattern": "error", + + // No assiging to exception variable + // "no-ex-assign": "error", + + // No using !! 
where casting to boolean is already happening + // "no-extra-boolean-cast": "error", + + // No double semicolon + "no-extra-semi": "error", + + // No overwriting defined functions + "no-func-assign": "error", + + // Declarations in Program or Function Body + "no-inner-declarations": "error", + + // No invalid regular expresions + "no-invalid-regexp": "error", + + // No odd whitespace characters + "no-irregular-whitespace": "error", + + // No single if block inside an else block + "no-lonely-if": "error", + + // No mixing spaces and tabs in indent + "no-mixed-spaces-and-tabs": ["error", "smart-tabs"], + + // No unnecessary spacing + "no-multi-spaces": ["error", { exceptions: { "AssignmentExpression": true, "VariableDeclarator": true, "ArrayExpression": true, "ObjectExpression": true } }], + + // No reassigning native JS objects + "no-native-reassign": "error", + + // No (!foo in bar) + "no-negated-in-lhs": "error", + + // Nested ternary statements are confusing + "no-nested-ternary": "error", + + // Use {} instead of new Object() + // "no-new-object": "error", + + // No Math() or JSON() + "no-obj-calls": "error", + + // No octal literals + "no-octal": "error", + + // No redeclaring variables + "no-redeclare": "error", + + // No unnecessary comparisons + "no-self-compare": "error", + + // No declaring variables from an outer scope + "no-shadow": "error", + + // No declaring variables that hide things like arguments + "no-shadow-restricted-names": "error", + + // No spaces between function name and parentheses + "no-spaced-func": "error", + + // No trailing whitespace + "no-trailing-spaces": "error", + + // No using undeclared variables + // "no-undef": "error", + + // Error on newline where a semicolon is needed + "no-unexpected-multiline": "error", + + // No unreachable statements + "no-unreachable": "error", + + // No expressions where a statement is expected + // "no-unused-expressions": "error", + + // No declaring variables that are never used + 
"no-unused-vars": ["error", {"vars": "all", "args": "none"}], + + // No using variables before defined + // "no-use-before-define": ["error", "nofunc"], + + // No using with + "no-with": "error", + + // Always require semicolon at end of statement + "semi": ["error", "always"], + + // Require space after keywords + "keyword-spacing": "error", + + // Require space before blocks + "space-before-blocks": "error", + + // Never use spaces before function parentheses + // "space-before-function-paren": ["error", { "anonymous": "always", "named": "never" }], + + // Require spaces before finally, catch, etc. + // "space-before-keywords": ["error", "always"], + + // No space padding in parentheses + // "space-in-parens": ["error", "never"], + + // Require spaces around operators + // "space-infix-ops": "error", + + // Require spaces after return, throw and case + // "space-return-throw-case": "error", + + // ++ and -- should not need spacing + // "space-unary-ops": ["error", { "words": true, "nonwords": false }], + + // No comparisons to NaN + "use-isnan": "error", + + // Only check typeof against valid results + "valid-typeof": "error", + }, +} diff --git a/toolkit/components/reader/AboutReader.jsm b/toolkit/components/reader/AboutReader.jsm new file mode 100644 index 000000000..1fb9db123 --- /dev/null +++ b/toolkit/components/reader/AboutReader.jsm @@ -0,0 +1,997 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +"use strict"; + +var Ci = Components.interfaces, Cc = Components.classes, Cu = Components.utils; + +this.EXPORTED_SYMBOLS = [ "AboutReader" ]; + +Cu.import("resource://gre/modules/ReaderMode.jsm"); +Cu.import("resource://gre/modules/Services.jsm"); +Cu.import("resource://gre/modules/XPCOMUtils.jsm"); + +XPCOMUtils.defineLazyModuleGetter(this, "AsyncPrefs", "resource://gre/modules/AsyncPrefs.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "NarrateControls", "resource://gre/modules/narrate/NarrateControls.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "Rect", "resource://gre/modules/Geometry.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "UITelemetry", "resource://gre/modules/UITelemetry.jsm"); + +var gStrings = Services.strings.createBundle("chrome://global/locale/aboutReader.properties"); + +var AboutReader = function(mm, win, articlePromise) { + let url = this._getOriginalUrl(win); + if (!(url.startsWith("http://") || url.startsWith("https://"))) { + let errorMsg = "Only http:// and https:// URLs can be loaded in about:reader."; + if (Services.prefs.getBoolPref("reader.errors.includeURLs")) + errorMsg += " Tried to load: " + url + "."; + Cu.reportError(errorMsg); + win.location.href = "about:blank"; + return; + } + + let doc = win.document; + + this._mm = mm; + this._mm.addMessageListener("Reader:CloseDropdown", this); + this._mm.addMessageListener("Reader:AddButton", this); + this._mm.addMessageListener("Reader:RemoveButton", this); + this._mm.addMessageListener("Reader:GetStoredArticleData", this); + + this._docRef = Cu.getWeakReference(doc); + this._winRef = Cu.getWeakReference(win); + this._innerWindowId = win.QueryInterface(Ci.nsIInterfaceRequestor) + .getInterface(Ci.nsIDOMWindowUtils).currentInnerWindowID; + + this._article = null; + + if (articlePromise) { + this._articlePromise = articlePromise; + } + + this._headerElementRef = 
Cu.getWeakReference(doc.getElementById("reader-header")); + this._domainElementRef = Cu.getWeakReference(doc.getElementById("reader-domain")); + this._titleElementRef = Cu.getWeakReference(doc.getElementById("reader-title")); + this._creditsElementRef = Cu.getWeakReference(doc.getElementById("reader-credits")); + this._contentElementRef = Cu.getWeakReference(doc.getElementById("moz-reader-content")); + this._toolbarElementRef = Cu.getWeakReference(doc.getElementById("reader-toolbar")); + this._messageElementRef = Cu.getWeakReference(doc.getElementById("reader-message")); + + this._scrollOffset = win.pageYOffset; + + doc.addEventListener("click", this, false); + + win.addEventListener("pagehide", this, false); + win.addEventListener("scroll", this, false); + win.addEventListener("resize", this, false); + + Services.obs.addObserver(this, "inner-window-destroyed", false); + + doc.addEventListener("visibilitychange", this, false); + + this._setupStyleDropdown(); + this._setupButton("close-button", this._onReaderClose.bind(this), "aboutReader.toolbar.close"); + + const gIsFirefoxDesktop = Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"; + if (gIsFirefoxDesktop) { + // we're ready for any external setup, send a signal for that. + this._mm.sendAsyncMessage("Reader:OnSetup"); + } + + let colorSchemeValues = JSON.parse(Services.prefs.getCharPref("reader.color_scheme.values")); + let colorSchemeOptions = colorSchemeValues.map((value) => { + return { name: gStrings.GetStringFromName("aboutReader.colorScheme." 
+ value), + value: value, + itemClass: value + "-button" }; + }); + + let colorScheme = Services.prefs.getCharPref("reader.color_scheme"); + this._setupSegmentedButton("color-scheme-buttons", colorSchemeOptions, colorScheme, this._setColorSchemePref.bind(this)); + this._setColorSchemePref(colorScheme); + + let fontTypeSample = gStrings.GetStringFromName("aboutReader.fontTypeSample"); + let fontTypeOptions = [ + { name: fontTypeSample, + description: gStrings.GetStringFromName("aboutReader.fontType.sans-serif"), + value: "sans-serif", + itemClass: "sans-serif-button" + }, + { name: fontTypeSample, + description: gStrings.GetStringFromName("aboutReader.fontType.serif"), + value: "serif", + itemClass: "serif-button" }, + ]; + + let fontType = Services.prefs.getCharPref("reader.font_type"); + this._setupSegmentedButton("font-type-buttons", fontTypeOptions, fontType, this._setFontType.bind(this)); + this._setFontType(fontType); + + this._setupFontSizeButtons(); + + this._setupContentWidthButtons(); + + this._setupLineHeightButtons(); + + if (win.speechSynthesis && Services.prefs.getBoolPref("narrate.enabled")) { + new NarrateControls(mm, win); + } + + this._loadArticle(); +}; + +AboutReader.prototype = { + _BLOCK_IMAGES_SELECTOR: ".content p > img:only-child, " + + ".content p > a:only-child > img:only-child, " + + ".content .wp-caption img, " + + ".content figure img", + + get _doc() { + return this._docRef.get(); + }, + + get _win() { + return this._winRef.get(); + }, + + get _headerElement() { + return this._headerElementRef.get(); + }, + + get _domainElement() { + return this._domainElementRef.get(); + }, + + get _titleElement() { + return this._titleElementRef.get(); + }, + + get _creditsElement() { + return this._creditsElementRef.get(); + }, + + get _contentElement() { + return this._contentElementRef.get(); + }, + + get _toolbarElement() { + return this._toolbarElementRef.get(); + }, + + get _messageElement() { + return this._messageElementRef.get(); + }, + + 
get _isToolbarVertical() { + if (this._toolbarVertical !== undefined) { + return this._toolbarVertical; + } + return this._toolbarVertical = Services.prefs.getBoolPref("reader.toolbar.vertical"); + }, + + // Provides unique view Id. + get viewId() { + let _viewId = Cc["@mozilla.org/uuid-generator;1"]. + getService(Ci.nsIUUIDGenerator).generateUUID().toString(); + Object.defineProperty(this, "viewId", { value: _viewId }); + + return _viewId; + }, + + receiveMessage: function (message) { + switch (message.name) { + // Triggered by Android user pressing BACK while the banner font-dropdown is open. + case "Reader:CloseDropdown": { + // Just close it. + this._closeDropdowns(); + break; + } + + case "Reader:AddButton": { + if (message.data.id && message.data.image && + !this._doc.getElementById(message.data.id)) { + let btn = this._doc.createElement("button"); + btn.setAttribute("class", "button"); + btn.setAttribute("style", "background-image: url('" + message.data.image + "')"); + btn.setAttribute("id", message.data.id); + if (message.data.title) + btn.setAttribute("title", message.data.title); + if (message.data.text) + btn.textContent = message.data.text; + let tb = this._doc.getElementById("reader-toolbar"); + tb.appendChild(btn); + this._setupButton(message.data.id, button => { + this._mm.sendAsyncMessage("Reader:Clicked-" + button.getAttribute("id"), { article: this._article }); + }); + } + break; + } + case "Reader:RemoveButton": { + if (message.data.id) { + let btn = this._doc.getElementById(message.data.id); + if (btn) + btn.remove(); + } + break; + } + case "Reader:GetStoredArticleData": { + this._mm.sendAsyncMessage("Reader:StoredArticleData", { article: this._article }); + } + } + }, + + handleEvent: function(aEvent) { + if (!aEvent.isTrusted) + return; + + switch (aEvent.type) { + case "click": + let target = aEvent.target; + if (target.classList.contains('dropdown-toggle')) { + this._toggleDropdownClicked(aEvent); + } else if 
(!target.closest('.dropdown-popup')) { + this._closeDropdowns(); + } + break; + case "scroll": + this._closeDropdowns(true); + let isScrollingUp = this._scrollOffset > aEvent.pageY; + this._setSystemUIVisibility(isScrollingUp); + this._scrollOffset = aEvent.pageY; + break; + case "resize": + this._updateImageMargins(); + if (this._isToolbarVertical) { + this._win.setTimeout(() => { + for (let dropdown of this._doc.querySelectorAll('.dropdown.open')) { + this._updatePopupPosition(dropdown); + } + }, 0); + } + break; + + case "devicelight": + this._handleDeviceLight(aEvent.value); + break; + + case "visibilitychange": + this._handleVisibilityChange(); + break; + + case "pagehide": + // Close the Banners Font-dropdown, cleanup Android BackPressListener. + this._closeDropdowns(); + + this._mm.removeMessageListener("Reader:CloseDropdown", this); + this._mm.removeMessageListener("Reader:AddButton", this); + this._mm.removeMessageListener("Reader:RemoveButton", this); + this._mm.removeMessageListener("Reader:GetStoredArticleData", this); + this._windowUnloaded = true; + break; + } + }, + + observe: function(subject, topic, data) { + if (subject.QueryInterface(Ci.nsISupportsPRUint64).data != this._innerWindowId) { + return; + } + + Services.obs.removeObserver(this, "inner-window-destroyed", false); + + this._mm.removeMessageListener("Reader:CloseDropdown", this); + this._mm.removeMessageListener("Reader:AddButton", this); + this._mm.removeMessageListener("Reader:RemoveButton", this); + this._windowUnloaded = true; + }, + + _onReaderClose: function() { + ReaderMode.leaveReaderMode(this._mm.docShell, this._win); + }, + + _setFontSize: function(newFontSize) { + let containerClasses = this._doc.getElementById("container").classList; + + if (this._fontSize > 0) + containerClasses.remove("font-size" + this._fontSize); + + this._fontSize = newFontSize; + containerClasses.add("font-size" + this._fontSize); + return AsyncPrefs.set("reader.font_size", this._fontSize); + }, + + 
_setupFontSizeButtons: function() { + const FONT_SIZE_MIN = 1; + const FONT_SIZE_MAX = 9; + + // Sample text shown in Android UI. + let sampleText = this._doc.getElementById("font-size-sample"); + sampleText.textContent = gStrings.GetStringFromName("aboutReader.fontTypeSample"); + + let currentSize = Services.prefs.getIntPref("reader.font_size"); + currentSize = Math.max(FONT_SIZE_MIN, Math.min(FONT_SIZE_MAX, currentSize)); + + let plusButton = this._doc.getElementById("font-size-plus"); + let minusButton = this._doc.getElementById("font-size-minus"); + + function updateControls() { + if (currentSize === FONT_SIZE_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentSize === FONT_SIZE_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + } + + updateControls(); + this._setFontSize(currentSize); + + plusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentSize >= FONT_SIZE_MAX) { + return; + } + + currentSize++; + updateControls(); + this._setFontSize(currentSize); + }, true); + + minusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentSize <= FONT_SIZE_MIN) { + return; + } + + currentSize--; + updateControls(); + this._setFontSize(currentSize); + }, true); + }, + + _setContentWidth: function(newContentWidth) { + let containerClasses = this._doc.getElementById("container").classList; + + if (this._contentWidth > 0) + containerClasses.remove("content-width" + this._contentWidth); + + this._contentWidth = newContentWidth; + containerClasses.add("content-width" + this._contentWidth); + return AsyncPrefs.set("reader.content_width", this._contentWidth); + }, + + _setupContentWidthButtons: function() { + const CONTENT_WIDTH_MIN = 1; + const CONTENT_WIDTH_MAX = 9; + + let currentContentWidth = 
Services.prefs.getIntPref("reader.content_width"); + currentContentWidth = Math.max(CONTENT_WIDTH_MIN, Math.min(CONTENT_WIDTH_MAX, currentContentWidth)); + + let plusButton = this._doc.getElementById("content-width-plus"); + let minusButton = this._doc.getElementById("content-width-minus"); + + function updateControls() { + if (currentContentWidth === CONTENT_WIDTH_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentContentWidth === CONTENT_WIDTH_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + } + + updateControls(); + this._setContentWidth(currentContentWidth); + + plusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentContentWidth >= CONTENT_WIDTH_MAX) { + return; + } + + currentContentWidth++; + updateControls(); + this._setContentWidth(currentContentWidth); + }, true); + + minusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentContentWidth <= CONTENT_WIDTH_MIN) { + return; + } + + currentContentWidth--; + updateControls(); + this._setContentWidth(currentContentWidth); + }, true); + }, + + _setLineHeight: function(newLineHeight) { + let contentClasses = this._doc.getElementById("moz-reader-content").classList; + + if (this._lineHeight > 0) + contentClasses.remove("line-height" + this._lineHeight); + + this._lineHeight = newLineHeight; + contentClasses.add("line-height" + this._lineHeight); + return AsyncPrefs.set("reader.line_height", this._lineHeight); + }, + + _setupLineHeightButtons: function() { + const LINE_HEIGHT_MIN = 1; + const LINE_HEIGHT_MAX = 9; + + let currentLineHeight = Services.prefs.getIntPref("reader.line_height"); + currentLineHeight = Math.max(LINE_HEIGHT_MIN, Math.min(LINE_HEIGHT_MAX, currentLineHeight)); + + let plusButton = 
this._doc.getElementById("line-height-plus"); + let minusButton = this._doc.getElementById("line-height-minus"); + + function updateControls() { + if (currentLineHeight === LINE_HEIGHT_MIN) { + minusButton.setAttribute("disabled", true); + } else { + minusButton.removeAttribute("disabled"); + } + if (currentLineHeight === LINE_HEIGHT_MAX) { + plusButton.setAttribute("disabled", true); + } else { + plusButton.removeAttribute("disabled"); + } + } + + updateControls(); + this._setLineHeight(currentLineHeight); + + plusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentLineHeight >= LINE_HEIGHT_MAX) { + return; + } + + currentLineHeight++; + updateControls(); + this._setLineHeight(currentLineHeight); + }, true); + + minusButton.addEventListener("click", (event) => { + if (!event.isTrusted) { + return; + } + event.stopPropagation(); + + if (currentLineHeight <= LINE_HEIGHT_MIN) { + return; + } + + currentLineHeight--; + updateControls(); + this._setLineHeight(currentLineHeight); + }, true); + }, + + _handleDeviceLight: function(newLux) { + // Desired size of the this._luxValues array. + let luxValuesSize = 10; + // Add new lux value at the front of the array. + this._luxValues.unshift(newLux); + // Add new lux value to this._totalLux for averaging later. + this._totalLux += newLux; + + // Don't update when length of array is less than luxValuesSize except when it is 1. + if (this._luxValues.length < luxValuesSize) { + // Use the first lux value to set the color scheme until our array equals luxValuesSize. + if (this._luxValues.length == 1) { + this._updateColorScheme(newLux); + } + return; + } + // Holds the average of the lux values collected in this._luxValues. + let averageLuxValue = this._totalLux/luxValuesSize; + + this._updateColorScheme(averageLuxValue); + // Pop the oldest value off the array. 
+ let oldLux = this._luxValues.pop(); + // Subtract oldLux since it has been discarded from the array. + this._totalLux -= oldLux; + }, + + _handleVisibilityChange: function() { + let colorScheme = Services.prefs.getCharPref("reader.color_scheme"); + if (colorScheme != "auto") { + return; + } + + // Turn off the ambient light sensor if the page is hidden + this._enableAmbientLighting(!this._doc.hidden); + }, + + // Setup or teardown the ambient light tracking system. + _enableAmbientLighting: function(enable) { + if (enable) { + this._win.addEventListener("devicelight", this, false); + this._luxValues = []; + this._totalLux = 0; + } else { + this._win.removeEventListener("devicelight", this, false); + delete this._luxValues; + delete this._totalLux; + } + }, + + _updateColorScheme: function(luxValue) { + // Upper bound value for "dark" color scheme beyond which it changes to "light". + let upperBoundDark = 50; + // Lower bound value for "light" color scheme beyond which it changes to "dark". + let lowerBoundLight = 10; + // Threshold for color scheme change. + let colorChangeThreshold = 20; + + // Ignore changes that are within a certain threshold of previous lux values. + if ((this._colorScheme === "dark" && luxValue < upperBoundDark) || + (this._colorScheme === "light" && luxValue > lowerBoundLight)) + return; + + if (luxValue < colorChangeThreshold) + this._setColorScheme("dark"); + else + this._setColorScheme("light"); + }, + + _setColorScheme: function(newColorScheme) { + // "auto" is not a real color scheme + if (this._colorScheme === newColorScheme || newColorScheme === "auto") + return; + + let bodyClasses = this._doc.body.classList; + + if (this._colorScheme) + bodyClasses.remove(this._colorScheme); + + this._colorScheme = newColorScheme; + bodyClasses.add(this._colorScheme); + }, + + // Pref values include "dark", "light", and "auto", which automatically switches + // between light and dark color schemes based on the ambient light level. 
+ _setColorSchemePref: function(colorSchemePref) { + this._enableAmbientLighting(colorSchemePref === "auto"); + this._setColorScheme(colorSchemePref); + + AsyncPrefs.set("reader.color_scheme", colorSchemePref); + }, + + _setFontType: function(newFontType) { + if (this._fontType === newFontType) + return; + + let bodyClasses = this._doc.body.classList; + + if (this._fontType) + bodyClasses.remove(this._fontType); + + this._fontType = newFontType; + bodyClasses.add(this._fontType); + + AsyncPrefs.set("reader.font_type", this._fontType); + }, + + _setSystemUIVisibility: function(visible) { + this._mm.sendAsyncMessage("Reader:SystemUIVisibility", { visible: visible }); + }, + + _loadArticle: Task.async(function* () { + let url = this._getOriginalUrl(); + this._showProgressDelayed(); + + let article; + if (this._articlePromise) { + article = yield this._articlePromise; + } else { + try { + article = yield this._getArticle(url); + } catch (e) { + if (e && e.newURL) { + let readerURL = "about:reader?url=" + encodeURIComponent(e.newURL); + this._win.location.replace(readerURL); + return; + } + } + } + + if (this._windowUnloaded) { + return; + } + + // Replace the loading message with an error message if there's a failure. + // Users are supposed to navigate away by themselves (because we cannot + // remove ourselves from session history.) 
+ if (!article) { + this._showError(); + return; + } + + this._showContent(article); + }), + + _getArticle: function(url) { + return new Promise((resolve, reject) => { + let listener = (message) => { + this._mm.removeMessageListener("Reader:ArticleData", listener); + if (message.data.newURL) { + reject({ newURL: message.data.newURL }); + return; + } + resolve(message.data.article); + }; + this._mm.addMessageListener("Reader:ArticleData", listener); + this._mm.sendAsyncMessage("Reader:ArticleGet", { url: url }); + }); + }, + + _requestFavicon: function() { + let handleFaviconReturn = (message) => { + this._mm.removeMessageListener("Reader:FaviconReturn", handleFaviconReturn); + this._loadFavicon(message.data.url, message.data.faviconUrl); + }; + + this._mm.addMessageListener("Reader:FaviconReturn", handleFaviconReturn); + this._mm.sendAsyncMessage("Reader:FaviconRequest", { url: this._article.url }); + }, + + _loadFavicon: function(url, faviconUrl) { + if (this._article.url !== url) + return; + + let doc = this._doc; + + let link = doc.createElement('link'); + link.rel = 'shortcut icon'; + link.href = faviconUrl; + + doc.getElementsByTagName('head')[0].appendChild(link); + }, + + _updateImageMargins: function() { + let windowWidth = this._win.innerWidth; + let bodyWidth = this._doc.body.clientWidth; + + let setImageMargins = function(img) { + // If the image is at least as wide as the window, make it fill edge-to-edge on mobile. + if (img.naturalWidth >= windowWidth) { + img.setAttribute("moz-reader-full-width", true); + } else { + img.removeAttribute("moz-reader-full-width"); + } + + // If the image is at least half as wide as the body, center it on desktop. 
+ if (img.naturalWidth >= bodyWidth/2) { + img.setAttribute("moz-reader-center", true); + } else { + img.removeAttribute("moz-reader-center"); + } + }; + + let imgs = this._doc.querySelectorAll(this._BLOCK_IMAGES_SELECTOR); + for (let i = imgs.length; --i >= 0;) { + let img = imgs[i]; + + if (img.naturalWidth > 0) { + setImageMargins(img); + } else { + img.onload = function() { + setImageMargins(img); + }; + } + } + }, + + _maybeSetTextDirection: function Read_maybeSetTextDirection(article) { + if (!article.dir) + return; + + // Set "dir" attribute on content + this._contentElement.setAttribute("dir", article.dir); + this._headerElement.setAttribute("dir", article.dir); + }, + + _fixLocalLinks() { + // We need to do this because preprocessing the content through nsIParserUtils + // gives back a DOM with a <base> element. That influences how these URLs get + // resolved, making them no longer match the document URI (which is + // about:reader?url=...). To fix this, make all the hash URIs absolute. This + // is hacky, but the alternative of removing the base element has potential + // security implications if Readability has not successfully made all the URLs + // absolute, so we pick just fixing these in-document links explicitly. + let localLinks = this._contentElement.querySelectorAll("a[href^='#']"); + for (let localLink of localLinks) { + // Have to get the attribute because .href provides an absolute URI. + localLink.href = this._doc.documentURI + localLink.getAttribute("href"); + } + }, + + _showError: function() { + this._headerElement.style.display = "none"; + this._contentElement.style.display = "none"; + + let errorMessage = gStrings.GetStringFromName("aboutReader.loadError"); + this._messageElement.textContent = errorMessage; + this._messageElement.style.display = "block"; + + this._doc.title = errorMessage; + + this._error = true; + }, + + // This function is the JS version of Java's StringUtils.stripCommonSubdomains. 
+ _stripHost: function(host) { + if (!host) + return host; + + let start = 0; + + if (host.startsWith("www.")) + start = 4; + else if (host.startsWith("m.")) + start = 2; + else if (host.startsWith("mobile.")) + start = 7; + + return host.substring(start); + }, + + _showContent: function(article) { + this._messageElement.style.display = "none"; + + this._article = article; + + this._domainElement.href = article.url; + let articleUri = Services.io.newURI(article.url, null, null); + this._domainElement.textContent = this._stripHost(articleUri.host); + this._creditsElement.textContent = article.byline; + + this._titleElement.textContent = article.title; + this._doc.title = article.title; + + this._headerElement.style.display = "block"; + + let parserUtils = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils); + let contentFragment = parserUtils.parseFragment(article.content, + Ci.nsIParserUtils.SanitizerDropForms | Ci.nsIParserUtils.SanitizerAllowStyle, + false, articleUri, this._contentElement); + this._contentElement.innerHTML = ""; + this._contentElement.appendChild(contentFragment); + this._fixLocalLinks(); + this._maybeSetTextDirection(article); + + this._contentElement.style.display = "block"; + this._updateImageMargins(); + + this._requestFavicon(); + this._doc.body.classList.add("loaded"); + + this._goToReference(articleUri.ref); + + Services.obs.notifyObservers(this._win, "AboutReader:Ready", ""); + + this._doc.dispatchEvent( + new this._win.CustomEvent("AboutReaderContentReady", { bubbles: true, cancelable: false })); + }, + + _hideContent: function() { + this._headerElement.style.display = "none"; + this._contentElement.style.display = "none"; + }, + + _showProgressDelayed: function() { + this._win.setTimeout(function() { + // No need to show progress if the article has been loaded, + // if the window has been unloaded, or if there was an error + // trying to load the article. 
+ if (this._article || this._windowUnloaded || this._error) { + return; + } + + this._headerElement.style.display = "none"; + this._contentElement.style.display = "none"; + + this._messageElement.textContent = gStrings.GetStringFromName("aboutReader.loading2"); + this._messageElement.style.display = "block"; + }.bind(this), 300); + }, + + /** + * Returns the original article URL for this about:reader view. + */ + _getOriginalUrl: function(win) { + let url = win ? win.location.href : this._win.location.href; + return ReaderMode.getOriginalUrl(url) || url; + }, + + _setupSegmentedButton: function(id, options, initialValue, callback) { + let doc = this._doc; + let segmentedButton = doc.getElementById(id); + + for (let i = 0; i < options.length; i++) { + let option = options[i]; + + let item = doc.createElement("button"); + + // Put the name in a div so that Android can hide it. + let div = doc.createElement("div"); + div.textContent = option.name; + div.classList.add("name"); + item.appendChild(div); + + if (option.itemClass !== undefined) + item.classList.add(option.itemClass); + + if (option.description !== undefined) { + let description = doc.createElement("div"); + description.textContent = option.description; + description.classList.add("description"); + item.appendChild(description); + } + + segmentedButton.appendChild(item); + + item.addEventListener("click", function(aEvent) { + if (!aEvent.isTrusted) + return; + + aEvent.stopPropagation(); + + // Just pass the ID of the button as an extra and hope the ID doesn't change + // unless the context changes + UITelemetry.addEvent("action.1", "button", null, id); + + let items = segmentedButton.children; + for (let j = items.length - 1; j >= 0; j--) { + items[j].classList.remove("selected"); + } + + item.classList.add("selected"); + callback(option.value); + }.bind(this), true); + + if (option.value === initialValue) + item.classList.add("selected"); + } + }, + + _setupButton: function(id, callback, titleEntity, 
textEntity) { + if (titleEntity) { + this._setButtonTip(id, titleEntity); + } + + let button = this._doc.getElementById(id); + if (textEntity) { + button.textContent = gStrings.GetStringFromName(textEntity); + } + button.removeAttribute("hidden"); + button.addEventListener("click", function(aEvent) { + if (!aEvent.isTrusted) + return; + + aEvent.stopPropagation(); + let btn = aEvent.target; + callback(btn); + }, true); + }, + + /** + * Sets a toolTip for a button. Performed at initial button setup + * and dynamically as button state changes. + * @param Localizable string providing UI element usage tip. + */ + _setButtonTip: function(id, titleEntity) { + let button = this._doc.getElementById(id); + button.setAttribute("title", gStrings.GetStringFromName(titleEntity)); + }, + + _setupStyleDropdown: function() { + let dropdownToggle = this._doc.querySelector("#style-dropdown .dropdown-toggle"); + dropdownToggle.setAttribute("title", gStrings.GetStringFromName("aboutReader.toolbar.typeControls")); + }, + + _updatePopupPosition: function(dropdown) { + let dropdownToggle = dropdown.querySelector(".dropdown-toggle"); + let dropdownPopup = dropdown.querySelector(".dropdown-popup"); + + let toggleHeight = dropdownToggle.offsetHeight; + let toggleTop = dropdownToggle.offsetTop; + let popupTop = toggleTop - toggleHeight / 2; + + dropdownPopup.style.top = popupTop + "px"; + }, + + _toggleDropdownClicked: function(event) { + let dropdown = event.target.closest('.dropdown'); + + if (!dropdown) + return; + + event.stopPropagation(); + + if (dropdown.classList.contains("open")) { + this._closeDropdowns(); + } else { + this._openDropdown(dropdown); + if (this._isToolbarVertical) { + this._updatePopupPosition(dropdown); + } + } + }, + + /* + * If the ReaderView banner font-dropdown is closed, open it. 
+ */ + _openDropdown: function(dropdown) { + if (dropdown.classList.contains("open")) { + return; + } + + this._closeDropdowns(); + + // Trigger BackPressListener initialization in Android. + dropdown.classList.add("open"); + this._mm.sendAsyncMessage("Reader:DropdownOpened", this.viewId); + }, + + /* + * If the ReaderView has open dropdowns, close them. If we are closing the + * dropdowns because the page is scrolling, allow popups to stay open with + * the keep-open class. + */ + _closeDropdowns: function(scrolling) { + let selector = ".dropdown.open"; + if (scrolling) { + selector += ":not(.keep-open)"; + } + + let openDropdowns = this._doc.querySelectorAll(selector); + for (let dropdown of openDropdowns) { + dropdown.classList.remove("open"); + } + + // Trigger BackPressListener cleanup in Android. + if (openDropdowns.length) { + this._mm.sendAsyncMessage("Reader:DropdownClosed", this.viewId); + } + }, + + /* + * Scroll reader view to a reference + */ + _goToReference(ref) { + if (ref) { + this._win.location.hash = ref; + } + } +}; diff --git a/toolkit/components/reader/JSDOMParser.js b/toolkit/components/reader/JSDOMParser.js new file mode 100644 index 000000000..853649775 --- /dev/null +++ b/toolkit/components/reader/JSDOMParser.js @@ -0,0 +1,1195 @@ +/*eslint-env es6:false*/ +/* + * DO NOT MODIFY THIS FILE DIRECTLY! + * + * This is a shared library that is maintained in an external repo: + * https://github.com/mozilla/readability + */ + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * This is a relatively lightweight DOMParser that is safe to use in a web + * worker. This is far from a complete DOM implementation; however, it should + * contain the minimal set of functionality necessary for Readability.js. 
+ * + * Aside from not implementing the full DOM API, there are other quirks to be + * aware of when using the JSDOMParser: + * + * 1) Properly formed HTML/XML must be used. This means you should be extra + * careful when using this parser on anything received directly from an + * XMLHttpRequest. Providing a serialized string from an XMLSerializer, + * however, should be safe (since the browser's XMLSerializer should + * generate valid HTML/XML). Therefore, if parsing a document from an XHR, + * the recommended approach is to do the XHR in the main thread, use + * XMLSerializer.serializeToString() on the responseXML, and pass the + * resulting string to the worker. + * + * 2) Live NodeLists are not supported. DOM methods and properties such as + * getElementsByTagName() and childNodes return standard arrays. If you + * want these lists to be updated when nodes are removed or added to the + * document, you must take care to manually update them yourself. + */ +(function (global) { + + // XML only defines these and the numeric ones: + + var entityTable = { + "lt": "<", + "gt": ">", + "amp": "&", + "quot": '"', + "apos": "'", + }; + + var reverseEntityTable = { + "<": "<", + ">": ">", + "&": "&", + '"': """, + "'": "'", + }; + + function encodeTextContentHTML(s) { + return s.replace(/[&<>]/g, function(x) { + return reverseEntityTable[x]; + }); + } + + function encodeHTML(s) { + return s.replace(/[&<>'"]/g, function(x) { + return reverseEntityTable[x]; + }); + } + + function decodeHTML(str) { + return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) { + return entityTable[tag]; + }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) { + var num = parseInt(hex || numStr, hex ? 
16 : 10); // read num + return String.fromCharCode(num); + }); + } + + // When a style is set in JS, map it to the corresponding CSS attribute + var styleMap = { + "alignmentBaseline": "alignment-baseline", + "background": "background", + "backgroundAttachment": "background-attachment", + "backgroundClip": "background-clip", + "backgroundColor": "background-color", + "backgroundImage": "background-image", + "backgroundOrigin": "background-origin", + "backgroundPosition": "background-position", + "backgroundPositionX": "background-position-x", + "backgroundPositionY": "background-position-y", + "backgroundRepeat": "background-repeat", + "backgroundRepeatX": "background-repeat-x", + "backgroundRepeatY": "background-repeat-y", + "backgroundSize": "background-size", + "baselineShift": "baseline-shift", + "border": "border", + "borderBottom": "border-bottom", + "borderBottomColor": "border-bottom-color", + "borderBottomLeftRadius": "border-bottom-left-radius", + "borderBottomRightRadius": "border-bottom-right-radius", + "borderBottomStyle": "border-bottom-style", + "borderBottomWidth": "border-bottom-width", + "borderCollapse": "border-collapse", + "borderColor": "border-color", + "borderImage": "border-image", + "borderImageOutset": "border-image-outset", + "borderImageRepeat": "border-image-repeat", + "borderImageSlice": "border-image-slice", + "borderImageSource": "border-image-source", + "borderImageWidth": "border-image-width", + "borderLeft": "border-left", + "borderLeftColor": "border-left-color", + "borderLeftStyle": "border-left-style", + "borderLeftWidth": "border-left-width", + "borderRadius": "border-radius", + "borderRight": "border-right", + "borderRightColor": "border-right-color", + "borderRightStyle": "border-right-style", + "borderRightWidth": "border-right-width", + "borderSpacing": "border-spacing", + "borderStyle": "border-style", + "borderTop": "border-top", + "borderTopColor": "border-top-color", + "borderTopLeftRadius": "border-top-left-radius", 
+ "borderTopRightRadius": "border-top-right-radius", + "borderTopStyle": "border-top-style", + "borderTopWidth": "border-top-width", + "borderWidth": "border-width", + "bottom": "bottom", + "boxShadow": "box-shadow", + "boxSizing": "box-sizing", + "captionSide": "caption-side", + "clear": "clear", + "clip": "clip", + "clipPath": "clip-path", + "clipRule": "clip-rule", + "color": "color", + "colorInterpolation": "color-interpolation", + "colorInterpolationFilters": "color-interpolation-filters", + "colorProfile": "color-profile", + "colorRendering": "color-rendering", + "content": "content", + "counterIncrement": "counter-increment", + "counterReset": "counter-reset", + "cursor": "cursor", + "direction": "direction", + "display": "display", + "dominantBaseline": "dominant-baseline", + "emptyCells": "empty-cells", + "enableBackground": "enable-background", + "fill": "fill", + "fillOpacity": "fill-opacity", + "fillRule": "fill-rule", + "filter": "filter", + "cssFloat": "float", + "floodColor": "flood-color", + "floodOpacity": "flood-opacity", + "font": "font", + "fontFamily": "font-family", + "fontSize": "font-size", + "fontStretch": "font-stretch", + "fontStyle": "font-style", + "fontVariant": "font-variant", + "fontWeight": "font-weight", + "glyphOrientationHorizontal": "glyph-orientation-horizontal", + "glyphOrientationVertical": "glyph-orientation-vertical", + "height": "height", + "imageRendering": "image-rendering", + "kerning": "kerning", + "left": "left", + "letterSpacing": "letter-spacing", + "lightingColor": "lighting-color", + "lineHeight": "line-height", + "listStyle": "list-style", + "listStyleImage": "list-style-image", + "listStylePosition": "list-style-position", + "listStyleType": "list-style-type", + "margin": "margin", + "marginBottom": "margin-bottom", + "marginLeft": "margin-left", + "marginRight": "margin-right", + "marginTop": "margin-top", + "marker": "marker", + "markerEnd": "marker-end", + "markerMid": "marker-mid", + "markerStart": 
"marker-start", + "mask": "mask", + "maxHeight": "max-height", + "maxWidth": "max-width", + "minHeight": "min-height", + "minWidth": "min-width", + "opacity": "opacity", + "orphans": "orphans", + "outline": "outline", + "outlineColor": "outline-color", + "outlineOffset": "outline-offset", + "outlineStyle": "outline-style", + "outlineWidth": "outline-width", + "overflow": "overflow", + "overflowX": "overflow-x", + "overflowY": "overflow-y", + "padding": "padding", + "paddingBottom": "padding-bottom", + "paddingLeft": "padding-left", + "paddingRight": "padding-right", + "paddingTop": "padding-top", + "page": "page", + "pageBreakAfter": "page-break-after", + "pageBreakBefore": "page-break-before", + "pageBreakInside": "page-break-inside", + "pointerEvents": "pointer-events", + "position": "position", + "quotes": "quotes", + "resize": "resize", + "right": "right", + "shapeRendering": "shape-rendering", + "size": "size", + "speak": "speak", + "src": "src", + "stopColor": "stop-color", + "stopOpacity": "stop-opacity", + "stroke": "stroke", + "strokeDasharray": "stroke-dasharray", + "strokeDashoffset": "stroke-dashoffset", + "strokeLinecap": "stroke-linecap", + "strokeLinejoin": "stroke-linejoin", + "strokeMiterlimit": "stroke-miterlimit", + "strokeOpacity": "stroke-opacity", + "strokeWidth": "stroke-width", + "tableLayout": "table-layout", + "textAlign": "text-align", + "textAnchor": "text-anchor", + "textDecoration": "text-decoration", + "textIndent": "text-indent", + "textLineThrough": "text-line-through", + "textLineThroughColor": "text-line-through-color", + "textLineThroughMode": "text-line-through-mode", + "textLineThroughStyle": "text-line-through-style", + "textLineThroughWidth": "text-line-through-width", + "textOverflow": "text-overflow", + "textOverline": "text-overline", + "textOverlineColor": "text-overline-color", + "textOverlineMode": "text-overline-mode", + "textOverlineStyle": "text-overline-style", + "textOverlineWidth": "text-overline-width", + 
"textRendering": "text-rendering", + "textShadow": "text-shadow", + "textTransform": "text-transform", + "textUnderline": "text-underline", + "textUnderlineColor": "text-underline-color", + "textUnderlineMode": "text-underline-mode", + "textUnderlineStyle": "text-underline-style", + "textUnderlineWidth": "text-underline-width", + "top": "top", + "unicodeBidi": "unicode-bidi", + "unicodeRange": "unicode-range", + "vectorEffect": "vector-effect", + "verticalAlign": "vertical-align", + "visibility": "visibility", + "whiteSpace": "white-space", + "widows": "widows", + "width": "width", + "wordBreak": "word-break", + "wordSpacing": "word-spacing", + "wordWrap": "word-wrap", + "writingMode": "writing-mode", + "zIndex": "z-index", + "zoom": "zoom", + }; + + // Elements that can be self-closing + var voidElems = { + "area": true, + "base": true, + "br": true, + "col": true, + "command": true, + "embed": true, + "hr": true, + "img": true, + "input": true, + "link": true, + "meta": true, + "param": true, + "source": true, + "wbr": true + }; + + var whitespace = [" ", "\t", "\n", "\r"]; + + // See http://www.w3schools.com/dom/dom_nodetype.asp + var nodeTypes = { + ELEMENT_NODE: 1, + ATTRIBUTE_NODE: 2, + TEXT_NODE: 3, + CDATA_SECTION_NODE: 4, + ENTITY_REFERENCE_NODE: 5, + ENTITY_NODE: 6, + PROCESSING_INSTRUCTION_NODE: 7, + COMMENT_NODE: 8, + DOCUMENT_NODE: 9, + DOCUMENT_TYPE_NODE: 10, + DOCUMENT_FRAGMENT_NODE: 11, + NOTATION_NODE: 12 + }; + + function getElementsByTagName(tag) { + tag = tag.toUpperCase(); + var elems = []; + var allTags = (tag === "*"); + function getElems(node) { + var length = node.children.length; + for (var i = 0; i < length; i++) { + var child = node.children[i]; + if (allTags || (child.tagName === tag)) + elems.push(child); + getElems(child); + } + } + getElems(this); + return elems; + } + + var Node = function () {}; + + Node.prototype = { + attributes: null, + childNodes: null, + localName: null, + nodeName: null, + parentNode: null, + textContent: 
null, + nextSibling: null, + previousSibling: null, + + get firstChild() { + return this.childNodes[0] || null; + }, + + get firstElementChild() { + return this.children[0] || null; + }, + + get lastChild() { + return this.childNodes[this.childNodes.length - 1] || null; + }, + + get lastElementChild() { + return this.children[this.children.length - 1] || null; + }, + + appendChild: function (child) { + if (child.parentNode) { + child.parentNode.removeChild(child); + } + + var last = this.lastChild; + if (last) + last.nextSibling = child; + child.previousSibling = last; + + if (child.nodeType === Node.ELEMENT_NODE) { + child.previousElementSibling = this.children[this.children.length - 1] || null; + this.children.push(child); + child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child); + } + this.childNodes.push(child); + child.parentNode = this; + }, + + removeChild: function (child) { + var childNodes = this.childNodes; + var childIndex = childNodes.indexOf(child); + if (childIndex === -1) { + throw "removeChild: node not found"; + } else { + child.parentNode = null; + var prev = child.previousSibling; + var next = child.nextSibling; + if (prev) + prev.nextSibling = next; + if (next) + next.previousSibling = prev; + + if (child.nodeType === Node.ELEMENT_NODE) { + prev = child.previousElementSibling; + next = child.nextElementSibling; + if (prev) + prev.nextElementSibling = next; + if (next) + next.previousElementSibling = prev; + this.children.splice(this.children.indexOf(child), 1); + } + + child.previousSibling = child.nextSibling = null; + child.previousElementSibling = child.nextElementSibling = null; + + return childNodes.splice(childIndex, 1)[0]; + } + }, + + replaceChild: function (newNode, oldNode) { + var childNodes = this.childNodes; + var childIndex = childNodes.indexOf(oldNode); + if (childIndex === -1) { + throw "replaceChild: node not found"; + } else { + // This will take care of updating the new node if it was 
somewhere else before: + if (newNode.parentNode) + newNode.parentNode.removeChild(newNode); + + childNodes[childIndex] = newNode; + + // update the new node's sibling properties, and its new siblings' sibling properties + newNode.nextSibling = oldNode.nextSibling; + newNode.previousSibling = oldNode.previousSibling; + if (newNode.nextSibling) + newNode.nextSibling.previousSibling = newNode; + if (newNode.previousSibling) + newNode.previousSibling.nextSibling = newNode; + + newNode.parentNode = this; + + // Now deal with elements before we clear out those values for the old node, + // because it can help us take shortcuts here: + if (newNode.nodeType === Node.ELEMENT_NODE) { + if (oldNode.nodeType === Node.ELEMENT_NODE) { + // Both were elements, which makes this easier, we just swap things out: + newNode.previousElementSibling = oldNode.previousElementSibling; + newNode.nextElementSibling = oldNode.nextElementSibling; + if (newNode.previousElementSibling) + newNode.previousElementSibling.nextElementSibling = newNode; + if (newNode.nextElementSibling) + newNode.nextElementSibling.previousElementSibling = newNode; + this.children[this.children.indexOf(oldNode)] = newNode; + } else { + // Hard way: + newNode.previousElementSibling = (function() { + for (var i = childIndex - 1; i >= 0; i--) { + if (childNodes[i].nodeType === Node.ELEMENT_NODE) + return childNodes[i]; + } + return null; + })(); + if (newNode.previousElementSibling) { + newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling; + } else { + newNode.nextElementSibling = (function() { + for (var i = childIndex + 1; i < childNodes.length; i++) { + if (childNodes[i].nodeType === Node.ELEMENT_NODE) + return childNodes[i]; + } + return null; + })(); + } + if (newNode.previousElementSibling) + newNode.previousElementSibling.nextElementSibling = newNode; + if (newNode.nextElementSibling) + newNode.nextElementSibling.previousElementSibling = newNode; + + if (newNode.nextElementSibling) + 
this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode); + else + this.children.push(newNode); + } + } else if (oldNode.nodeType === Node.ELEMENT_NODE) { + // new node is not an element node. + // if the old one was, update its element siblings: + if (oldNode.previousElementSibling) + oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling; + if (oldNode.nextElementSibling) + oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling; + this.children.splice(this.children.indexOf(oldNode), 1); + + // If the old node wasn't an element, neither the new nor the old node was an element, + // and the children array and its members shouldn't need any updating. + } + + + oldNode.parentNode = null; + oldNode.previousSibling = null; + oldNode.nextSibling = null; + if (oldNode.nodeType === Node.ELEMENT_NODE) { + oldNode.previousElementSibling = null; + oldNode.nextElementSibling = null; + } + return oldNode; + } + }, + + __JSDOMParser__: true, + }; + + for (var nodeType in nodeTypes) { + Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType]; + } + + var Attribute = function (name, value) { + this.name = name; + this._value = value; + }; + + Attribute.prototype = { + get value() { + return this._value; + }, + setValue: function(newValue) { + this._value = newValue; + delete this._decodedValue; + }, + setDecodedValue: function(newValue) { + this._value = encodeHTML(newValue); + this._decodedValue = newValue; + }, + getDecodedValue: function() { + if (typeof this._decodedValue === "undefined") { + this._decodedValue = (this._value && decodeHTML(this._value)) || ""; + } + return this._decodedValue; + }, + }; + + var Comment = function () { + this.childNodes = []; + }; + + Comment.prototype = { + __proto__: Node.prototype, + + nodeName: "#comment", + nodeType: Node.COMMENT_NODE + }; + + var Text = function () { + this.childNodes = []; + }; + + Text.prototype = { + __proto__: Node.prototype, + + 
nodeName: "#text", + nodeType: Node.TEXT_NODE, + get textContent() { + if (typeof this._textContent === "undefined") { + this._textContent = decodeHTML(this._innerHTML || ""); + } + return this._textContent; + }, + get innerHTML() { + if (typeof this._innerHTML === "undefined") { + this._innerHTML = encodeTextContentHTML(this._textContent || ""); + } + return this._innerHTML; + }, + + set innerHTML(newHTML) { + this._innerHTML = newHTML; + delete this._textContent; + }, + set textContent(newText) { + this._textContent = newText; + delete this._innerHTML; + }, + }; + + var Document = function () { + this.styleSheets = []; + this.childNodes = []; + this.children = []; + }; + + Document.prototype = { + __proto__: Node.prototype, + + nodeName: "#document", + nodeType: Node.DOCUMENT_NODE, + title: "", + + getElementsByTagName: getElementsByTagName, + + getElementById: function (id) { + function getElem(node) { + var length = node.children.length; + if (node.id === id) + return node; + for (var i = 0; i < length; i++) { + var el = getElem(node.children[i]); + if (el) + return el; + } + return null; + } + return getElem(this); + }, + + createElement: function (tag) { + var node = new Element(tag); + return node; + }, + + createTextNode: function (text) { + var node = new Text(); + node.textContent = text; + return node; + }, + }; + + var Element = function (tag) { + this.attributes = []; + this.childNodes = []; + this.children = []; + this.nextElementSibling = this.previousElementSibling = null; + this.localName = tag.toLowerCase(); + this.tagName = tag.toUpperCase(); + this.style = new Style(this); + }; + + Element.prototype = { + __proto__: Node.prototype, + + nodeType: Node.ELEMENT_NODE, + + getElementsByTagName: getElementsByTagName, + + get className() { + return this.getAttribute("class") || ""; + }, + + set className(str) { + this.setAttribute("class", str); + }, + + get id() { + return this.getAttribute("id") || ""; + }, + + set id(str) { + this.setAttribute("id", 
str); + }, + + get href() { + return this.getAttribute("href") || ""; + }, + + set href(str) { + this.setAttribute("href", str); + }, + + get src() { + return this.getAttribute("src") || ""; + }, + + set src(str) { + this.setAttribute("src", str); + }, + + get nodeName() { + return this.tagName; + }, + + get innerHTML() { + function getHTML(node) { + var i = 0; + for (i = 0; i < node.childNodes.length; i++) { + var child = node.childNodes[i]; + if (child.localName) { + arr.push("<" + child.localName); + + // serialize attribute list + for (var j = 0; j < child.attributes.length; j++) { + var attr = child.attributes[j]; + // the attribute value will be HTML escaped. + var val = attr.value; + var quote = (val.indexOf('"') === -1 ? '"' : "'"); + arr.push(" " + attr.name + '=' + quote + val + quote); + } + + if (child.localName in voidElems && !child.childNodes.length) { + // if this is a self-closing element, end it here + arr.push("/>"); + } else { + // otherwise, add its children + arr.push(">"); + getHTML(child); + arr.push("</" + child.localName + ">"); + } + } else { + // This is a text node, so asking for innerHTML won't recurse. + arr.push(child.innerHTML); + } + } + } + + // Using Array.join() avoids the overhead from lazy string concatenation. 
+ // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes + var arr = []; + getHTML(this); + return arr.join(""); + }, + + set innerHTML(html) { + var parser = new JSDOMParser(); + var node = parser.parse(html); + var i; + for (i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = null; + } + this.childNodes = node.childNodes; + this.children = node.children; + for (i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = this; + } + }, + + set textContent(text) { + // clear parentNodes for existing children + for (var i = this.childNodes.length; --i >= 0;) { + this.childNodes[i].parentNode = null; + } + + var node = new Text(); + this.childNodes = [ node ]; + this.children = []; + node.textContent = text; + node.parentNode = this; + }, + + get textContent() { + function getText(node) { + var nodes = node.childNodes; + for (var i = 0; i < nodes.length; i++) { + var child = nodes[i]; + if (child.nodeType === 3) { + text.push(child.textContent); + } else { + getText(child); + } + } + } + + // Using Array.join() avoids the overhead from lazy string concatenation. 
+ // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes + var text = []; + getText(this); + return text.join(""); + }, + + getAttribute: function (name) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) + return attr.getDecodedValue(); + } + return undefined; + }, + + setAttribute: function (name, value) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) { + attr.setDecodedValue(value); + return; + } + } + this.attributes.push(new Attribute(name, encodeHTML(value))); + }, + + removeAttribute: function (name) { + for (var i = this.attributes.length; --i >= 0;) { + var attr = this.attributes[i]; + if (attr.name === name) { + this.attributes.splice(i, 1); + break; + } + } + } + }; + + var Style = function (node) { + this.node = node; + }; + + // getStyle() and setStyle() use the style attribute string directly. This + // won't be very efficient if there are a lot of style manipulations, but + // it's the easiest way to make sure the style attribute string and the JS + // style property stay in sync. Readability.js doesn't do many style + // manipulations, so this should be okay. + Style.prototype = { + getStyle: function (styleName) { + var attr = this.node.getAttribute("style"); + if (!attr) + return undefined; + + var styles = attr.split(";"); + for (var i = 0; i < styles.length; i++) { + var style = styles[i].split(":"); + var name = style[0].trim(); + if (name === styleName) + return style[1].trim(); + } + + return undefined; + }, + + setStyle: function (styleName, styleValue) { + var value = this.node.getAttribute("style") || ""; + var index = 0; + do { + var next = value.indexOf(";", index) + 1; + var length = next - index - 1; + var style = (length > 0 ? 
value.substr(index, length) : value.substr(index)); + if (style.substr(0, style.indexOf(":")).trim() === styleName) { + value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : ""); + break; + } + index = next; + } while (index); + + value += " " + styleName + ": " + styleValue + ";"; + this.node.setAttribute("style", value.trim()); + } + }; + + // For each item in styleMap, define a getter and setter on the style + // property. + for (var jsName in styleMap) { + (function (cssName) { + Style.prototype.__defineGetter__(jsName, function () { + return this.getStyle(cssName); + }); + Style.prototype.__defineSetter__(jsName, function (value) { + this.setStyle(cssName, value); + }); + })(styleMap[jsName]); + } + + var JSDOMParser = function () { + this.currentChar = 0; + + // In makeElementNode() we build up many strings one char at a time. Using + // += for this results in lots of short-lived intermediate strings. It's + // better to build an array of single-char strings and then join() them + // together at the end. And reusing a single array (i.e. |this.strBuf|) + // over and over for this purpose uses less memory than using a new array + // for each string. + this.strBuf = []; + + // Similarly, we reuse this array to return the two arguments from + // makeElementNode(), which saves us from having to allocate a new array + // every time. + this.retPair = []; + + this.errorState = ""; + }; + + JSDOMParser.prototype = { + error: function(m) { + dump("JSDOMParser error: " + m + "\n"); + this.errorState += m + "\n"; + }, + + /** + * Look at the next character without advancing the index. + */ + peekNext: function () { + return this.html[this.currentChar]; + }, + + /** + * Get the next character and advance the index. + */ + nextChar: function () { + return this.html[this.currentChar++]; + }, + + /** + * Called after a quote character is read. This finds the next quote + * character and returns the text string in between. 
+ */ + readString: function (quote) { + var str; + var n = this.html.indexOf(quote, this.currentChar); + if (n === -1) { + this.currentChar = this.html.length; + str = null; + } else { + str = this.html.substring(this.currentChar, n); + this.currentChar = n + 1; + } + + return str; + }, + + /** + * Called when parsing a node. This finds the next name/value attribute + * pair and adds the result to the attributes list. + */ + readAttribute: function (node) { + var name = ""; + + var n = this.html.indexOf("=", this.currentChar); + if (n === -1) { + this.currentChar = this.html.length; + } else { + // Read until a '=' character is hit; this will be the attribute key + name = this.html.substring(this.currentChar, n); + this.currentChar = n + 1; + } + + if (!name) + return; + + // After a '=', we should see a '"' for the attribute value + var c = this.nextChar(); + if (c !== '"' && c !== "'") { + this.error("Error reading attribute " + name + ", expecting '\"'"); + return; + } + + // Read the attribute value (and consume the matching quote) + var value = this.readString(c); + + node.attributes.push(new Attribute(name, value)); + + return; + }, + + /** + * Parses and returns an Element node. This is called after a '<' has been + * read. 
+ * + * @returns an array; the first index of the array is the parsed node; + * the second index is a boolean indicating whether this is a void + * Element + */ + makeElementNode: function (retPair) { + var c = this.nextChar(); + + // Read the Element tag name + var strBuf = this.strBuf; + strBuf.length = 0; + while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") { + if (c === undefined) + return false; + strBuf.push(c); + c = this.nextChar(); + } + var tag = strBuf.join(''); + + if (!tag) + return false; + + var node = new Element(tag); + + // Read Element attributes + while (c !== "/" && c !== ">") { + if (c === undefined) + return false; + while (whitespace.indexOf(this.html[this.currentChar++]) != -1); + this.currentChar--; + c = this.nextChar(); + if (c !== "/" && c !== ">") { + --this.currentChar; + this.readAttribute(node); + } + } + + // If this is a self-closing tag, read '/>' + var closed = false; + if (c === "/") { + closed = true; + c = this.nextChar(); + if (c !== ">") { + this.error("expected '>' to close " + tag); + return false; + } + } + + retPair[0] = node; + retPair[1] = closed; + return true; + }, + + /** + * If the current input matches this string, advance the input index; + * otherwise, do nothing. + * + * @returns whether input matched string + */ + match: function (str) { + var strlen = str.length; + if (this.html.substr(this.currentChar, strlen).toLowerCase() === str.toLowerCase()) { + this.currentChar += strlen; + return true; + } + return false; + }, + + /** + * Searches the input until a string is found and discards all input up to + * and including the matched string. + */ + discardTo: function (str) { + var index = this.html.indexOf(str, this.currentChar) + str.length; + if (index === -1) + this.currentChar = this.html.length; + this.currentChar = index; + }, + + /** + * Reads child nodes for the given node. 
+ */ + readChildren: function (node) { + var child; + while ((child = this.readNode())) { + // Don't keep Comment nodes + if (child.nodeType !== 8) { + node.appendChild(child); + } + } + }, + + readScript: function (node) { + while (this.currentChar < this.html.length) { + var c = this.nextChar(); + var nextC = this.peekNext(); + if (c === "<") { + if (nextC === "!" || nextC === "?") { + // We're still before the ! or ? that is starting this comment: + this.currentChar++; + node.appendChild(this.discardNextComment()); + continue; + } + if (nextC === "/" && this.html.substr(this.currentChar, 8 /*"/script>".length */).toLowerCase() == "/script>") { + // Go back before the '<' so we find the end tag. + this.currentChar--; + // Done with this script tag, the caller will close: + return; + } + } + // Either c wasn't a '<' or it was but we couldn't find either a comment + // or a closing script tag, so we should just parse as text until the next one + // comes along: + + var haveTextNode = node.lastChild && node.lastChild.nodeType === Node.TEXT_NODE; + var textNode = haveTextNode ? node.lastChild : new Text(); + var n = this.html.indexOf("<", this.currentChar); + // Decrement this to include the current character *afterwards* so we don't get stuck + // looking for the same < all the time. + this.currentChar--; + if (n === -1) { + textNode.innerHTML += this.html.substring(this.currentChar, this.html.length); + this.currentChar = this.html.length; + } else { + textNode.innerHTML += this.html.substring(this.currentChar, n); + this.currentChar = n; + } + if (!haveTextNode) + node.appendChild(textNode); + } + }, + + discardNextComment: function() { + if (this.match("--")) { + this.discardTo("-->"); + } else { + var c = this.nextChar(); + while (c !== ">") { + if (c === undefined) + return null; + if (c === '"' || c === "'") + this.readString(c); + c = this.nextChar(); + } + } + return new Comment(); + }, + + + /** + * Reads the next child node from the input. 
If we're reading a closing + * tag, or if we've reached the end of input, return null. + * + * @returns the node + */ + readNode: function () { + var c = this.nextChar(); + + if (c === undefined) + return null; + + // Read any text as Text node + if (c !== "<") { + --this.currentChar; + var textNode = new Text(); + var n = this.html.indexOf("<", this.currentChar); + if (n === -1) { + textNode.innerHTML = this.html.substring(this.currentChar, this.html.length); + this.currentChar = this.html.length; + } else { + textNode.innerHTML = this.html.substring(this.currentChar, n); + this.currentChar = n; + } + return textNode; + } + + c = this.peekNext(); + + // Read Comment node. Normally, Comment nodes know their inner + // textContent, but we don't really care about Comment nodes (we throw + // them away in readChildren()). So just returning an empty Comment node + // here is sufficient. + if (c === "!" || c === "?") { + // We're still before the ! or ? that is starting this comment: + this.currentChar++; + return this.discardNextComment(); + } + + // If we're reading a closing tag, return null. This means we've reached + // the end of this set of child nodes. + if (c === "/") { + --this.currentChar; + return null; + } + + // Otherwise, we're looking at an Element node + var result = this.makeElementNode(this.retPair); + if (!result) + return null; + + var node = this.retPair[0]; + var closed = this.retPair[1]; + var localName = node.localName; + + // If this isn't a void Element, read its child nodes + if (!closed) { + if (localName == "script") { + this.readScript(node); + } else { + this.readChildren(node); + } + var closingTag = "</" + localName + ">"; + if (!this.match(closingTag)) { + this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length)); + return null; + } + } + + // Only use the first title, because SVG might have other + // title elements which we don't care about (medium.com + // does this, at least). 
+ if (localName === "title" && !this.doc.title) { + this.doc.title = node.textContent.trim(); + } else if (localName === "head") { + this.doc.head = node; + } else if (localName === "body") { + this.doc.body = node; + } else if (localName === "html") { + this.doc.documentElement = node; + } + + return node; + }, + + /** + * Parses an HTML string and returns a JS implementation of the Document. + */ + parse: function (html) { + this.html = html; + var doc = this.doc = new Document(); + this.readChildren(doc); + + // If this is an HTML document, remove root-level children except for the + // <html> node + if (doc.documentElement) { + for (var i = doc.childNodes.length; --i >= 0;) { + var child = doc.childNodes[i]; + if (child !== doc.documentElement) { + doc.removeChild(child); + } + } + } + + return doc; + } + }; + + // Attach the standard DOM types to the global scope + global.Node = Node; + global.Comment = Comment; + global.Document = Document; + global.Element = Element; + global.Text = Text; + + // Attach JSDOMParser to the global scope + global.JSDOMParser = JSDOMParser; + +})(this); diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js new file mode 100644 index 000000000..491461a8e --- /dev/null +++ b/toolkit/components/reader/Readability.js @@ -0,0 +1,1863 @@ +/*eslint-env es6:false*/ +/* + * DO NOT MODIFY THIS FILE DIRECTLY! + * + * This is a shared library that is maintained in an external repo: + * https://github.com/mozilla/readability + */ + +/* + * Copyright (c) 2010 Arc90 Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * Public constructor.
 *
 * Stores the inputs, resolves the configurable options against the
 * prototype defaults, enables every processing flag and installs
 * `this.log`, which is a no-op unless `options.debug` is truthy.
 *
 * @param {Object}       uri     The URI descriptor object.
 * @param {HTMLDocument} doc     The document to parse.
 * @param {Object}       options The options object. Recognised keys:
 *                               debug, maxElemsToParse, nbTopCandidates,
 *                               maxPages.
 */
function Readability(uri, doc, options) {
  options = options || {};

  this._uri = uri;
  this._doc = doc;
  this._biggestFrame = false;
  this._articleByline = null;
  this._articleDir = null;

  // Configureable options
  this._debug = !!options.debug;
  this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
  this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
  this._maxPages = options.maxPages || this.DEFAULT_MAX_PAGES;

  // Start with all flags set
  this._flags = this.FLAG_STRIP_UNLIKELYS |
                this.FLAG_WEIGHT_CLASSES |
                this.FLAG_CLEAN_CONDITIONALLY;

  // The list of pages we've parsed in this call of readability,
  // for autopaging. As a key store for easier searching.
  this._parsedPages = {};

  // A list of the ETag headers of pages we've parsed, in case they happen to match,
  // we'll know it's a duplicate.
  this._pageETags = {};

  // Make an AJAX request for each page and append it to the document.
  this._curPageNum = 1;

  var logEl;

  // Control whether log messages are sent to the console
  if (this._debug) {
    // Renders a node as `NAME (#id.class1.class2)` or, for text nodes,
    // `NAME ("text")`.
    logEl = function(e) {
      var rv = e.nodeName + " ";
      if (e.nodeType == e.TEXT_NODE) {
        return rv + '("' + e.textContent + '")';
      }
      // Fall back to "" so a missing className is not rendered as
      // the literal text "undefined".
      var classDesc = e.className ? ("." + e.className.replace(/ /g, ".")) : "";
      var elDesc = "";
      if (e.id)
        elDesc = "(#" + e.id + classDesc + ")";
      else if (classDesc)
        elDesc = "(" + classDesc + ")";
      return rv + elDesc;
    };
    this.log = function () {
      // `typeof` yields a string, so it has to be compared with the
      // string "undefined" rather than with the value undefined.
      if (typeof dump !== "undefined") {
        var msg = Array.prototype.map.call(arguments, function(x) {
          return (x && x.nodeName) ? logEl(x) : x;
        }).join(" ");
        dump("Reader: (Readability) " + msg + "\n");
      } else if (typeof console !== "undefined") {
        // `arguments` is not an array, so concat() would append it as one
        // nested element; convert it first.
        var args = ["Reader: (Readability) "].concat(Array.prototype.slice.call(arguments));
        console.log.apply(console, args);
      }
    };
  } else {
    this.log = function () {};
  }
}
/**
 * Run any post-process modifications to article content as necessary.
 * At present the only step is delegating to _fixRelativeUris().
 *
 * @param Element articleContent The extracted article container; it is
 *                modified in place.
 * @return void
 **/
_postProcessContent: function(articleContent) {
  // Readability cannot open relative uris so we convert them to absolute uris.
  this._fixRelativeUris(articleContent);
},
+ * + * @param NodeList nodeList The no + * @param Function filterFn + * @return void + */ + _removeNodes: function(nodeList, filterFn) { + for (var i = nodeList.length - 1; i >= 0; i--) { + var node = nodeList[i]; + var parentNode = node.parentNode; + if (parentNode) { + if (!filterFn || filterFn.call(this, node, i, nodeList)) { + parentNode.removeChild(node); + } + } + } + }, + + /** + * Iterate over a NodeList, which doesn't natively fully implement the Array + * interface. + * + * For convenience, the current object context is applied to the provided + * iterate function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The iterate function. + * @param Boolean backward Whether to use backward iteration. + * @return void + */ + _forEachNode: function(nodeList, fn, backward) { + Array.prototype.forEach.call(nodeList, fn, this); + }, + + /** + * Iterate over a NodeList, return true if any of the provided iterate + * function calls returns true, false otherwise. + * + * For convenience, the current object context is applied to the + * provided iterate function. + * + * @param NodeList nodeList The NodeList. + * @param Function fn The iterate function. + * @return Boolean + */ + _someNode: function(nodeList, fn) { + return Array.prototype.some.call(nodeList, fn, this); + }, + + /** + * Concat all nodelists passed as arguments. + * + * @return ...NodeList + * @return Array + */ + _concatNodeLists: function() { + var slice = Array.prototype.slice; + var args = slice.call(arguments); + var nodeLists = args.map(function(list) { + return slice.call(list); + }); + return Array.prototype.concat.apply([], nodeLists); + }, + + _getAllNodesWithTag: function(node, tagNames) { + if (node.querySelectorAll) { + return node.querySelectorAll(tagNames.join(',')); + } + return [].concat.apply([], tagNames.map(function(tag) { + var collection = node.getElementsByTagName(tag); + return Array.isArray(collection) ? 
collection : Array.from(collection); + })); + }, + + /** + * Converts each <a> and <img> uri in the given element to an absolute URI, + * ignoring #ref URIs. + * + * @param Element + * @return void + */ + _fixRelativeUris: function(articleContent) { + var scheme = this._uri.scheme; + var prePath = this._uri.prePath; + var pathBase = this._uri.pathBase; + + function toAbsoluteURI(uri) { + // If this is already an absolute URI, return it. + if (/^[a-zA-Z][a-zA-Z0-9\+\-\.]*:/.test(uri)) + return uri; + + // Scheme-rooted relative URI. + if (uri.substr(0, 2) == "//") + return scheme + "://" + uri.substr(2); + + // Prepath-rooted relative URI. + if (uri[0] == "/") + return prePath + uri; + + // Dotslash relative URI. + if (uri.indexOf("./") === 0) + return pathBase + uri.slice(2); + + // Ignore hash URIs: + if (uri[0] == "#") + return uri; + + // Standard relative URI; add entire path. pathBase already includes a + // trailing "/". + return pathBase + uri; + } + + var links = articleContent.getElementsByTagName("a"); + this._forEachNode(links, function(link) { + var href = link.getAttribute("href"); + if (href) { + // Replace links with javascript: URIs with text content, since + // they won't work after scripts have been removed from the page. + if (href.indexOf("javascript:") === 0) { + var text = this._doc.createTextNode(link.textContent); + link.parentNode.replaceChild(text, link); + } else { + link.setAttribute("href", toAbsoluteURI(href)); + } + } + }); + + var imgs = articleContent.getElementsByTagName("img"); + this._forEachNode(imgs, function(img) { + var src = img.getAttribute("src"); + if (src) { + img.setAttribute("src", toAbsoluteURI(src)); + } + }); + }, + + /** + * Get the article title as an H1. 
+ * + * @return void + **/ + _getArticleTitle: function() { + var doc = this._doc; + var curTitle = ""; + var origTitle = ""; + + try { + curTitle = origTitle = doc.title; + + // If they had an element with id "title" in their HTML + if (typeof curTitle !== "string") + curTitle = origTitle = this._getInnerText(doc.getElementsByTagName('title')[0]); + } catch (e) {/* ignore exceptions setting the title. */} + + if (curTitle.match(/ [\|\-] /)) { + curTitle = origTitle.replace(/(.*)[\|\-] .*/gi, '$1'); + + if (curTitle.split(' ').length < 3) + curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi, '$1'); + } else if (curTitle.indexOf(': ') !== -1) { + // Check if we have an heading containing this exact string, so we + // could assume it's the full title. + var headings = this._concatNodeLists( + doc.getElementsByTagName('h1'), + doc.getElementsByTagName('h2') + ); + var match = this._someNode(headings, function(heading) { + return heading.textContent === curTitle; + }); + + // If we don't, let's extract the title out of the original title string. + if (!match) { + curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1); + + // If the title is now too short, try the first colon instead: + if (curTitle.split(' ').length < 3) + curTitle = origTitle.substring(origTitle.indexOf(':') + 1); + } + } else if (curTitle.length > 150 || curTitle.length < 15) { + var hOnes = doc.getElementsByTagName('h1'); + + if (hOnes.length === 1) + curTitle = this._getInnerText(hOnes[0]); + } + + curTitle = curTitle.trim(); + + if (curTitle.split(' ').length <= 4) + curTitle = origTitle; + + return curTitle; + }, + + /** + * Prepare the HTML document for readability to scrape it. + * This includes things like stripping javascript, CSS, and handling terrible markup. 
+ * + * @return void + **/ + _prepDocument: function() { + var doc = this._doc; + + // Remove all style tags in head + this._removeNodes(doc.getElementsByTagName("style")); + + if (doc.body) { + this._replaceBrs(doc.body); + } + + this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) { + this._setNodeTag(fontNode, "SPAN"); + }); + }, + + /** + * Finds the next element, starting from the given node, and ignoring + * whitespace in between. If the given node is an element, the same node is + * returned. + */ + _nextElement: function (node) { + var next = node; + while (next + && (next.nodeType != Node.ELEMENT_NODE) + && this.REGEXPS.whitespace.test(next.textContent)) { + next = next.nextSibling; + } + return next; + }, + + /** + * Replaces 2 or more successive <br> elements with a single <p>. + * Whitespace between <br> elements are ignored. For example: + * <div>foo<br>bar<br> <br><br>abc</div> + * will become: + * <div>foo<br>bar<p>abc</p></div> + */ + _replaceBrs: function (elem) { + this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br) { + var next = br.nextSibling; + + // Whether 2 or more <br> elements have been found and replaced with a + // <p> block. + var replaced = false; + + // If we find a <br> chain, remove the <br>s until we hit another element + // or non-whitespace. This leaves behind the first <br> in the chain + // (which will be replaced with a <p> later). + while ((next = this._nextElement(next)) && (next.tagName == "BR")) { + replaced = true; + var brSibling = next.nextSibling; + next.parentNode.removeChild(next); + next = brSibling; + } + + // If we removed a <br> chain, replace the remaining <br> with a <p>. Add + // all sibling nodes as children of the <p> until we hit another <br> + // chain. + if (replaced) { + var p = this._doc.createElement("p"); + br.parentNode.replaceChild(p, br); + + next = p.nextSibling; + while (next) { + // If we've hit another <br><br>, we're done adding children to this <p>. 
+ if (next.tagName == "BR") { + var nextElem = this._nextElement(next); + if (nextElem && nextElem.tagName == "BR") + break; + } + + // Otherwise, make this node a child of the new <p>. + var sibling = next.nextSibling; + p.appendChild(next); + next = sibling; + } + } + }); + }, + + _setNodeTag: function (node, tag) { + this.log("_setNodeTag", node, tag); + if (node.__JSDOMParser__) { + node.localName = tag.toLowerCase(); + node.tagName = tag.toUpperCase(); + return node; + } + + var replacement = node.ownerDocument.createElement(tag); + while (node.firstChild) { + replacement.appendChild(node.firstChild); + } + node.parentNode.replaceChild(replacement, node); + if (node.readability) + replacement.readability = node.readability; + + for (var i = 0; i < node.attributes.length; i++) { + replacement.setAttribute(node.attributes[i].name, node.attributes[i].value); + } + return replacement; + }, + + /** + * Prepare the article node for display. Clean out any inline styles, + * iframes, forms, strip extraneous <p> tags, etc. + * + * @param Element + * @return void + **/ + _prepArticle: function(articleContent) { + this._cleanStyles(articleContent); + + // Clean out junk from the article content + this._cleanConditionally(articleContent, "form"); + this._clean(articleContent, "object"); + this._clean(articleContent, "embed"); + this._clean(articleContent, "h1"); + this._clean(articleContent, "footer"); + + // If there is only one h2, they are probably using it as a header + // and not a subheader, so remove it since we already have a header. 
+ if (articleContent.getElementsByTagName('h2').length === 1) + this._clean(articleContent, "h2"); + + this._clean(articleContent, "iframe"); + this._cleanHeaders(articleContent); + + // Do these last as the previous stuff may have removed junk + // that will affect these + this._cleanConditionally(articleContent, "table"); + this._cleanConditionally(articleContent, "ul"); + this._cleanConditionally(articleContent, "div"); + + // Remove extra paragraphs + this._removeNodes(articleContent.getElementsByTagName('p'), function (paragraph) { + var imgCount = paragraph.getElementsByTagName('img').length; + var embedCount = paragraph.getElementsByTagName('embed').length; + var objectCount = paragraph.getElementsByTagName('object').length; + // At this point, nasty iframes have been removed, only remain embedded video ones. + var iframeCount = paragraph.getElementsByTagName('iframe').length; + var totalCount = imgCount + embedCount + objectCount + iframeCount; + + return totalCount === 0 && !this._getInnerText(paragraph, false); + }); + + this._forEachNode(this._getAllNodesWithTag(articleContent, ["br"]), function(br) { + var next = this._nextElement(br.nextSibling); + if (next && next.tagName == "P") + br.parentNode.removeChild(br); + }); + }, + + /** + * Initialize a node with the readability object. Also checks the + * className/id for special names to add to its score. 
+ * + * @param Element + * @return void + **/ + _initializeNode: function(node) { + node.readability = {"contentScore": 0}; + + switch (node.tagName) { + case 'DIV': + node.readability.contentScore += 5; + break; + + case 'PRE': + case 'TD': + case 'BLOCKQUOTE': + node.readability.contentScore += 3; + break; + + case 'ADDRESS': + case 'OL': + case 'UL': + case 'DL': + case 'DD': + case 'DT': + case 'LI': + case 'FORM': + node.readability.contentScore -= 3; + break; + + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + case 'TH': + node.readability.contentScore -= 5; + break; + } + + node.readability.contentScore += this._getClassWeight(node); + }, + + _removeAndGetNext: function(node) { + var nextNode = this._getNextNode(node, true); + node.parentNode.removeChild(node); + return nextNode; + }, + + /** + * Traverse the DOM from node to node, starting at the node passed in. + * Pass true for the second parameter to indicate this node itself + * (and its kids) are going away, and we want the next node over. + * + * Calling this in a loop will traverse the DOM depth-first. + */ + _getNextNode: function(node, ignoreSelfAndKids) { + // First check for kids if those aren't being ignored + if (!ignoreSelfAndKids && node.firstElementChild) { + return node.firstElementChild; + } + // Then for siblings... + if (node.nextElementSibling) { + return node.nextElementSibling; + } + // And finally, move up the parent chain *and* find a sibling + // (because this is depth-first traversal, we will have already + // seen the parent nodes themselves). + do { + node = node.parentNode; + } while (node && !node.nextElementSibling); + return node && node.nextElementSibling; + }, + + /** + * Like _getNextNode, but for DOM implementations with no + * firstElementChild/nextElementSibling functionality... 
+ */ + _getNextNodeNoElementProperties: function(node, ignoreSelfAndKids) { + function nextSiblingEl(n) { + do { + n = n.nextSibling; + } while (n && n.nodeType !== n.ELEMENT_NODE); + return n; + } + // First check for kids if those aren't being ignored + if (!ignoreSelfAndKids && node.children[0]) { + return node.children[0]; + } + // Then for siblings... + var next = nextSiblingEl(node); + if (next) { + return next; + } + // And finally, move up the parent chain *and* find a sibling + // (because this is depth-first traversal, we will have already + // seen the parent nodes themselves). + do { + node = node.parentNode; + if (node) + next = nextSiblingEl(node); + } while (node && !next); + return node && next; + }, + + _checkByline: function(node, matchString) { + if (this._articleByline) { + return false; + } + + if (node.getAttribute !== undefined) { + var rel = node.getAttribute("rel"); + } + + if ((rel === "author" || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) { + this._articleByline = node.textContent.trim(); + return true; + } + + return false; + }, + + _getNodeAncestors: function(node, maxDepth) { + maxDepth = maxDepth || 0; + var i = 0, ancestors = []; + while (node.parentNode) { + ancestors.push(node.parentNode); + if (maxDepth && ++i === maxDepth) + break; + node = node.parentNode; + } + return ancestors; + }, + + /*** + * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is + * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. + * + * @param page a document to run upon. Needs to be a full document, complete with body. + * @return Element + **/ + _grabArticle: function (page) { + this.log("**** grabArticle ****"); + var doc = this._doc; + var isPaging = (page !== null ? true: false); + page = page ? page : this._doc.body; + + // We can't grab an article if we don't have a page! 
+ if (!page) { + this.log("No body found in document. Abort."); + return null; + } + + var pageCacheHtml = page.innerHTML; + + // Check if any "dir" is set on the toplevel document element + this._articleDir = doc.documentElement.getAttribute("dir"); + + while (true) { + var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS); + + // First, node prepping. Trash nodes that look cruddy (like ones with the + // class name "comment", etc), and turn divs into P tags where they have been + // used inappropriately (as in, where they contain no other block level elements.) + var elementsToScore = []; + var node = this._doc.documentElement; + + while (node) { + var matchString = node.className + " " + node.id; + + // Check to see if this node is a byline, and remove it if it is. + if (this._checkByline(node, matchString)) { + node = this._removeAndGetNext(node); + continue; + } + + // Remove unlikely candidates + if (stripUnlikelyCandidates) { + if (this.REGEXPS.unlikelyCandidates.test(matchString) && + !this.REGEXPS.okMaybeItsACandidate.test(matchString) && + node.tagName !== "BODY" && + node.tagName !== "A") { + this.log("Removing unlikely candidate - " + matchString); + node = this._removeAndGetNext(node); + continue; + } + } + + if (this.DEFAULT_TAGS_TO_SCORE.indexOf(node.tagName) !== -1) { + elementsToScore.push(node); + } + + // Turn all divs that don't have children block level elements into p's + if (node.tagName === "DIV") { + // Sites like http://mobile.slate.com encloses each paragraph with a DIV + // element. DIVs with only a P element inside and no text content can be + // safely converted into plain P elements to avoid confusing the scoring + // algorithm with DIVs with are, in practice, paragraphs. 
+ if (this._hasSinglePInsideElement(node)) { + var newNode = node.children[0]; + node.parentNode.replaceChild(newNode, node); + node = newNode; + } else if (!this._hasChildBlockElement(node)) { + node = this._setNodeTag(node, "P"); + elementsToScore.push(node); + } else { + // EXPERIMENTAL + this._forEachNode(node.childNodes, function(childNode) { + if (childNode.nodeType === Node.TEXT_NODE) { + var p = doc.createElement('p'); + p.textContent = childNode.textContent; + p.style.display = 'inline'; + p.className = 'readability-styled'; + node.replaceChild(p, childNode); + } + }); + } + } + node = this._getNextNode(node); + } + + /** + * Loop through all paragraphs, and assign a score to them based on how content-y they look. + * Then add their score to their parent node. + * + * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. + **/ + var candidates = []; + this._forEachNode(elementsToScore, function(elementToScore) { + if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === 'undefined') + return; + + // If this paragraph is less than 25 characters, don't even count it. + var innerText = this._getInnerText(elementToScore); + if (innerText.length < 25) + return; + + // Exclude nodes with no ancestor. + var ancestors = this._getNodeAncestors(elementToScore, 3); + if (ancestors.length === 0) + return; + + var contentScore = 0; + + // Add a point for the paragraph itself as a base. + contentScore += 1; + + // Add points for any commas within this paragraph. + contentScore += innerText.split(',').length; + + // For every 100 characters in this paragraph, add another point. Up to 3 points. + contentScore += Math.min(Math.floor(innerText.length / 100), 3); + + // Initialize and score ancestors. 
+ this._forEachNode(ancestors, function(ancestor, level) { + if (!ancestor.tagName) + return; + + if (typeof(ancestor.readability) === 'undefined') { + this._initializeNode(ancestor); + candidates.push(ancestor); + } + + // Node score divider: + // - parent: 1 (no division) + // - grandparent: 2 + // - great grandparent+: ancestor level * 3 + if (level === 0) + var scoreDivider = 1; + else if (level === 1) + scoreDivider = 2; + else + scoreDivider = level * 3; + ancestor.readability.contentScore += contentScore / scoreDivider; + }); + }); + + // After we've calculated scores, loop through all of the possible + // candidate nodes we found and find the one with the highest score. + var topCandidates = []; + for (var c = 0, cl = candidates.length; c < cl; c += 1) { + var candidate = candidates[c]; + + // Scale the final candidates score based on link density. Good content + // should have a relatively small link density (5% or less) and be mostly + // unaffected by this operation. + var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); + candidate.readability.contentScore = candidateScore; + + this.log('Candidate:', candidate, "with score " + candidateScore); + + for (var t = 0; t < this._nbTopCandidates; t++) { + var aTopCandidate = topCandidates[t]; + + if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) { + topCandidates.splice(t, 0, candidate); + if (topCandidates.length > this._nbTopCandidates) + topCandidates.pop(); + break; + } + } + } + + var topCandidate = topCandidates[0] || null; + var neededToCreateTopCandidate = false; + + // If we still have no top candidate, just use the body as a last resort. + // We also have to copy the body node so it is something we can modify. 
+ if (topCandidate === null || topCandidate.tagName === "BODY") { + // Move all of the page's children into topCandidate + topCandidate = doc.createElement("DIV"); + neededToCreateTopCandidate = true; + // Move everything (not just elements, also text nodes etc.) into the container + // so we even include text directly in the body: + var kids = page.childNodes; + while (kids.length) { + this.log("Moving child out:", kids[0]); + topCandidate.appendChild(kids[0]); + } + + page.appendChild(topCandidate); + + this._initializeNode(topCandidate); + } else if (topCandidate) { + // Because of our bonus system, parents of candidates might have scores + // themselves. They get half of the node. There won't be nodes with higher + // scores than our topCandidate, but if we see the score going *up* in the first + // few steps up the tree, that's a decent sign that there might be more content + // lurking in other places that we want to unify in. The sibling stuff + // below does some of that - but only if we've looked high enough up the DOM + // tree. + var parentOfTopCandidate = topCandidate.parentNode; + var lastScore = topCandidate.readability.contentScore; + // The scores shouldn't get too low. + var scoreThreshold = lastScore / 3; + while (parentOfTopCandidate && parentOfTopCandidate.readability) { + var parentScore = parentOfTopCandidate.readability.contentScore; + if (parentScore < scoreThreshold) + break; + if (parentScore > lastScore) { + // Alright! We found a better parent to use. + topCandidate = parentOfTopCandidate; + break; + } + lastScore = parentOfTopCandidate.readability.contentScore; + parentOfTopCandidate = parentOfTopCandidate.parentNode; + } + } + + // Now that we have the top candidate, look through its siblings for content + // that might also be related. Things like preambles, content split by ads + // that we removed, etc. 
+ var articleContent = doc.createElement("DIV"); + if (isPaging) + articleContent.id = "readability-content"; + + var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); + var siblings = topCandidate.parentNode.children; + + for (var s = 0, sl = siblings.length; s < sl; s++) { + var sibling = siblings[s]; + var append = false; + + this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : ''); + this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : 'Unknown'); + + if (sibling === topCandidate) { + append = true; + } else { + var contentBonus = 0; + + // Give a bonus if sibling nodes and top candidates have the example same classname + if (sibling.className === topCandidate.className && topCandidate.className !== "") + contentBonus += topCandidate.readability.contentScore * 0.2; + + if (sibling.readability && + ((sibling.readability.contentScore + contentBonus) >= siblingScoreThreshold)) { + append = true; + } else if (sibling.nodeName === "P") { + var linkDensity = this._getLinkDensity(sibling); + var nodeContent = this._getInnerText(sibling); + var nodeLength = nodeContent.length; + + if (nodeLength > 80 && linkDensity < 0.25) { + append = true; + } else if (nodeLength < 80 && nodeLength > 0 && linkDensity === 0 && + nodeContent.search(/\.( |$)/) !== -1) { + append = true; + } + } + } + + if (append) { + this.log("Appending node:", sibling); + + if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) { + // We have a node that isn't a common block level element, like a form or td tag. + // Turn it into a div so it doesn't get filtered out later by accident. + this.log("Altering sibling:", sibling, 'to div.'); + + sibling = this._setNodeTag(sibling, "DIV"); + } + + articleContent.appendChild(sibling); + // siblings is a reference to the children array, and + // sibling is removed from the array when we call appendChild(). 
+ // As a result, we must revisit this index since the nodes + // have been shifted. + s -= 1; + sl -= 1; + } + } + + if (this._debug) + this.log("Article content pre-prep: " + articleContent.innerHTML); + // So we have all of the content that we need. Now we clean it up for presentation. + this._prepArticle(articleContent); + if (this._debug) + this.log("Article content post-prep: " + articleContent.innerHTML); + + if (this._curPageNum === 1) { + if (neededToCreateTopCandidate) { + // We already created a fake div thing, and there wouldn't have been any siblings left + // for the previous loop, so there's no point trying to create a new div, and then + // move all the children over. Just assign IDs and class names here. No need to append + // because that already happened anyway. + topCandidate.id = "readability-page-1"; + topCandidate.className = "page"; + } else { + var div = doc.createElement("DIV"); + div.id = "readability-page-1"; + div.className = "page"; + var children = articleContent.childNodes; + while (children.length) { + div.appendChild(children[0]); + } + articleContent.appendChild(div); + } + } + + if (this._debug) + this.log("Article content after paging: " + articleContent.innerHTML); + + // Now that we've gone through the full algorithm, check to see if + // we got any meaningful content. If we didn't, we may need to re-run + // grabArticle with different flags set. This gives us a higher likelihood of + // finding the content, and the sieve approach gives us a higher likelihood of + // finding the -right- content. 
+ if (this._getInnerText(articleContent, true).length < 500) { + page.innerHTML = pageCacheHtml; + + if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) { + this._removeFlag(this.FLAG_STRIP_UNLIKELYS); + } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) { + this._removeFlag(this.FLAG_WEIGHT_CLASSES); + } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) { + this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY); + } else { + return null; + } + } else { + return articleContent; + } + } + }, + + /** + * Check whether the input string could be a byline. + * This verifies that the input is a string, and that the length + * is less than 100 chars. + * + * @param possibleByline {string} - a string to check whether its a byline. + * @return Boolean - whether the input string is a byline. + */ + _isValidByline: function(byline) { + if (typeof byline == 'string' || byline instanceof String) { + byline = byline.trim(); + return (byline.length > 0) && (byline.length < 100); + } + return false; + }, + + /** + * Attempts to get excerpt and byline metadata for the article. + * + * @return Object with optional "excerpt" and "byline" properties + */ + _getArticleMetadata: function() { + var metadata = {}; + var values = {}; + var metaElements = this._doc.getElementsByTagName("meta"); + + // Match "description", or Twitter's "twitter:description" (Cards) + // in name attribute. + var namePattern = /^\s*((twitter)\s*:\s*)?(description|title)\s*$/gi; + + // Match Facebook's Open Graph title & description properties. + var propertyPattern = /^\s*og\s*:\s*(description|title)\s*$/gi; + + // Find description tags. 
+ this._forEachNode(metaElements, function(element) { + var elementName = element.getAttribute("name"); + var elementProperty = element.getAttribute("property"); + + if ([elementName, elementProperty].indexOf("author") !== -1) { + metadata.byline = element.getAttribute("content"); + return; + } + + var name = null; + if (namePattern.test(elementName)) { + name = elementName; + } else if (propertyPattern.test(elementProperty)) { + name = elementProperty; + } + + if (name) { + var content = element.getAttribute("content"); + if (content) { + // Convert to lowercase and remove any whitespace + // so we can match below. + name = name.toLowerCase().replace(/\s/g, ''); + values[name] = content.trim(); + } + } + }); + + if ("description" in values) { + metadata.excerpt = values["description"]; + } else if ("og:description" in values) { + // Use facebook open graph description. + metadata.excerpt = values["og:description"]; + } else if ("twitter:description" in values) { + // Use twitter cards description. + metadata.excerpt = values["twitter:description"]; + } + + if ("og:title" in values) { + // Use facebook open graph title. + metadata.title = values["og:title"]; + } else if ("twitter:title" in values) { + // Use twitter cards title. + metadata.title = values["twitter:title"]; + } + + return metadata; + }, + + /** + * Removes script tags from the document. + * + * @param Element + **/ + _removeScripts: function(doc) { + this._removeNodes(doc.getElementsByTagName('script'), function(scriptNode) { + scriptNode.nodeValue = ""; + scriptNode.removeAttribute('src'); + return true; + }); + this._removeNodes(doc.getElementsByTagName('noscript')); + }, + + /** + * Check if this node has only whitespace and a single P element + * Returns false if the DIV node contains non-empty text nodes + * or if it contains no P or more than 1 element. 
+ * + * @param Element + **/ + _hasSinglePInsideElement: function(element) { + // There should be exactly 1 element child which is a P: + if (element.children.length != 1 || element.children[0].tagName !== "P") { + return false; + } + + // And there should be no text nodes with real content + return !this._someNode(element.childNodes, function(node) { + return node.nodeType === Node.TEXT_NODE && + this.REGEXPS.hasContent.test(node.textContent); + }); + }, + + /** + * Determine whether element has any children block level elements. + * + * @param Element + */ + _hasChildBlockElement: function (element) { + return this._someNode(element.childNodes, function(node) { + return this.DIV_TO_P_ELEMS.indexOf(node.tagName) !== -1 || + this._hasChildBlockElement(node); + }); + }, + + /** + * Get the inner text of a node - cross browser compatibly. + * This also strips out any excess whitespace to be found. + * + * @param Element + * @param Boolean normalizeSpaces (default: true) + * @return string + **/ + _getInnerText: function(e, normalizeSpaces) { + normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces; + var textContent = e.textContent.trim(); + + if (normalizeSpaces) { + return textContent.replace(this.REGEXPS.normalize, " "); + } + return textContent; + }, + + /** + * Get the number of times a string s appears in the node e. + * + * @param Element + * @param string - what to split on. Default is "," + * @return number (integer) + **/ + _getCharCount: function(e, s) { + s = s || ","; + return this._getInnerText(e).split(s).length - 1; + }, + + /** + * Remove the style attribute on every e and under. + * TODO: Test if getElementsByTagName(*) is faster. + * + * @param Element + * @return void + **/ + _cleanStyles: function(e) { + e = e || this._doc; + if (!e) + return; + var cur = e.firstChild; + + // Remove any root styles, if we're able. 
+ if (typeof e.removeAttribute === 'function' && e.className !== 'readability-styled') + e.removeAttribute('style'); + + // Go until there are no more child nodes + while (cur !== null) { + if (cur.nodeType === cur.ELEMENT_NODE) { + // Remove style attribute(s) : + if (cur.className !== "readability-styled") + cur.removeAttribute("style"); + + this._cleanStyles(cur); + } + + cur = cur.nextSibling; + } + }, + + /** + * Get the density of links as a percentage of the content + * This is the amount of text that is inside a link divided by the total text in the node. + * + * @param Element + * @return number (float) + **/ + _getLinkDensity: function(element) { + var textLength = this._getInnerText(element).length; + if (textLength === 0) + return 0; + + var linkLength = 0; + + // XXX implement _reduceNodeList? + this._forEachNode(element.getElementsByTagName("a"), function(linkNode) { + linkLength += this._getInnerText(linkNode).length; + }); + + return linkLength / textLength; + }, + + /** + * Find a cleaned up version of the current URL, to use for comparing links for possible next-pageyness. + * + * @author Dan Lacy + * @return string the base url + **/ + _findBaseUrl: function() { + var uri = this._uri; + var noUrlParams = uri.path.split("?")[0]; + var urlSlashes = noUrlParams.split("/").reverse(); + var cleanedSegments = []; + var possibleType = ""; + + for (var i = 0, slashLen = urlSlashes.length; i < slashLen; i += 1) { + var segment = urlSlashes[i]; + + // Split off and save anything that looks like a file type. + if (segment.indexOf(".") !== -1) { + possibleType = segment.split(".")[1]; + + // If the type isn't alpha-only, it's probably not actually a file extension. + if (!possibleType.match(/[^a-zA-Z]/)) + segment = segment.split(".")[0]; + } + + // EW-CMS specific segment replacement. Ugly. 
+ // Example: http://www.ew.com/ew/article/0,,20313460_20369436,00.html + if (segment.indexOf(',00') !== -1) + segment = segment.replace(',00', ''); + + // If our first or second segment has anything looking like a page number, remove it. + if (segment.match(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i) && ((i === 1) || (i === 0))) + segment = segment.replace(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i, ""); + + var del = false; + + // If this is purely a number, and it's the first or second segment, + // it's probably a page number. Remove it. + if (i < 2 && segment.match(/^\d{1,2}$/)) + del = true; + + // If this is the first segment and it's just "index", remove it. + if (i === 0 && segment.toLowerCase() === "index") + del = true; + + // If our first or second segment is smaller than 3 characters, + // and the first segment was purely alphas, remove it. + if (i < 2 && segment.length < 3 && !urlSlashes[0].match(/[a-z]/i)) + del = true; + + // If it's not marked for deletion, push it to cleanedSegments. + if (!del) + cleanedSegments.push(segment); + } + + // This is our final, cleaned, base article URL. + return uri.scheme + "://" + uri.host + cleanedSegments.reverse().join("/"); + }, + + /** + * Look for any paging links that may occur within the document. + * + * @param body + * @return object (array) + **/ + _findNextPageLink: function(elem) { + var uri = this._uri; + var possiblePages = {}; + var allLinks = elem.getElementsByTagName('a'); + var articleBaseUrl = this._findBaseUrl(); + + // Loop through all links, looking for hints that they may be next-page links. + // Things like having "page" in their textContent, className or id, or being a child + // of a node with a page-y className or id. + // + // Also possible: levenshtein distance? longest common subsequence? 
+ // + // After we do that, assign each page a score, and + for (var i = 0, il = allLinks.length; i < il; i += 1) { + var link = allLinks[i]; + var linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, ''); + + // If we've already seen this page, ignore it. + if (linkHref === "" || + linkHref === articleBaseUrl || + linkHref === uri.spec || + linkHref in this._parsedPages) { + continue; + } + + // If it's on a different domain, skip it. + if (uri.host !== linkHref.split(/\/+/g)[1]) + continue; + + var linkText = this._getInnerText(link); + + // If the linkText looks like it's not the next page, skip it. + if (linkText.match(this.REGEXPS.extraneous) || linkText.length > 25) + continue; + + // If the leftovers of the URL after removing the base URL don't contain + // any digits, it's certainly not a next page link. + var linkHrefLeftover = linkHref.replace(articleBaseUrl, ''); + if (!linkHrefLeftover.match(/\d/)) + continue; + + if (!(linkHref in possiblePages)) { + possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref}; + } else { + possiblePages[linkHref].linkText += ' | ' + linkText; + } + + var linkObj = possiblePages[linkHref]; + + // If the articleBaseUrl isn't part of this URL, penalize this link. It could + // still be the link, but the odds are lower. + // Example: http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html + if (linkHref.indexOf(articleBaseUrl) !== 0) + linkObj.score -= 25; + + var linkData = linkText + ' ' + link.className + ' ' + link.id; + if (linkData.match(this.REGEXPS.nextLink)) + linkObj.score += 50; + + if (linkData.match(/pag(e|ing|inat)/i)) + linkObj.score += 25; + + if (linkData.match(/(first|last)/i)) { + // -65 is enough to negate any bonuses gotten from a > or » in the text, + // If we already matched on "next", last is probably fine. + // If we didn't, then it's bad. Penalize. 
+ if (!linkObj.linkText.match(this.REGEXPS.nextLink)) + linkObj.score -= 65; + } + + if (linkData.match(this.REGEXPS.negative) || linkData.match(this.REGEXPS.extraneous)) + linkObj.score -= 50; + + if (linkData.match(this.REGEXPS.prevLink)) + linkObj.score -= 200; + + // If a parentNode contains page or paging or paginat + var parentNode = link.parentNode; + var positiveNodeMatch = false; + var negativeNodeMatch = false; + + while (parentNode) { + var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id; + + if (!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) { + positiveNodeMatch = true; + linkObj.score += 25; + } + + if (!negativeNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(this.REGEXPS.negative)) { + // If this is just something like "footer", give it a negative. + // If it's something like "body-and-footer", leave it be. + if (!parentNodeClassAndId.match(this.REGEXPS.positive)) { + linkObj.score -= 25; + negativeNodeMatch = true; + } + } + + parentNode = parentNode.parentNode; + } + + // If the URL looks like it has paging in it, add to the score. + // Things like /page/2/, /pagenum/2, ?p=3, ?page=11, ?pagination=34 + if (linkHref.match(/p(a|g|ag)?(e|ing|ination)?(=|\/)[0-9]{1,2}/i) || linkHref.match(/(page|paging)/i)) + linkObj.score += 25; + + // If the URL contains negative values, give a slight decrease. + if (linkHref.match(this.REGEXPS.extraneous)) + linkObj.score -= 15; + + /** + * Minor punishment to anything that doesn't match our current URL. + * NOTE: I'm finding this to cause more harm than good where something is exactly 50 points. + * Dan, can you show me a counterexample where this is necessary? + * if (linkHref.indexOf(window.location.href) !== 0) { + * linkObj.score -= 1; + * } + **/ + + // If the link text can be parsed as a number, give it a minor bonus, with a slight + // bias towards lower numbered pages. 
This is so that pages that might not have 'next' + // in their text can still get scored, and sorted properly by score. + var linkTextAsNumber = parseInt(linkText, 10); + if (linkTextAsNumber) { + // Punish 1 since we're either already there, or it's probably + // before what we want anyways. + if (linkTextAsNumber === 1) { + linkObj.score -= 10; + } else { + linkObj.score += Math.max(0, 10 - linkTextAsNumber); + } + } + } + + // Loop thrugh all of our possible pages from above and find our top + // candidate for the next page URL. Require at least a score of 50, which + // is a relatively high confidence that this page is the next link. + var topPage = null; + for (var page in possiblePages) { + if (possiblePages.hasOwnProperty(page)) { + if (possiblePages[page].score >= 50 && + (!topPage || topPage.score < possiblePages[page].score)) + topPage = possiblePages[page]; + } + } + + var nextHref = null; + if (topPage) { + nextHref = topPage.href.replace(/\/$/, ''); + + this.log('NEXT PAGE IS ' + nextHref); + this._parsedPages[nextHref] = true; + } + return nextHref; + }, + + _successfulRequest: function(request) { + return (request.status >= 200 && request.status < 300) || + request.status === 304 || + (request.status === 0 && request.responseText); + }, + + _ajax: function(url, options) { + var request = new XMLHttpRequest(); + + function respondToReadyState(readyState) { + if (request.readyState === 4) { + if (this._successfulRequest(request)) { + if (options.success) + options.success(request); + } else if (options.error) { + options.error(request); + } + } + } + + if (typeof options === 'undefined') + options = {}; + + request.onreadystatechange = respondToReadyState; + + request.open('get', url, true); + request.setRequestHeader('Accept', 'text/html'); + + try { + request.send(options.postBody); + } catch (e) { + if (options.error) + options.error(); + } + + return request; + }, + + _appendNextPage: function(nextPageLink) { + var doc = this._doc; + 
this._curPageNum += 1; + + var articlePage = doc.createElement("DIV"); + articlePage.id = 'readability-page-' + this._curPageNum; + articlePage.className = 'page'; + articlePage.innerHTML = '<p class="page-separator" title="Page ' + this._curPageNum + '">§</p>'; + + doc.getElementById("readability-content").appendChild(articlePage); + + if (this._curPageNum > this._maxPages) { + var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>"; + articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup; + return; + } + + // Now that we've built the article page DOM element, get the page content + // asynchronously and load the cleaned content into the div we created for it. + (function(pageUrl, thisPage) { + this._ajax(pageUrl, { + success: function(r) { + + // First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. + var eTag = r.getResponseHeader('ETag'); + if (eTag) { + if (eTag in this._pageETags) { + this.log("Exact duplicate page found via ETag. Aborting."); + articlePage.style.display = 'none'; + return; + } + this._pageETags[eTag] = 1; + } + + // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. + var page = doc.createElement("DIV"); + + // Do some preprocessing to our HTML to make it ready for appending. + // - Remove any script tags. Swap and reswap newlines with a unicode + // character because multiline regex doesn't work in javascript. + // - Turn any noscript tags into divs so that we can parse them. This + // allows us to find any next page links hidden via javascript. + // - Turn all double br's into p's - was handled by prepDocument in the original view. + // Maybe in the future abstract out prepDocument to work for both the original document + // and AJAX-added pages. 
+ var responseHtml = r.responseText.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, ''); + responseHtml = responseHtml.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, ''); + responseHtml = responseHtml.replace(/\uffff/g, '\n').replace(/<(\/?)noscript/gi, '<$1div'); + responseHtml = responseHtml.replace(this.REGEXPS.replaceFonts, '<$1span>'); + + page.innerHTML = responseHtml; + this._replaceBrs(page); + + // Reset all flags for the next page, as they will search through it and + // disable as necessary at the end of grabArticle. + this._flags = 0x1 | 0x2 | 0x4; + + var secondNextPageLink = this._findNextPageLink(page); + + // NOTE: if we end up supporting _appendNextPage(), we'll need to + // change this call to be async + var content = this._grabArticle(page); + + if (!content) { + this.log("No content found in page to append. Aborting."); + return; + } + + // Anti-duplicate mechanism. Essentially, get the first paragraph of our new page. + // Compare it against all of the the previous document's we've gotten. If the previous + // document contains exactly the innerHTML of this first paragraph, it's probably a duplicate. + var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null; + if (firstP && firstP.innerHTML.length > 100) { + for (var i = 1; i <= this._curPageNum; i += 1) { + var rPage = doc.getElementById('readability-page-' + i); + if (rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) { + this.log('Duplicate of page ' + i + ' - skipping.'); + articlePage.style.display = 'none'; + this._parsedPages[pageUrl] = true; + return; + } + } + } + + this._removeScripts(content); + + thisPage.innerHTML = thisPage.innerHTML + content.innerHTML; + + // After the page has rendered, post process the content. This delay is necessary because, + // in webkit at least, offsetWidth is not set in time to determine image width. 
We have to + // wait a little bit for reflow to finish before we can fix floating images. + setTimeout((function() { + this._postProcessContent(thisPage); + }).bind(this), 500); + + + if (secondNextPageLink) + this._appendNextPage(secondNextPageLink); + } + }); + }).bind(this)(nextPageLink, articlePage); + }, + + /** + * Get an elements class/id weight. Uses regular expressions to tell if this + * element looks good or bad. + * + * @param Element + * @return number (Integer) + **/ + _getClassWeight: function(e) { + if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) + return 0; + + var weight = 0; + + // Look for a special classname + if (typeof(e.className) === 'string' && e.className !== '') { + if (this.REGEXPS.negative.test(e.className)) + weight -= 25; + + if (this.REGEXPS.positive.test(e.className)) + weight += 25; + } + + // Look for a special ID + if (typeof(e.id) === 'string' && e.id !== '') { + if (this.REGEXPS.negative.test(e.id)) + weight -= 25; + + if (this.REGEXPS.positive.test(e.id)) + weight += 25; + } + + return weight; + }, + + /** + * Clean a node of all elements of type "tag". + * (Unless it's a youtube/vimeo video. People love movies.) + * + * @param Element + * @param string tag to clean + * @return void + **/ + _clean: function(e, tag) { + var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1; + + this._removeNodes(e.getElementsByTagName(tag), function(element) { + // Allow youtube and vimeo videos through as people usually want to see those. + if (isEmbed) { + var attributeValues = [].map.call(element.attributes, function(attr) { + return attr.value; + }).join("|"); + + // First, check the elements attributes to see if any of them contain youtube or vimeo + if (this.REGEXPS.videos.test(attributeValues)) + return false; + + // Then check the elements inside this element for the same. 
+ if (this.REGEXPS.videos.test(element.innerHTML)) + return false; + } + + return true; + }); + }, + + /** + * Check if a given node has one of its ancestor tag name matching the + * provided one. + * @param HTMLElement node + * @param String tagName + * @param Number maxDepth + * @return Boolean + */ + _hasAncestorTag: function(node, tagName, maxDepth) { + maxDepth = maxDepth || 3; + tagName = tagName.toUpperCase(); + var depth = 0; + while (node.parentNode) { + if (depth > maxDepth) + return false; + if (node.parentNode.tagName === tagName) + return true; + node = node.parentNode; + depth++; + } + return false; + }, + + /** + * Clean an element of all tags of type "tag" if they look fishy. + * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc. + * + * @return void + **/ + _cleanConditionally: function(e, tag) { + if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) + return; + + var isList = tag === "ul" || tag === "ol"; + + // Gather counts for other typical elements embedded within. + // Traverse backwards so we can remove nodes at the same time + // without effecting the traversal. + // + // TODO: Consider taking into account original contentScore here. + this._removeNodes(e.getElementsByTagName(tag), function(node) { + var weight = this._getClassWeight(node); + var contentScore = 0; + + this.log("Cleaning Conditionally", node); + + if (weight + contentScore < 0) { + return true; + } + + if (this._getCharCount(node, ',') < 10) { + // If there are not very many commas, and the number of + // non-paragraph elements is more than paragraphs or other + // ominous signs, remove the element. 
+ var p = node.getElementsByTagName("p").length; + var img = node.getElementsByTagName("img").length; + var li = node.getElementsByTagName("li").length-100; + var input = node.getElementsByTagName("input").length; + + var embedCount = 0; + var embeds = node.getElementsByTagName("embed"); + for (var ei = 0, il = embeds.length; ei < il; ei += 1) { + if (!this.REGEXPS.videos.test(embeds[ei].src)) + embedCount += 1; + } + + var linkDensity = this._getLinkDensity(node); + var contentLength = this._getInnerText(node).length; + + var haveToRemove = + // Make an exception for elements with no p's and exactly 1 img. + (img > p && !this._hasAncestorTag(node, "figure")) || + (!isList && li > p) || + (input > Math.floor(p/3)) || + (!isList && contentLength < 25 && (img === 0 || img > 2)) || + (!isList && weight < 25 && linkDensity > 0.2) || + (weight >= 25 && linkDensity > 0.5) || + ((embedCount === 1 && contentLength < 75) || embedCount > 1); + return haveToRemove; + } + return false; + }); + }, + + /** + * Clean out spurious headers from an Element. Checks things like classnames and link density. + * + * @param Element + * @return void + **/ + _cleanHeaders: function(e) { + for (var headerIndex = 1; headerIndex < 3; headerIndex += 1) { + this._removeNodes(e.getElementsByTagName('h' + headerIndex), function (header) { + return this._getClassWeight(header) < 0; + }); + } + }, + + _flagIsActive: function(flag) { + return (this._flags & flag) > 0; + }, + + _addFlag: function(flag) { + this._flags = this._flags | flag; + }, + + _removeFlag: function(flag) { + this._flags = this._flags & ~flag; + }, + + /** + * Decides whether or not the document is reader-able without parsing the whole thing. + * + * @return boolean Whether or not we suspect parse() will suceeed at returning an article object. 
+ */ + isProbablyReaderable: function(helperIsVisible) { + var nodes = this._getAllNodesWithTag(this._doc, ["p", "pre"]); + + // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable. + // Some articles' DOM structures might look like + // <div> + // Sentences<br> + // <br> + // Sentences<br> + // </div> + var brNodes = this._getAllNodesWithTag(this._doc, ["div > br"]); + if (brNodes.length) { + var set = new Set(); + [].forEach.call(brNodes, function(node) { + set.add(node.parentNode); + }); + nodes = [].concat.apply(Array.from(set), nodes); + } + + // FIXME we should have a fallback for helperIsVisible, but this is + // problematic because of jsdom's elem.style handling - see + // https://github.com/mozilla/readability/pull/186 for context. + + var score = 0; + // This is a little cheeky, we use the accumulator 'score' to decide what to return from + // this callback: + return this._someNode(nodes, function(node) { + if (helperIsVisible && !helperIsVisible(node)) + return false; + var matchString = node.className + " " + node.id; + + if (this.REGEXPS.unlikelyCandidates.test(matchString) && + !this.REGEXPS.okMaybeItsACandidate.test(matchString)) { + return false; + } + + if (node.matches && node.matches("li p")) { + return false; + } + + var textContentLength = node.textContent.trim().length; + if (textContentLength < 140) { + return false; + } + + score += Math.sqrt(textContentLength - 140); + + if (score > 20) { + return true; + } + return false; + }); + }, + + /** + * Runs readability. + * + * Workflow: + * 1. Prep the document by removing script tags, css, etc. + * 2. Build readability's DOM tree. + * 3. Grab the article content from the current dom tree. + * 4. Replace the current DOM tree with the new one. + * 5. Read peacefully. 
+ * + * @return void + **/ + parse: function () { + // Avoid parsing too large documents, as per configuration option + if (this._maxElemsToParse > 0) { + var numTags = this._doc.getElementsByTagName("*").length; + if (numTags > this._maxElemsToParse) { + throw new Error("Aborting parsing document; " + numTags + " elements found"); + } + } + + if (typeof this._doc.documentElement.firstElementChild === "undefined") { + this._getNextNode = this._getNextNodeNoElementProperties; + } + // Remove script tags from the document. + this._removeScripts(this._doc); + + // FIXME: Disabled multi-page article support for now as it + // needs more work on infrastructure. + + // Make sure this document is added to the list of parsed pages first, + // so we don't double up on the first page. + // this._parsedPages[uri.spec.replace(/\/$/, '')] = true; + + // Pull out any possible next page link first. + // var nextPageLink = this._findNextPageLink(doc.body); + + this._prepDocument(); + + var metadata = this._getArticleMetadata(); + var articleTitle = metadata.title || this._getArticleTitle(); + + var articleContent = this._grabArticle(); + if (!articleContent) + return null; + + this.log("Grabbed: " + articleContent.innerHTML); + + this._postProcessContent(articleContent); + + // if (nextPageLink) { + // // Append any additional pages after a small timeout so that people + // // can start reading without having to wait for this to finish processing. + // setTimeout((function() { + // this._appendNextPage(nextPageLink); + // }).bind(this), 500); + // } + + // If we haven't found an excerpt in the article's metadata, use the article's + // first paragraph as the excerpt. This is used for displaying a preview of + // the article's content. 
+ if (!metadata.excerpt) { + var paragraphs = articleContent.getElementsByTagName("p"); + if (paragraphs.length > 0) { + metadata.excerpt = paragraphs[0].textContent.trim(); + } + } + + var textContent = articleContent.textContent; + return { + uri: this._uri, + title: articleTitle, + byline: metadata.byline || this._articleByline, + dir: this._articleDir, + content: articleContent.innerHTML, + textContent: textContent, + length: textContent.length, + excerpt: metadata.excerpt, + }; + } +}; diff --git a/toolkit/components/reader/ReaderMode.jsm b/toolkit/components/reader/ReaderMode.jsm new file mode 100644 index 000000000..033a02489 --- /dev/null +++ b/toolkit/components/reader/ReaderMode.jsm @@ -0,0 +1,514 @@ +// -*- indent-tabs-mode: nil; js-indent-level: 2 -*- +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +"use strict"; + +this.EXPORTED_SYMBOLS = ["ReaderMode"]; + +const { classes: Cc, interfaces: Ci, utils: Cu } = Components; + +// Constants for telemetry. 
+const DOWNLOAD_SUCCESS = 0; +const DOWNLOAD_ERROR_XHR = 1; +const DOWNLOAD_ERROR_NO_DOC = 2; + +const PARSE_SUCCESS = 0; +const PARSE_ERROR_TOO_MANY_ELEMENTS = 1; +const PARSE_ERROR_WORKER = 2; +const PARSE_ERROR_NO_ARTICLE = 3; + +Cu.import("resource://gre/modules/Services.jsm"); +Cu.import("resource://gre/modules/XPCOMUtils.jsm"); + +Cu.importGlobalProperties(["XMLHttpRequest"]); + +XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-common/utils.js"); +XPCOMUtils.defineLazyModuleGetter(this, "Messaging", "resource://gre/modules/Messaging.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm"); +XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm"); + +XPCOMUtils.defineLazyGetter(this, "Readability", function() { + let scope = {}; + scope.dump = this.dump; + Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope); + return scope["Readability"]; +}); + +this.ReaderMode = { + // Version of the cache schema. + CACHE_VERSION: 1, + + DEBUG: 0, + + // Don't try to parse the page if it has too many elements (for memory and + // performance reasons) + get maxElemsToParse() { + delete this.parseNodeLimit; + + Services.prefs.addObserver("reader.parse-node-limit", this, false); + return this.parseNodeLimit = Services.prefs.getIntPref("reader.parse-node-limit"); + }, + + get isEnabledForParseOnLoad() { + delete this.isEnabledForParseOnLoad; + + // Listen for future pref changes. 
+ Services.prefs.addObserver("reader.parse-on-load.", this, false); + + return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad(); + }, + + get isOnLowMemoryPlatform() { + let memory = Cc["@mozilla.org/xpcom/memory-service;1"].getService(Ci.nsIMemory); + delete this.isOnLowMemoryPlatform; + return this.isOnLowMemoryPlatform = memory.isLowMemoryPlatform(); + }, + + _getStateForParseOnLoad: function () { + let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled"); + let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled"); + // For low-memory devices, don't allow reader mode since it takes up a lot of memory. + // See https://bugzilla.mozilla.org/show_bug.cgi?id=792603 for details. + return isForceEnabled || (isEnabled && !this.isOnLowMemoryPlatform); + }, + + observe: function(aMessage, aTopic, aData) { + switch (aTopic) { + case "nsPref:changed": + if (aData.startsWith("reader.parse-on-load.")) { + this.isEnabledForParseOnLoad = this._getStateForParseOnLoad(); + } else if (aData === "reader.parse-node-limit") { + this.parseNodeLimit = Services.prefs.getIntPref(aData); + } + break; + } + }, + + /** + * Enter the reader mode by going forward one step in history if applicable, + * if not, append the about:reader page in the history instead. + */ + enterReaderMode: function(docShell, win) { + let url = win.document.location.href; + let readerURL = "about:reader?url=" + encodeURIComponent(url); + let webNav = docShell.QueryInterface(Ci.nsIWebNavigation); + let sh = webNav.sessionHistory; + if (webNav.canGoForward) { + let forwardEntry = sh.getEntryAtIndex(sh.index + 1, false); + let forwardURL = forwardEntry.URI.spec; + if (forwardURL && (forwardURL == readerURL || !readerURL)) { + webNav.goForward(); + return; + } + } + + win.document.location = readerURL; + }, + + /** + * Exit the reader mode by going back one step in history if applicable, + * if not, append the original page in the history instead. 
+   */
+  leaveReaderMode: function(docShell, win) {
+    let url = win.document.location.href;
+    // null when the current page is not a well-formed about:reader URL.
+    let originalURL = this.getOriginalUrl(url);
+    let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
+    let sh = webNav.sessionHistory;
+    if (webNav.canGoBack) {
+      let prevEntry = sh.getEntryAtIndex(sh.index - 1, false);
+      let prevURL = prevEntry.URI.spec;
+      // Going back is enough when the previous entry is the article itself,
+      // or when we could not work out which article we came from.
+      if (prevURL && (prevURL == originalURL || !originalURL)) {
+        webNav.goBack();
+        return;
+      }
+    }
+
+    // Without a known original URL there is nothing to navigate to. Assigning
+    // null to document.location would be stringified to "null" and treated
+    // as a URL, so bail out instead.
+    if (!originalURL) {
+      return;
+    }
+
+    win.document.location = originalURL;
+  },
+
+  /**
+   * Returns original URL from an about:reader URL.
+   *
+   * The hash of the about:reader URL itself (if any) is carried over onto
+   * the returned article URL.
+   *
+   * @param url An about:reader URL.
+   * @return The original URL for the article, or null if we did not find
+   *         a properly formatted about:reader URL.
+   */
+  getOriginalUrl: function(url) {
+    if (!url.startsWith("about:reader?")) {
+      return null;
+    }
+
+    // Split the outer hash off first so it does not end up inside the
+    // query string that is parsed below.
+    let outerHash = "";
+    try {
+      let uriObj = Services.io.newURI(url, null, null);
+      url = uriObj.specIgnoringRef;
+      outerHash = uriObj.ref;
+    } catch (ex) { /* ignore, use the raw string */ }
+
+    let searchParams = new URLSearchParams(url.substring("about:reader?".length));
+    if (!searchParams.has("url")) {
+      return null;
+    }
+    let originalUrl = searchParams.get("url");
+    if (outerHash) {
+      try {
+        // Resolve "#hash" against the article URL to re-attach the fragment.
+        let uriObj = Services.io.newURI(originalUrl, null, null);
+        uriObj = Services.io.newURI('#' + outerHash, null, uriObj);
+        originalUrl = uriObj.spec;
+      } catch (ex) { /* ignore, return the article URL without the hash */ }
+    }
+    return originalUrl;
+  },
+
+  /**
+   * Decides whether or not a document is reader-able without parsing the whole thing.
+   *
+   * @param doc A document to parse.
+   * @return boolean Whether or not we should show the reader mode button.
+ */ + isProbablyReaderable: function(doc) { + // Only care about 'real' HTML documents: + if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) { + return false; + } + + let uri = Services.io.newURI(doc.location.href, null, null); + if (!this._shouldCheckUri(uri)) { + return false; + } + + let utils = this.getUtilsForWin(doc.defaultView); + // We pass in a helper function to determine if a node is visible, because + // it uses gecko APIs that the engine-agnostic readability code can't rely + // upon. + return new Readability(uri, doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils)); + }, + + isNodeVisible: function(utils, node) { + let bounds = utils.getBoundsWithoutFlushing(node); + return bounds.height > 0 && bounds.width > 0; + }, + + getUtilsForWin: function(win) { + return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils); + }, + + /** + * Gets an article from a loaded browser's document. This method will not attempt + * to parse certain URIs (e.g. about: URIs). + * + * @param doc A document to parse. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. + */ + parseDocument: Task.async(function* (doc) { + let documentURI = Services.io.newURI(doc.documentURI, null, null); + let baseURI = Services.io.newURI(doc.baseURI, null, null); + if (!this._shouldCheckUri(documentURI) || !this._shouldCheckUri(baseURI, true)) { + this.log("Reader mode disabled for URI"); + return null; + } + + return yield this._readerParse(baseURI, doc); + }), + + /** + * Downloads and parses a document from a URL. + * + * @param url URL to download and parse. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. 
+ */ + downloadAndParseDocument: Task.async(function* (url) { + let doc = yield this._downloadDocument(url); + let uri = Services.io.newURI(doc.baseURI, null, null); + if (!this._shouldCheckUri(uri, true)) { + this.log("Reader mode disabled for URI"); + return null; + } + + return yield this._readerParse(uri, doc); + }), + + _downloadDocument: function (url) { + let histogram = Services.telemetry.getHistogramById("READER_MODE_DOWNLOAD_RESULT"); + return new Promise((resolve, reject) => { + let xhr = new XMLHttpRequest(); + xhr.open("GET", url, true); + xhr.onerror = evt => reject(evt.error); + xhr.responseType = "document"; + xhr.onload = evt => { + if (xhr.status !== 200) { + reject("Reader mode XHR failed with status: " + xhr.status); + histogram.add(DOWNLOAD_ERROR_XHR); + return; + } + + let doc = xhr.responseXML; + if (!doc) { + reject("Reader mode XHR didn't return a document"); + histogram.add(DOWNLOAD_ERROR_NO_DOC); + return; + } + + // Manually follow a meta refresh tag if one exists. 
+ let meta = doc.querySelector("meta[http-equiv=refresh]"); + if (meta) { + let content = meta.getAttribute("content"); + if (content) { + let urlIndex = content.toUpperCase().indexOf("URL="); + if (urlIndex > -1) { + let baseURI = Services.io.newURI(url, null, null); + let newURI = Services.io.newURI(content.substring(urlIndex + 4), null, baseURI); + let newURL = newURI.spec; + let ssm = Services.scriptSecurityManager; + let flags = ssm.LOAD_IS_AUTOMATIC_DOCUMENT_REPLACEMENT | + ssm.DISALLOW_INHERIT_PRINCIPAL; + try { + ssm.checkLoadURIStrWithPrincipal(doc.nodePrincipal, newURL, flags); + } catch (ex) { + let errorMsg = "Reader mode disallowed meta refresh (reason: " + ex + ")."; + + if (Services.prefs.getBoolPref("reader.errors.includeURLs")) + errorMsg += " Refresh target URI: '" + newURL + "'."; + reject(errorMsg); + return; + } + // Otherwise, pass an object indicating our new URL: + if (!baseURI.equalsExceptRef(newURI)) { + reject({newURL}); + return; + } + } + } + } + let responseURL = xhr.responseURL; + let givenURL = url; + // Convert these to real URIs to make sure the escaping (or lack + // thereof) is identical: + try { + responseURL = Services.io.newURI(responseURL, null, null).specIgnoringRef; + } catch (ex) { /* Ignore errors - we'll use what we had before */ } + try { + givenURL = Services.io.newURI(givenURL, null, null).specIgnoringRef; + } catch (ex) { /* Ignore errors - we'll use what we had before */ } + + if (responseURL != givenURL) { + // We were redirected without a meta refresh tag. + // Force redirect to the correct place: + reject({newURL: xhr.responseURL}); + return; + } + resolve(doc); + histogram.add(DOWNLOAD_SUCCESS); + }; + xhr.send(); + }); + }, + + + /** + * Retrieves an article from the cache given an article URI. + * + * @param url The article URL. + * @return {Promise} + * @resolves JS object representing the article, or null if no article is found. 
+   * @rejects OS.File.Error
+   */
+  getArticleFromCache: Task.async(function* (url) {
+    let path = this._toHashedPath(url);
+    try {
+      let array = yield OS.File.read(path);
+      return JSON.parse(new TextDecoder().decode(array));
+    } catch (e) {
+      // A missing cache file just means "not cached"; anything else
+      // (including a JSON parse failure) is passed on to the caller.
+      if (!(e instanceof OS.File.Error) || !e.becauseNoSuchFile)
+        throw e;
+      return null;
+    }
+  }),
+
+  /**
+   * Stores an article in the cache.
+   *
+   * The article is written atomically to its hashed path, then a
+   * "Reader:AddedToCache" request carrying the file size is sent.
+   *
+   * @param article JS object representing article.
+   * @return {Promise}
+   * @resolves When the article is stored.
+   * @rejects OS.File.Error
+   */
+  storeArticleInCache: Task.async(function* (article) {
+    let array = new TextEncoder().encode(JSON.stringify(article));
+    let path = this._toHashedPath(article.url);
+    yield this._ensureCacheDir();
+    return OS.File.writeAtomic(path, array, { tmpPath: path + ".tmp" })
+      .then(success => {
+        // Return the chain so the task only settles once the stat and the
+        // notification are done, and so a stat failure rejects the task
+        // instead of being silently dropped.
+        return OS.File.stat(path).then(info => {
+          return Messaging.sendRequest({
+            type: "Reader:AddedToCache",
+            url: article.url,
+            size: info.size,
+            path: path,
+          });
+        });
+      });
+  }),
+
+  /**
+   * Removes an article from the cache given an article URI.
+   *
+   * @param url The article URL.
+   * @return {Promise}
+   * @resolves When the article is removed.
+   * @rejects OS.File.Error
+   */
+  removeArticleFromCache: Task.async(function* (url) {
+    let path = this._toHashedPath(url);
+    yield OS.File.remove(path);
+  }),
+
+  /**
+   * Dumps a debug message when DEBUG is set.
+   *
+   * @param msg The message to print.
+   */
+  log: function(msg) {
+    // dump() does not add a line break itself, so terminate each message.
+    if (this.DEBUG)
+      dump("Reader: " + msg + "\n");
+  },
+
+  // Hosts (and their subdomains) for which reader mode is never offered.
+  _blockedHosts: [
+    "mail.google.com",
+    "github.com",
+    "pinterest.com",
+    "reddit.com",
+    "twitter.com",
+    "youtube.com",
+  ],
+
+  /**
+   * Decides whether a URI is worth checking for reader mode.
+   *
+   * @param uri The nsIURI to check.
+   * @param isBaseUri When true, the blocked-host and home-page checks are
+   *                  skipped and only the scheme / URL shape is validated.
+   * @return boolean Whether the URI may be parsed.
+   */
+  _shouldCheckUri: function (uri, isBaseUri = false) {
+    if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
+      this.log("Not parsing URI scheme: " + uri.scheme);
+      return false;
+    }
+
+    try {
+      uri.QueryInterface(Ci.nsIURL);
+    } catch (ex) {
+      // If this doesn't work, presumably the URL is not well-formed or something
+      return false;
+    }
+    // Sadly, some high-profile pages have false positives, so bail early for those.
+    // Match the blocked host itself or a dot-delimited subdomain of it; a bare
+    // endsWith() would also block unrelated hosts such as "notgithub.com".
+    let asciiHost = uri.asciiHost;
+    if (!isBaseUri && this._blockedHosts.some(blockedHost =>
+          asciiHost == blockedHost || asciiHost.endsWith("." + blockedHost))) {
+      return false;
+    }
+
+    if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
+      this.log("Not parsing home page: " + uri.spec);
+      return false;
+    }
+
+    return true;
+  },
+
+  /**
+   * Attempts to parse a document into an article. Heavy lifting happens
+   * in readerWorker.js.
+   *
+   * @param uri The base URI of the article.
+   * @param doc The document to parse.
+   * @return {Promise}
+   * @resolves JS object representing the article, or null if no article is found.
+   */
+  _readerParse: Task.async(function* (uri, doc) {
+    let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
+    // parseNodeLimit is only created by the lazy maxElemsToParse getter (which
+    // also registers the pref observer that keeps it current). Trigger that
+    // getter on first use so the limit is actually enforced; reading
+    // this.parseNodeLimit directly would otherwise always yield undefined.
+    let nodeLimit = ("parseNodeLimit" in this) ? this.parseNodeLimit
+                                               : this.maxElemsToParse;
+    // A limit of 0 (falsy) disables the check.
+    if (nodeLimit) {
+      let numTags = doc.getElementsByTagName("*").length;
+      if (numTags > nodeLimit) {
+        this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
+        histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
+        return null;
+      }
+    }
+
+    // Plain-object copy of the URI fields that is posted to the worker
+    // instead of the nsIURI itself.
+    let uriParam = {
+      spec: uri.spec,
+      host: uri.host,
+      prePath: uri.prePath,
+      scheme: uri.scheme,
+      pathBase: Services.io.newURI(".", null, uri).spec
+    };
+
+    let serializer = Cc["@mozilla.org/xmlextras/xmlserializer;1"].
+                     createInstance(Ci.nsIDOMSerializer);
+    let serializedDoc = serializer.serializeToString(doc);
+
+    let article = null;
+    try {
+      article = yield ReaderWorker.post("parseDocument", [uriParam, serializedDoc]);
+    } catch (e) {
+      Cu.reportError("Error in ReaderWorker: " + e);
+      histogram.add(PARSE_ERROR_WORKER);
+      // Return here so a worker failure is recorded once, not additionally
+      // as PARSE_ERROR_NO_ARTICLE below.
+      return null;
+    }
+
+    if (!article) {
+      this.log("Worker did not return an article");
+      histogram.add(PARSE_ERROR_NO_ARTICLE);
+      return null;
+    }
+
+    // Readability returns a URI object, but we only care about the URL.
+    article.url = article.uri.spec;
+    delete article.uri;
+
+    // The title is converted to plain text before it is handed back.
+    let flags = Ci.nsIDocumentEncoder.OutputSelectionOnly | Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
+    article.title = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils)
+                                                    .convertToPlainText(article.title, flags, 0);
+
+    histogram.add(PARSE_SUCCESS);
+    return article;
+  }),
+
+  // Lazily created nsICryptoHash instance; the getter replaces itself with
+  // the instance on first access.
+  get _cryptoHash() {
+    delete this._cryptoHash;
+    return this._cryptoHash = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
+  },
+
+  // Lazily created unicode converter with its charset set to "utf8"; the
+  // getter replaces itself with the instance on first access.
+  get _unicodeConverter() {
+    delete this._unicodeConverter;
+    this._unicodeConverter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
+                              .createInstance(Ci.nsIScriptableUnicodeConverter);
+    this._unicodeConverter.charset = "utf8";
+    return this._unicodeConverter;
+  },
+
+  /**
+   * Calculate the hashed path for a stripped article URL.
+ * + * @param url The article URL. This should have referrers removed. + * @return The file path to the cached article. + */ + _toHashedPath: function (url) { + let value = this._unicodeConverter.convertToByteArray(url); + this._cryptoHash.init(this._cryptoHash.MD5); + this._cryptoHash.update(value, value.length); + + let hash = CommonUtils.encodeBase32(this._cryptoHash.finish(false)); + let fileName = hash.substring(0, hash.indexOf("=")) + ".json"; + return OS.Path.join(OS.Constants.Path.profileDir, "readercache", fileName); + }, + + /** + * Ensures the cache directory exists. + * + * @return Promise + * @resolves When the cache directory exists. + * @rejects OS.File.Error + */ + _ensureCacheDir: function () { + let dir = OS.Path.join(OS.Constants.Path.profileDir, "readercache"); + return OS.File.exists(dir).then(exists => { + if (!exists) { + return OS.File.makeDir(dir); + } + return undefined; + }); + } +}; diff --git a/toolkit/components/reader/ReaderWorker.js b/toolkit/components/reader/ReaderWorker.js new file mode 100644 index 000000000..20023d4e0 --- /dev/null +++ b/toolkit/components/reader/ReaderWorker.js @@ -0,0 +1,50 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +/** + * A worker dedicated to handle parsing documents for reader view. 
+ */ + +importScripts("resource://gre/modules/workers/require.js", + "resource://gre/modules/reader/JSDOMParser.js", + "resource://gre/modules/reader/Readability.js"); + +var PromiseWorker = require("resource://gre/modules/workers/PromiseWorker.js"); + +const DEBUG = false; + +var worker = new PromiseWorker.AbstractWorker(); +worker.dispatch = function(method, args = []) { + return Agent[method](...args); +}; +worker.postMessage = function(result, ...transfers) { + self.postMessage(result, ...transfers); +}; +worker.close = function() { + self.close(); +}; +worker.log = function(...args) { + if (DEBUG) { + dump("ReaderWorker: " + args.join(" ") + "\n"); + } +}; + +self.addEventListener("message", msg => worker.handleMessage(msg)); + +var Agent = { + /** + * Parses structured article data from a document. + * + * @param {object} uri URI data for the document. + * @param {string} serializedDoc The serialized document. + * + * @return {object} Article object returned from Readability. + */ + parseDocument: function (uri, serializedDoc) { + let doc = new JSDOMParser().parse(serializedDoc); + return new Readability(uri, doc).parse(); + }, +}; diff --git a/toolkit/components/reader/ReaderWorker.jsm b/toolkit/components/reader/ReaderWorker.jsm new file mode 100644 index 000000000..ed0ea9aea --- /dev/null +++ b/toolkit/components/reader/ReaderWorker.jsm @@ -0,0 +1,17 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +/** + * Interface to a dedicated thread handling readability parsing. 
+ */ + +const Cu = Components.utils; + +Cu.import("resource://gre/modules/PromiseWorker.jsm", this); + +this.EXPORTED_SYMBOLS = ["ReaderWorker"]; + +this.ReaderWorker = new BasePromiseWorker("resource://gre/modules/reader/ReaderWorker.js"); diff --git a/toolkit/components/reader/content/aboutReader.html b/toolkit/components/reader/content/aboutReader.html new file mode 100644 index 000000000..b9c1139f6 --- /dev/null +++ b/toolkit/components/reader/content/aboutReader.html @@ -0,0 +1,74 @@ +<!DOCTYPE html> +<html> + +<head> + <meta content="text/html; charset=UTF-8" http-equiv="content-type" /> + <meta name="viewport" content="width=device-width; user-scalable=0" /> + + <link rel="stylesheet" href="chrome://global/skin/aboutReader.css" type="text/css"/> + + <script type="text/javascript;version=1.8" src="chrome://global/content/reader/aboutReader.js"></script> +</head> + +<body> + <div id="container" class="container"> + <div id="reader-header" class="header"> + <style scoped> + @import url("chrome://global/skin/aboutReaderControls.css"); + </style> + <a id="reader-domain" class="domain"></a> + <div class="domain-border"></div> + <h1 id="reader-title"></h1> + <div id="reader-credits" class="credits"></div> + </div> + + <div class="content"> + <style scoped> + @import url("chrome://global/skin/aboutReaderContent.css"); + </style> + <div id="moz-reader-content"></div> + </div> + + <div> + <style scoped> + @import url("chrome://global/skin/aboutReaderControls.css"); + </style> + <div id="reader-message"></div> + </div> + </div> + + <ul id="reader-toolbar" class="toolbar"> + <style scoped> + @import url("chrome://global/skin/aboutReaderControls.css"); + </style> + <li><button id="close-button" class="button close-button"/></li> + <ul id="style-dropdown" class="dropdown"> + <li><button class="dropdown-toggle button style-button"/></li> + <li id="reader-popup" class="dropdown-popup"> + <div id="font-type-buttons"></div> + <hr></hr> + <div id="font-size-buttons"> + <button 
id="font-size-minus" class="minus-button"/> + <button id="font-size-sample"/> + <button id="font-size-plus" class="plus-button"/> + </div> + <hr></hr> + <div id="content-width-buttons"> + <button id="content-width-minus" class="content-width-minus-button"/> + <button id="content-width-plus" class="content-width-plus-button"/> + </div> + <hr></hr> + <div id="line-height-buttons"> + <button id="line-height-minus" class="line-height-minus-button"/> + <button id="line-height-plus" class="line-height-plus-button"/> + </div> + <hr></hr> + <div id="color-scheme-buttons"></div> + <div class="dropdown-arrow"/> + </li> + </ul> + </ul> + +</body> + +</html> diff --git a/toolkit/components/reader/content/aboutReader.js b/toolkit/components/reader/content/aboutReader.js new file mode 100644 index 000000000..17133e69d --- /dev/null +++ b/toolkit/components/reader/content/aboutReader.js @@ -0,0 +1,9 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +window.addEventListener("DOMContentLoaded", function () { + document.dispatchEvent(new CustomEvent("AboutReaderContentLoaded", { bubbles: true })); +}); diff --git a/toolkit/components/reader/jar.mn b/toolkit/components/reader/jar.mn new file mode 100644 index 000000000..241f1e693 --- /dev/null +++ b/toolkit/components/reader/jar.mn @@ -0,0 +1,7 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +toolkit.jar: + content/global/reader/aboutReader.html (content/aboutReader.html) + content/global/reader/aboutReader.js (content/aboutReader.js) diff --git a/toolkit/components/reader/moz.build b/toolkit/components/reader/moz.build new file mode 100644 index 000000000..6863d6542 --- /dev/null +++ b/toolkit/components/reader/moz.build @@ -0,0 +1,26 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +JAR_MANIFESTS += ['jar.mn'] + +EXTRA_JS_MODULES += [ + 'AboutReader.jsm', + 'ReaderMode.jsm' +] + +EXTRA_JS_MODULES.reader = [ + 'JSDOMParser.js', + 'Readability.js', + 'ReaderWorker.js', + 'ReaderWorker.jsm' +] + +BROWSER_CHROME_MANIFESTS += [ + 'test/browser.ini' +] + +with Files('**'): + BUG_COMPONENT = ('Toolkit', 'Reader Mode') diff --git a/toolkit/components/reader/test/browser.ini b/toolkit/components/reader/test/browser.ini new file mode 100644 index 000000000..4f9df23b3 --- /dev/null +++ b/toolkit/components/reader/test/browser.ini @@ -0,0 +1,15 @@ +[DEFAULT] +support-files = head.js +[browser_readerMode.js] +support-files = + readerModeArticle.html + readerModeArticleHiddenNodes.html +[browser_readerMode_hidden_nodes.js] +support-files = + readerModeArticleHiddenNodes.html +[browser_readerMode_with_anchor.js] +support-files = + readerModeArticle.html +[browser_bug1124271_readerModePinnedTab.js] +support-files = + readerModeArticle.html diff --git a/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js b/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js new file mode 100644 index 000000000..39913aa3e --- /dev/null +++ b/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js @@ -0,0 +1,47 @@ +/* This Source Code Form is subject to the terms of the Mozilla 
Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Test that the reader mode button won't open in a new tab when clicked from a pinned tab + +const PREF = "reader.parse-on-load.enabled"; + +const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); + +var readerButton = document.getElementById("reader-mode-button"); + +add_task(function* () { + registerCleanupFunction(function() { + Services.prefs.clearUserPref(PREF); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Enable the reader mode button. + Services.prefs.setBoolPref(PREF, true); + + let tab = gBrowser.selectedTab = gBrowser.addTab(); + gBrowser.pinTab(tab); + + let initialTabsCount = gBrowser.tabs.length; + + // Point tab to a test page that is reader-able. + let url = TEST_PATH + "readerModeArticle.html"; + yield promiseTabLoadEvent(tab, url); + yield promiseWaitForCondition(() => !readerButton.hidden); + + readerButton.click(); + yield promiseTabLoadEvent(tab); + + // Ensure no new tabs are opened when entering reader mode in a pinned tab + is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); + + let pageShownPromise = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + readerButton.click(); + yield pageShownPromise; + // Ensure no new tabs are opened when exiting reader mode in a pinned tab + is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); + + gBrowser.removeCurrentTab(); +}); diff --git a/toolkit/components/reader/test/browser_readerMode.js b/toolkit/components/reader/test/browser_readerMode.js new file mode 100644 index 000000000..70290c3b5 --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode.js @@ -0,0 +1,220 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that the reader mode button appears and works properly on + * reader-able content. + */ +const TEST_PREFS = [ + ["reader.parse-on-load.enabled", true], +]; + +const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); + +var readerButton = document.getElementById("reader-mode-button"); + +add_task(function* test_reader_button() { + registerCleanupFunction(function() { + // Reset test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.clearUserPref(name); + }); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Set required test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.setBoolPref(name, value); + }); + Services.prefs.setBoolPref("browser.reader.detectedFirstArticle", false); + + let tab = gBrowser.selectedTab = gBrowser.addTab(); + is_element_hidden(readerButton, "Reader mode button is not present on a new tab"); + ok(!UITour.isInfoOnTarget(window, "readerMode-urlBar"), + "Info panel shouldn't appear without the reader mode button"); + ok(!Services.prefs.getBoolPref("browser.reader.detectedFirstArticle"), + "Shouldn't have detected the first article"); + + // We're going to show the reader mode intro popup, make sure we wait for it: + let tourPopupShownPromise = + BrowserTestUtils.waitForEvent(document.getElementById("UITourTooltip"), "popupshown"); + // Point tab to a test page that is reader-able. 
+ let url = TEST_PATH + "readerModeArticle.html"; + yield promiseTabLoadEvent(tab, url); + yield promiseWaitForCondition(() => !readerButton.hidden); + yield tourPopupShownPromise; + is_element_visible(readerButton, "Reader mode button is present on a reader-able page"); + ok(UITour.isInfoOnTarget(window, "readerMode-urlBar"), + "Info panel should be anchored at the reader mode button"); + ok(Services.prefs.getBoolPref("browser.reader.detectedFirstArticle"), + "Should have detected the first article"); + + // Switch page into reader mode. + readerButton.click(); + yield promiseTabLoadEvent(tab); + ok(!UITour.isInfoOnTarget(window, "readerMode-urlBar"), "Info panel should have closed"); + + let readerUrl = gBrowser.selectedBrowser.currentURI.spec; + ok(readerUrl.startsWith("about:reader"), "about:reader loaded after clicking reader mode button"); + is_element_visible(readerButton, "Reader mode button is present on about:reader"); + + is(gURLBar.value, readerUrl, "gURLBar value is about:reader URL"); + is(gURLBar.textValue, url.substring("http://".length), "gURLBar is displaying original article URL"); + + // Check selected value for URL bar + yield new Promise((resolve, reject) => { + waitForClipboard(url, function () { + gURLBar.focus(); + gURLBar.select(); + goDoCommand("cmd_copy"); + }, resolve, reject); + }); + + info("Got correct URL when copying"); + + // Switch page back out of reader mode. + let promisePageShow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + readerButton.click(); + yield promisePageShow; + is(gBrowser.selectedBrowser.currentURI.spec, url, + "Back to the original page after clicking active reader mode button"); + ok(gBrowser.selectedBrowser.canGoForward, + "Moved one step back in the session history."); + + // Load a new tab that is NOT reader-able. 
+ let newTab = gBrowser.selectedTab = gBrowser.addTab(); + yield promiseTabLoadEvent(newTab, "about:robots"); + yield promiseWaitForCondition(() => readerButton.hidden); + is_element_hidden(readerButton, "Reader mode button is not present on a non-reader-able page"); + + // Switch back to the original tab to make sure reader mode button is still visible. + gBrowser.removeCurrentTab(); + yield promiseWaitForCondition(() => !readerButton.hidden); + is_element_visible(readerButton, "Reader mode button is present on a reader-able page"); +}); + +add_task(function* test_getOriginalUrl() { + let { ReaderMode } = Cu.import("resource://gre/modules/ReaderMode.jsm", {}); + let url = "http://foo.com/article.html"; + + is(ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(url)), url, "Found original URL from encoded URL"); + is(ReaderMode.getOriginalUrl("about:reader?foobar"), null, "Did not find original URL from malformed reader URL"); + is(ReaderMode.getOriginalUrl(url), null, "Did not find original URL from non-reader URL"); + + let badUrl = "http://foo.com/?;$%^^"; + is(ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(badUrl)), badUrl, "Found original URL from encoded malformed URL"); + is(ReaderMode.getOriginalUrl("about:reader?url=" + badUrl), badUrl, "Found original URL from non-encoded malformed URL"); +}); + +add_task(function* test_reader_view_element_attribute_transform() { + registerCleanupFunction(function() { + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + function observeAttribute(element, attribute, triggerFn, checkFn) { + return new Promise(resolve => { + let observer = new MutationObserver((mutations) => { + mutations.forEach( mu => { + if (element.getAttribute(attribute) !== mu.oldValue) { + checkFn(); + resolve(); + observer.disconnect(); + } + }); + }); + + observer.observe(element, { + attributes: true, + attributeOldValue: true, + attributeFilter: [attribute] + }); + + triggerFn(); + 
}); + } + + let command = document.getElementById("View:ReaderView"); + let tab = yield BrowserTestUtils.openNewForegroundTab(gBrowser); + is(command.hidden, true, "Command element should have the hidden attribute"); + + info("Navigate a reader-able page"); + let waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + yield observeAttribute(command, "hidden", + () => { + let url = TEST_PATH + "readerModeArticle.html"; + tab.linkedBrowser.loadURI(url); + }, + () => { + is(command.hidden, false, "Command's hidden attribute should be false on a reader-able page"); + } + ); + yield waitForPageshow; + + info("Navigate a non-reader-able page"); + waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + yield observeAttribute(command, "hidden", + () => { + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + tab.linkedBrowser.loadURI(url); + }, + () => { + is(command.hidden, true, "Command's hidden attribute should be true on a non-reader-able page"); + } + ); + yield waitForPageshow; + + info("Navigate a reader-able page"); + waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + yield observeAttribute(command, "hidden", + () => { + let url = TEST_PATH + "readerModeArticle.html"; + tab.linkedBrowser.loadURI(url); + }, + () => { + is(command.hidden, false, "Command's hidden attribute should be false on a reader-able page"); + } + ); + yield waitForPageshow; + + info("Enter Reader Mode"); + waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + yield observeAttribute(readerButton, "readeractive", + () => { + readerButton.click(); + }, + () => { + is(readerButton.getAttribute("readeractive"), "true", "readerButton's readeractive attribute should be true when entering reader mode"); + } + ); + yield waitForPageshow; + + info("Exit Reader Mode"); + waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + 
yield observeAttribute(readerButton, "readeractive", + () => { + readerButton.click(); + }, + () => { + is(readerButton.getAttribute("readeractive"), "", "readerButton's readeractive attribute should be empty when reader mode is exited"); + } + ); + yield waitForPageshow; + + info("Navigate a non-reader-able page"); + waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); + yield observeAttribute(command, "hidden", + () => { + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + tab.linkedBrowser.loadURI(url); + }, + () => { + is(command.hidden, true, "Command's hidden attribute should be true on a non-reader-able page"); + } + ); + yield waitForPageshow; +}); diff --git a/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js b/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js new file mode 100644 index 000000000..b73eab58d --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js @@ -0,0 +1,53 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Test that the reader mode button appears and works properly on + * reader-able content. + */ +const TEST_PREFS = [ + ["reader.parse-on-load.enabled", true], + ["browser.reader.detectedFirstArticle", false], +]; + +const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); + +var readerButton = document.getElementById("reader-mode-button"); + +add_task(function* test_reader_button() { + registerCleanupFunction(function() { + // Reset test prefs. + TEST_PREFS.forEach(([name, value]) => { + Services.prefs.clearUserPref(name); + }); + while (gBrowser.tabs.length > 1) { + gBrowser.removeCurrentTab(); + } + }); + + // Set required test prefs. 
+ TEST_PREFS.forEach(([name, value]) => { + Services.prefs.setBoolPref(name, value); + }); + + let tab = gBrowser.selectedTab = gBrowser.addTab(); + is_element_hidden(readerButton, "Reader mode button is not present on a new tab"); + // Point tab to a test page that is not reader-able due to hidden nodes. + let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; + let paintPromise = ContentTask.spawn(tab.linkedBrowser, "", function() { + return new Promise(resolve => { + addEventListener("DOMContentLoaded", function onDCL() { + removeEventListener("DOMContentLoaded", onDCL); + addEventListener("MozAfterPaint", function onPaint() { + removeEventListener("MozAfterPaint", onPaint); + resolve(); + }); + }); + }); + }); + tab.linkedBrowser.loadURI(url); + yield paintPromise; + + is_element_hidden(readerButton, "Reader mode button is still not present on tab with unreadable content."); +}); diff --git a/toolkit/components/reader/test/browser_readerMode_with_anchor.js b/toolkit/components/reader/test/browser_readerMode_with_anchor.js new file mode 100644 index 000000000..24c23c49f --- /dev/null +++ b/toolkit/components/reader/test/browser_readerMode_with_anchor.js @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +"use strict"; + +const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); + +add_task(function* () { + yield BrowserTestUtils.withNewTab(TEST_PATH + "readerModeArticle.html#foo", function* (browser) { + let pageShownPromise = BrowserTestUtils.waitForContentEvent(browser, "AboutReaderContentReady"); + let readerButton = document.getElementById("reader-mode-button"); + readerButton.click(); + yield pageShownPromise; + yield ContentTask.spawn(browser, null, function* () { + // Check if offset != 0 + ok(content.document.getElementById("foo") !== null, "foo element should be in document"); + ok(content.pageYOffset != 0, "pageYOffset should be > 0"); + }); + }); +}); diff --git a/toolkit/components/reader/test/head.js b/toolkit/components/reader/test/head.js new file mode 100644 index 000000000..3d8d989bc --- /dev/null +++ b/toolkit/components/reader/test/head.js @@ -0,0 +1,126 @@ +XPCOMUtils.defineLazyModuleGetter(this, "Promise", + "resource://gre/modules/Promise.jsm"); + +/* exported promiseTabLoadEvent, promiseWaitForCondition, is_element_visible, is_element_hidden */ + +/** + * Waits for a load (or custom) event to finish in a given tab. If provided + * load an uri into the tab. + * + * @param tab + * The tab to load into. + * @param [optional] url + * The url to load, or the current url. + * @return {Promise} resolved when the event is handled. 
+ * @resolves to the received event + * @rejects if a valid load event is not received within a meaningful interval + */ +function promiseTabLoadEvent(tab, url) { + let deferred = Promise.defer(); + info("Wait tab event: load"); + + function handle(loadedUrl) { + if (loadedUrl === "about:blank" || (url && loadedUrl !== url)) { + info(`Skipping spurious load event for ${loadedUrl}`); + return false; + } + + info("Tab event received: load"); + return true; + } + + // Create two promises: one resolved from the content process when the page + // loads and one that is rejected if we take too long to load the url. + let loaded = BrowserTestUtils.browserLoaded(tab.linkedBrowser, false, handle); + + let timeout = setTimeout(() => { + deferred.reject(new Error("Timed out while waiting for a 'load' event")); + }, 30000); + + loaded.then(() => { + clearTimeout(timeout); + deferred.resolve(); + }); + + if (url) + BrowserTestUtils.loadURI(tab.linkedBrowser, url); + + // Promise.all rejects if either promise rejects (i.e. if we time out) and + // if our loaded promise resolves before the timeout, then we resolve the + // timeout promise as well, causing the all promise to resolve. + return Promise.all([deferred.promise, loaded]); +} + +function waitForCondition(condition, nextTest, errorMsg, retryTimes) { + retryTimes = typeof retryTimes !== 'undefined' ? 
retryTimes : 30; + var tries = 0; + var interval = setInterval(function() { + if (tries >= retryTimes) { + ok(false, errorMsg); + moveOn(); + } + var conditionPassed; + try { + conditionPassed = condition(); + } catch (e) { + ok(false, e + "\n" + e.stack); + conditionPassed = false; + } + if (conditionPassed) { + moveOn(); + } + tries++; + }, 100); + var moveOn = function() { + clearInterval(interval); + nextTest(); + }; +} + +function promiseWaitForCondition(aConditionFn) { + let deferred = Promise.defer(); + waitForCondition(aConditionFn, deferred.resolve, "Condition didn't pass."); + return deferred.promise; +} + +function is_element_visible(element, msg) { + isnot(element, null, "Element should not be null, when checking visibility"); + ok(is_visible(element), msg || "Element should be visible"); + +} +function is_element_hidden(element, msg) { + isnot(element, null, "Element should not be null, when checking visibility"); + ok(is_hidden(element), msg || "Element should be hidden"); +} + +function is_visible(element) { + var style = element.ownerGlobal.getComputedStyle(element); + if (style.display == "none") + return false; + if (style.visibility != "visible") + return false; + if (style.display == "-moz-popup" && element.state != "open") + return false; + + // Hiding a parent element will hide all its children + if (element.parentNode != element.ownerDocument) + return is_visible(element.parentNode); + + return true; +} + +function is_hidden(element) { + var style = element.ownerGlobal.getComputedStyle(element); + if (style.display == "none") + return true; + if (style.visibility != "visible") + return true; + if (style.display == "-moz-popup") + return ["hiding", "closed"].indexOf(element.state) != -1; + + // Hiding a parent element will hide all its children + if (element.parentNode != element.ownerDocument) + return is_hidden(element.parentNode); + + return false; +} diff --git a/toolkit/components/reader/test/readerModeArticle.html 
b/toolkit/components/reader/test/readerModeArticle.html new file mode 100644 index 000000000..7c5033d5b --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticle.html @@ -0,0 +1,25 @@ +<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. 
Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. 
Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<div id="foo">by John Doe</div> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. 
Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. 
Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. 
Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +</div> +</body> +</html> diff --git a/toolkit/components/reader/test/readerModeArticleHiddenNodes.html b/toolkit/components/reader/test/readerModeArticleHiddenNodes.html new file mode 100644 index 000000000..92441b797 --- /dev/null +++ b/toolkit/components/reader/test/readerModeArticleHiddenNodes.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html> +<head> +<title>Article title</title> +<meta name="description" content="This is the article description." /> +</head> +<body> +<style> +p { display: none } +</style> +<header>Site header</header> +<div> +<h1>Article title</h1> +<h2 class="author">by Jane Doe</h2> +<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.</p> +<p>Vivamus fermentum semper porta. 
Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. 
Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +<p>Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.</p> +</div> +</body> +</html> |