Merge pull request #367 from Ascrod/readerview

Reader and Narrator Updates
author: Moonchild <mcwerewolf@gmail.com> 2018-05-16 17:10:38 +0200
committer: GitHub <noreply@github.com> 2018-05-16 17:10:38 +0200
commit: 90942a2af0cabb9345cf04fa6113e12197504fcf (patch)
tree: e16c71be5a1343abe0489863f84ed271b6ebd3d7 /toolkit/components/reader
parent: 819ca50f163a9113772a7dbfd617d97151893337 (diff)
parent: 9ef464a5ac0a17135a0f7b4fef070bb4f7fbe44c (diff)
download: UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar
UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.gz
UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.lz
UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.xz
UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.zip
7 files changed, 845 insertions, 857 deletions
diff --git a/toolkit/components/reader/AboutReader.jsm b/toolkit/components/reader/AboutReader.jsm
index 1fb9db123..fb82e5789 100644
--- a/toolkit/components/reader/AboutReader.jsm
+++ b/toolkit/components/reader/AboutReader.jsm
@@ -15,12 +15,12 @@ Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "AsyncPrefs", "resource://gre/modules/AsyncPrefs.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "NarrateControls", "resource://gre/modules/narrate/NarrateControls.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Rect", "resource://gre/modules/Geometry.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "UITelemetry", "resource://gre/modules/UITelemetry.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "PluralForm", "resource://gre/modules/PluralForm.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "PlacesUtils", "resource://gre/modules/PlacesUtils.jsm");
 
 var gStrings = Services.strings.createBundle("chrome://global/locale/aboutReader.properties");
 
-var AboutReader = function(mm, win, articlePromise) {
+var AboutReader = function(win, articlePromise) {
   let url = this._getOriginalUrl(win);
   if (!(url.startsWith("http://") || url.startsWith("https://"))) {
     let errorMsg = "Only http:// and https:// URLs can be loaded in about:reader.";
@@ -33,57 +33,59 @@ var AboutReader = function(mm, win, articlePromise) {
 
   let doc = win.document;
 
-  this._mm = mm;
-  this._mm.addMessageListener("Reader:CloseDropdown", this);
-  this._mm.addMessageListener("Reader:AddButton", this);
-  this._mm.addMessageListener("Reader:RemoveButton", this);
-  this._mm.addMessageListener("Reader:GetStoredArticleData", this);
-
   this._docRef = Cu.getWeakReference(doc);
   this._winRef = Cu.getWeakReference(win);
   this._innerWindowId = win.QueryInterface(Ci.nsIInterfaceRequestor)
     .getInterface(Ci.nsIDOMWindowUtils).currentInnerWindowID;
 
   this._article = null;
+  this._languagePromise = new Promise(resolve => {
+    this._foundLanguage = resolve;
+  });
 
   if (articlePromise) {
     this._articlePromise = articlePromise;
   }
 
-  this._headerElementRef = Cu.getWeakReference(doc.getElementById("reader-header"));
-  this._domainElementRef = Cu.getWeakReference(doc.getElementById("reader-domain"));
-  this._titleElementRef = Cu.getWeakReference(doc.getElementById("reader-title"));
-  this._creditsElementRef = Cu.getWeakReference(doc.getElementById("reader-credits"));
-  this._contentElementRef = Cu.getWeakReference(doc.getElementById("moz-reader-content"));
-  this._toolbarElementRef = Cu.getWeakReference(doc.getElementById("reader-toolbar"));
-  this._messageElementRef = Cu.getWeakReference(doc.getElementById("reader-message"));
+  this._headerElementRef = Cu.getWeakReference(doc.querySelector(".reader-header"));
+  this._domainElementRef = Cu.getWeakReference(doc.querySelector(".reader-domain"));
+  this._titleElementRef = Cu.getWeakReference(doc.querySelector(".reader-title"));
+  this._readTimeElementRef = Cu.getWeakReference(doc.querySelector(".reader-estimated-time"));
+  this._creditsElementRef = Cu.getWeakReference(doc.querySelector(".reader-credits"));
+  this._contentElementRef = Cu.getWeakReference(doc.querySelector(".moz-reader-content"));
+  this._toolbarElementRef = Cu.getWeakReference(doc.querySelector(".reader-toolbar"));
+  this._messageElementRef = Cu.getWeakReference(doc.querySelector(".reader-message"));
+  this._containerElementRef = Cu.getWeakReference(doc.querySelector(".container"));
 
   this._scrollOffset = win.pageYOffset;
 
-  doc.addEventListener("click", this, false);
+  doc.addEventListener("click", this);
+
+  win.addEventListener("pagehide", this);
+  win.addEventListener("scroll", this);
+  win.addEventListener("resize", this);
 
-  win.addEventListener("pagehide", this, false);
-  win.addEventListener("scroll", this, false);
-  win.addEventListener("resize", this, false);
+  win.addEventListener("AboutReaderAddButton", this, false, true);
+  win.addEventListener("AboutReaderRemoveButton", this, false, true);
 
   Services.obs.addObserver(this, "inner-window-destroyed", false);
 
-  doc.addEventListener("visibilitychange", this, false);
+  doc.addEventListener("visibilitychange", this);
 
   this._setupStyleDropdown();
   this._setupButton("close-button", this._onReaderClose.bind(this), "aboutReader.toolbar.close");
 
-  const gIsFirefoxDesktop = Services.appinfo.ID == "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}";
-  if (gIsFirefoxDesktop) {
-    // we're ready for any external setup, send a signal for that.
-    this._mm.sendAsyncMessage("Reader:OnSetup");
-  }
+  // we're ready for any external setup, send a signal for that.
+  doc.dispatchEvent(
+    new win.CustomEvent("AboutReaderOnSetup", { bubbles: true, cancelable: false }));
 
   let colorSchemeValues = JSON.parse(Services.prefs.getCharPref("reader.color_scheme.values"));
   let colorSchemeOptions = colorSchemeValues.map((value) => {
-    return { name: gStrings.GetStringFromName("aboutReader.colorScheme." + value),
-             value: value,
-             itemClass: value + "-button" };
+    return {
+      name: gStrings.GetStringFromName("aboutReader.colorScheme." + value),
+      value,
+      itemClass: value + "-button"
+    };
   });
 
   let colorScheme = Services.prefs.getCharPref("reader.color_scheme");
@@ -114,7 +116,7 @@ var AboutReader = function(mm, win, articlePromise) {
   this._setupLineHeightButtons();
 
   if (win.speechSynthesis && Services.prefs.getBoolPref("narrate.enabled")) {
-    new NarrateControls(mm, win);
+    new NarrateControls(win, this._languagePromise);
   }
 
   this._loadArticle();
@@ -146,6 +148,10 @@ AboutReader.prototype = {
     return this._titleElementRef.get();
   },
 
+  get _readTimeElement() {
+    return this._readTimeElementRef.get();
+  },
+
   get _creditsElement() {
     return this._creditsElementRef.get();
   },
@@ -162,6 +168,10 @@ AboutReader.prototype = {
     return this._messageElementRef.get();
   },
 
+  get _containerElement() {
+    return this._containerElementRef.get();
+  },
+
   get _isToolbarVertical() {
     if (this._toolbarVertical !== undefined) {
       return this._toolbarVertical;
@@ -178,72 +188,31 @@ AboutReader.prototype = {
     return _viewId;
   },
 
-  receiveMessage: function (message) {
-    switch (message.name) {
-      // Triggered by Android user pressing BACK while the banner font-dropdown is open.
-      case "Reader:CloseDropdown": {
-        // Just close it.
-        this._closeDropdowns();
-        break;
-      }
-
-      case "Reader:AddButton": {
-        if (message.data.id && message.data.image &&
-            !this._doc.getElementById(message.data.id)) {
-          let btn = this._doc.createElement("button");
-          btn.setAttribute("class", "button");
-          btn.setAttribute("style", "background-image: url('" + message.data.image + "')");
-          btn.setAttribute("id", message.data.id);
-          if (message.data.title)
-            btn.setAttribute("title", message.data.title);
-          if (message.data.text)
-            btn.textContent = message.data.text;
-          let tb = this._doc.getElementById("reader-toolbar");
-          tb.appendChild(btn);
-          this._setupButton(message.data.id, button => {
-            this._mm.sendAsyncMessage("Reader:Clicked-" + button.getAttribute("id"), { article: this._article });
-          });
-        }
-        break;
-      }
-      case "Reader:RemoveButton": {
-        if (message.data.id) {
-          let btn = this._doc.getElementById(message.data.id);
-          if (btn)
-            btn.remove();
-        }
-        break;
-      }
-      case "Reader:GetStoredArticleData": {
-        this._mm.sendAsyncMessage("Reader:StoredArticleData", { article: this._article });
-      }
-    }
-  },
-
-  handleEvent: function(aEvent) {
+  handleEvent(aEvent) {
     if (!aEvent.isTrusted)
       return;
 
     switch (aEvent.type) {
       case "click":
         let target = aEvent.target;
-        if (target.classList.contains('dropdown-toggle')) {
+        if (target.classList.contains("dropdown-toggle")) {
           this._toggleDropdownClicked(aEvent);
-        } else if (!target.closest('.dropdown-popup')) {
+        } else if (!target.closest(".dropdown-popup")) {
           this._closeDropdowns();
         }
+        if (target.tagName == "A" && !target.classList.contains("reader-domain")) {
+          this._linkClicked(aEvent);
+        }
         break;
       case "scroll":
         this._closeDropdowns(true);
-        let isScrollingUp = this._scrollOffset > aEvent.pageY;
-        this._setSystemUIVisibility(isScrollingUp);
         this._scrollOffset = aEvent.pageY;
         break;
       case "resize":
         this._updateImageMargins();
         if (this._isToolbarVertical) {
           this._win.setTimeout(() => {
-            for (let dropdown of this._doc.querySelectorAll('.dropdown.open')) {
+            for (let dropdown of this._doc.querySelectorAll(".dropdown.open")) {
               this._updatePopupPosition(dropdown);
             }
           }, 0);
@@ -261,35 +230,57 @@ AboutReader.prototype = {
       case "pagehide":
         // Close the Banners Font-dropdown, cleanup Android BackPressListener.
         this._closeDropdowns();
-
-        this._mm.removeMessageListener("Reader:CloseDropdown", this);
-        this._mm.removeMessageListener("Reader:AddButton", this);
-        this._mm.removeMessageListener("Reader:RemoveButton", this);
-        this._mm.removeMessageListener("Reader:GetStoredArticleData", this);
         this._windowUnloaded = true;
         break;
+
+      case "AboutReaderAddButton": {
+        if (aEvent.detail.id && aEvent.detail.image &&
+            !this._doc.getElementById(aEvent.detail.id)) {
+          let btn = this._doc.createElement("button");
+          btn.setAttribute("class", "button " + aEvent.detail.id);
+          btn.setAttribute("style", "background-image: url('" + aEvent.detail.image + "')");
+          btn.setAttribute("id", aEvent.detail.id);
+          if (aEvent.detail.title)
+            btn.setAttribute("title", aEvent.detail.title);
+          if (aEvent.detail.text)
+            btn.textContent = aEvent.detail.text;
+          let tb = this._toolbarElement;
+          tb.appendChild(btn);
+          this._setupButton(aEvent.detail.id, button => {
+            var data = { article: this._article };
+            this._doc.dispatchEvent(
+              new this._win.CustomEvent("AboutReaderButtonClicked-" + button.getAttribute("id"), {detail: data, bubbles: true, cancelable: false}));
+          });
+        }
+        break;
+      }
+
+      case "AboutReaderRemoveButton": {
+        if (aEvent.detail.id) {
+          let btn = this._doc.getElementById(aEvent.detail.id);
+          if (btn)
+            btn.remove();
+        }
+        break;
+      }
     }
   },
 
-  observe: function(subject, topic, data) {
+  observe(subject, topic, data) {
     if (subject.QueryInterface(Ci.nsISupportsPRUint64).data != this._innerWindowId) {
       return;
     }
 
-    Services.obs.removeObserver(this, "inner-window-destroyed", false);
-
-    this._mm.removeMessageListener("Reader:CloseDropdown", this);
-    this._mm.removeMessageListener("Reader:AddButton", this);
-    this._mm.removeMessageListener("Reader:RemoveButton", this);
+    Services.obs.removeObserver(this, "inner-window-destroyed");
     this._windowUnloaded = true;
   },
 
-  _onReaderClose: function() {
-    ReaderMode.leaveReaderMode(this._mm.docShell, this._win);
+  _onReaderClose() {
+    ReaderMode.leaveReaderMode(this._win.document.docShell, this._win);
   },
 
-  _setFontSize: function(newFontSize) {
-    let containerClasses = this._doc.getElementById("container").classList;
+  _setFontSize(newFontSize) {
+    let containerClasses = this._containerElement.classList;
 
     if (this._fontSize > 0)
       containerClasses.remove("font-size" + this._fontSize);
@@ -299,19 +290,19 @@ AboutReader.prototype = {
     return AsyncPrefs.set("reader.font_size", this._fontSize);
   },
 
-  _setupFontSizeButtons: function() {
+  _setupFontSizeButtons() {
     const FONT_SIZE_MIN = 1;
     const FONT_SIZE_MAX = 9;
 
     // Sample text shown in Android UI.
-    let sampleText = this._doc.getElementById("font-size-sample");
+    let sampleText = this._doc.querySelector(".font-size-sample");
     sampleText.textContent = gStrings.GetStringFromName("aboutReader.fontTypeSample");
 
     let currentSize = Services.prefs.getIntPref("reader.font_size");
     currentSize = Math.max(FONT_SIZE_MIN, Math.min(FONT_SIZE_MAX, currentSize));
 
-    let plusButton = this._doc.getElementById("font-size-plus");
-    let minusButton = this._doc.getElementById("font-size-minus");
+    let plusButton = this._doc.querySelector(".plus-button");
+    let minusButton = this._doc.querySelector(".minus-button");
 
     function updateControls() {
       if (currentSize === FONT_SIZE_MIN) {
@@ -360,8 +351,8 @@ AboutReader.prototype = {
     }, true);
   },
 
-  _setContentWidth: function(newContentWidth) {
-    let containerClasses = this._doc.getElementById("container").classList;
+  _setContentWidth(newContentWidth) {
+    let containerClasses = this._containerElement.classList;
 
     if (this._contentWidth > 0)
       containerClasses.remove("content-width" + this._contentWidth);
@@ -371,15 +362,15 @@ AboutReader.prototype = {
     return AsyncPrefs.set("reader.content_width", this._contentWidth);
   },
 
-  _setupContentWidthButtons: function() {
+  _setupContentWidthButtons() {
     const CONTENT_WIDTH_MIN = 1;
     const CONTENT_WIDTH_MAX = 9;
 
     let currentContentWidth = Services.prefs.getIntPref("reader.content_width");
     currentContentWidth = Math.max(CONTENT_WIDTH_MIN, Math.min(CONTENT_WIDTH_MAX, currentContentWidth));
 
-    let plusButton = this._doc.getElementById("content-width-plus");
-    let minusButton = this._doc.getElementById("content-width-minus");
+    let plusButton = this._doc.querySelector(".content-width-plus-button");
+    let minusButton = this._doc.querySelector(".content-width-minus-button");
 
     function updateControls() {
       if (currentContentWidth === CONTENT_WIDTH_MIN) {
@@ -428,8 +419,8 @@ AboutReader.prototype = {
     }, true);
   },
 
-  _setLineHeight: function(newLineHeight) {
-    let contentClasses = this._doc.getElementById("moz-reader-content").classList;
+  _setLineHeight(newLineHeight) {
+    let contentClasses = this._contentElement.classList;
 
     if (this._lineHeight > 0)
       contentClasses.remove("line-height" + this._lineHeight);
@@ -439,15 +430,15 @@ AboutReader.prototype = {
     return AsyncPrefs.set("reader.line_height", this._lineHeight);
   },
 
-  _setupLineHeightButtons: function() {
+  _setupLineHeightButtons() {
     const LINE_HEIGHT_MIN = 1;
     const LINE_HEIGHT_MAX = 9;
 
     let currentLineHeight = Services.prefs.getIntPref("reader.line_height");
     currentLineHeight = Math.max(LINE_HEIGHT_MIN, Math.min(LINE_HEIGHT_MAX, currentLineHeight));
 
-    let plusButton = this._doc.getElementById("line-height-plus");
-    let minusButton = this._doc.getElementById("line-height-minus");
+    let plusButton = this._doc.querySelector(".line-height-plus-button");
+    let minusButton = this._doc.querySelector(".line-height-minus-button");
 
     function updateControls() {
       if (currentLineHeight === LINE_HEIGHT_MIN) {
@@ -496,7 +487,7 @@ AboutReader.prototype = {
     }, true);
   },
 
-  _handleDeviceLight: function(newLux) {
+  _handleDeviceLight(newLux) {
     // Desired size of the this._luxValues array.
     let luxValuesSize = 10;
     // Add new lux value at the front of the array.
@@ -513,7 +504,7 @@ AboutReader.prototype = {
       return;
     }
     // Holds the average of the lux values collected in this._luxValues.
-    let averageLuxValue = this._totalLux/luxValuesSize;
+    let averageLuxValue = this._totalLux / luxValuesSize;
 
     this._updateColorScheme(averageLuxValue);
     // Pop the oldest value off the array.
@@ -522,7 +513,7 @@ AboutReader.prototype = {
     this._totalLux -= oldLux;
   },
 
-  _handleVisibilityChange: function() {
+  _handleVisibilityChange() {
     let colorScheme = Services.prefs.getCharPref("reader.color_scheme");
     if (colorScheme != "auto") {
       return;
@@ -533,19 +524,19 @@ AboutReader.prototype = {
   },
 
   // Setup or teardown the ambient light tracking system.
-  _enableAmbientLighting: function(enable) {
+  _enableAmbientLighting(enable) {
     if (enable) {
-      this._win.addEventListener("devicelight", this, false);
+      this._win.addEventListener("devicelight", this);
       this._luxValues = [];
       this._totalLux = 0;
     } else {
-      this._win.removeEventListener("devicelight", this, false);
+      this._win.removeEventListener("devicelight", this);
       delete this._luxValues;
       delete this._totalLux;
     }
   },
 
-  _updateColorScheme: function(luxValue) {
+  _updateColorScheme(luxValue) {
     // Upper bound value for "dark" color scheme beyond which it changes to "light".
     let upperBoundDark = 50;
     // Lower bound value for "light" color scheme beyond which it changes to "dark".
@@ -564,7 +555,7 @@ AboutReader.prototype = {
       this._setColorScheme("light");
   },
 
-  _setColorScheme: function(newColorScheme) {
+  _setColorScheme(newColorScheme) {
     // "auto" is not a real color scheme
     if (this._colorScheme === newColorScheme || newColorScheme === "auto")
       return;
@@ -580,14 +571,14 @@ AboutReader.prototype = {
 
   // Pref values include "dark", "light", and "auto", which automatically switches
   // between light and dark color schemes based on the ambient light level.
-  _setColorSchemePref: function(colorSchemePref) {
+  _setColorSchemePref(colorSchemePref) {
     this._enableAmbientLighting(colorSchemePref === "auto");
     this._setColorScheme(colorSchemePref);
 
     AsyncPrefs.set("reader.color_scheme", colorSchemePref);
   },
 
-  _setFontType: function(newFontType) {
+  _setFontType(newFontType) {
     if (this._fontType === newFontType)
       return;
 
@@ -602,20 +593,34 @@ AboutReader.prototype = {
     AsyncPrefs.set("reader.font_type", this._fontType);
   },
 
-  _setSystemUIVisibility: function(visible) {
-    this._mm.sendAsyncMessage("Reader:SystemUIVisibility", { visible: visible });
+  // Show or hide the toolbar through its inline opacity; no-op if already there.
+  _setToolbarVisibility(visible) {
+    let toolbar = this._toolbarElement;
+    let wantedOpacity = visible ? "1" : "0";
+    if (toolbar.style.opacity == wantedOpacity)
+      return;
+    if (visible) {
+      toolbar.removeAttribute("hidden");
+    } else {
+      // Set "hidden" only when transitionend fires and opacity is still "0".
+      toolbar.addEventListener("transitionend", () => {
+        if (toolbar.style.opacity == "0")
+          toolbar.setAttribute("hidden", "");
+      }, { once: true });
+    }
+    toolbar.style.opacity = wantedOpacity;
   },
 
-  _loadArticle: Task.async(function* () {
+  async _loadArticle() {
     let url = this._getOriginalUrl();
     this._showProgressDelayed();
 
     let article;
     if (this._articlePromise) {
-      article = yield this._articlePromise;
+      article = await this._articlePromise;
     } else {
       try {
-        article = yield this._getArticle(url);
+        article = await this._getArticle(url);
       } catch (e) {
         if (e && e.newURL) {
           let readerURL = "about:reader?url=" + encodeURIComponent(e.newURL);
@@ -638,47 +643,37 @@ AboutReader.prototype = {
     }
 
     this._showContent(article);
-  }),
-
-  _getArticle: function(url) {
-    return new Promise((resolve, reject) => {
-      let listener = (message) => {
-        this._mm.removeMessageListener("Reader:ArticleData", listener);
-        if (message.data.newURL) {
-          reject({ newURL: message.data.newURL });
-          return;
-        }
-        resolve(message.data.article);
-      };
-      this._mm.addMessageListener("Reader:ArticleData", listener);
-      this._mm.sendAsyncMessage("Reader:ArticleGet", { url: url });
-    });
   },
 
-  _requestFavicon: function() {
-    let handleFaviconReturn = (message) => {
-      this._mm.removeMessageListener("Reader:FaviconReturn", handleFaviconReturn);
-      this._loadFavicon(message.data.url, message.data.faviconUrl);
-    };
+  _getArticle(url) {
+    return ReaderMode.downloadAndParseDocument(url);
+  },
 
-    this._mm.addMessageListener("Reader:FaviconReturn", handleFaviconReturn);
-    this._mm.sendAsyncMessage("Reader:FaviconRequest", { url: this._article.url });
+  _requestFavicon() {
+    // Remember which article this lookup is for: _loadFavicon() compares it
+    // with the current article URL and drops the icon when they differ.
+    let articleUrl = this._article.url;
+    PlacesUtils.promiseFaviconLinkUrl(articleUrl).then(favicon => {
+      this._loadFavicon(articleUrl, favicon.path.replace(/^favicon:/, ""));
+    }, reason => {
+      Cu.reportError("Error requesting favicon URL for about:reader content: " + reason);
+    }).catch(Cu.reportError);
   },
 
-  _loadFavicon: function(url, faviconUrl) {
+  _loadFavicon(url, faviconUrl) {
     if (this._article.url !== url)
       return;
 
     let doc = this._doc;
 
-    let link = doc.createElement('link');
-    link.rel = 'shortcut icon';
+    let link = doc.createElement("link");
+    link.rel = "shortcut icon";
     link.href = faviconUrl;
 
-    doc.getElementsByTagName('head')[0].appendChild(link);
+    doc.getElementsByTagName("head")[0].appendChild(link);
   },
 
-  _updateImageMargins: function() {
+  _updateImageMargins() {
     let windowWidth = this._win.innerWidth;
     let bodyWidth = this._doc.body.clientWidth;
 
@@ -691,7 +686,7 @@ AboutReader.prototype = {
       }
 
       // If the image is at least half as wide as the body, center it on desktop.
-      if (img.naturalWidth >= bodyWidth/2) {
+      if (img.naturalWidth >= bodyWidth / 2) {
         img.setAttribute("moz-reader-center", true);
       } else {
         img.removeAttribute("moz-reader-center");
@@ -713,30 +708,32 @@ AboutReader.prototype = {
   },
 
   _maybeSetTextDirection: function Read_maybeSetTextDirection(article) {
-    if (!article.dir)
-      return;
+    if (article.dir) {
+      // Set "dir" attribute on content
+      this._contentElement.setAttribute("dir", article.dir);
+      this._headerElement.setAttribute("dir", article.dir);
+
+      // The native locale could be set differently than the article's text direction.
+      var localeDirection = Services.locale.isAppLocaleRTL ? "rtl" : "ltr";
+      this._readTimeElement.setAttribute("dir", localeDirection);
+      this._readTimeElement.style.textAlign = article.dir == "rtl" ? "right" : "left";
+    }
+  },
 
-    // Set "dir" attribute on content
-    this._contentElement.setAttribute("dir", article.dir);
-    this._headerElement.setAttribute("dir", article.dir);
-  },
-
-  _fixLocalLinks() {
-    // We need to do this because preprocessing the content through nsIParserUtils
-    // gives back a DOM with a <base> element. That influences how these URLs get
-    // resolved, making them no longer match the document URI (which is
-    // about:reader?url=...). To fix this, make all the hash URIs absolute. This
-    // is hacky, but the alternative of removing the base element has potential
-    // security implications if Readability has not successfully made all the URLs
-    // absolute, so we pick just fixing these in-document links explicitly.
-    let localLinks = this._contentElement.querySelectorAll("a[href^='#']");
-    for (let localLink of localLinks) {
-      // Have to get the attribute because .href provides an absolute URI.
-      localLink.href = this._doc.documentURI + localLink.getAttribute("href");
+  _formatReadTime(slowEstimate, fastEstimate) {
+    let displayStringKey = "aboutReader.estimatedReadTimeRange1";
+
+    // only show one reading estimate when they are the same value
+    if (slowEstimate == fastEstimate) {
+      displayStringKey = "aboutReader.estimatedReadTimeValue1";
     }
+
+    return PluralForm.get(slowEstimate, gStrings.GetStringFromName(displayStringKey))
+      .replace("#1", fastEstimate)
+      .replace("#2", slowEstimate);
   },
 
-  _showError: function() {
+  _showError() {
     this._headerElement.style.display = "none";
     this._contentElement.style.display = "none";
 
@@ -746,11 +743,16 @@ AboutReader.prototype = {
 
     this._doc.title = errorMessage;
 
+    this._doc.documentElement.dataset.isError = true;
+
     this._error = true;
+
+    this._doc.dispatchEvent(
+      new this._win.CustomEvent("AboutReaderContentError", { bubbles: true, cancelable: false }));
   },
 
   // This function is the JS version of Java's StringUtils.stripCommonSubdomains.
-  _stripHost: function(host) {
+  _stripHost(host) {
     if (!host)
       return host;
 
@@ -766,17 +768,18 @@ AboutReader.prototype = {
     return host.substring(start);
   },
 
-  _showContent: function(article) {
+  _showContent(article) {
     this._messageElement.style.display = "none";
 
     this._article = article;
 
     this._domainElement.href = article.url;
-    let articleUri = Services.io.newURI(article.url, null, null);
+    let articleUri = Services.io.newURI(article.url);
     this._domainElement.textContent = this._stripHost(articleUri.host);
     this._creditsElement.textContent = article.byline;
 
     this._titleElement.textContent = article.title;
+    this._readTimeElement.textContent = this._formatReadTime(article.readingTimeMinsSlow, article.readingTimeMinsFast);
     this._doc.title = article.title;
 
     this._headerElement.style.display = "block";
@@ -787,8 +790,8 @@ AboutReader.prototype = {
       false, articleUri, this._contentElement);
     this._contentElement.innerHTML = "";
     this._contentElement.appendChild(contentFragment);
-    this._fixLocalLinks();
     this._maybeSetTextDirection(article);
+    this._foundLanguage(article.language);
 
     this._contentElement.style.display = "block";
     this._updateImageMargins();
@@ -804,13 +807,13 @@ AboutReader.prototype = {
       new this._win.CustomEvent("AboutReaderContentReady", { bubbles: true, cancelable: false }));
   },
 
-  _hideContent: function() {
+  _hideContent() {
     this._headerElement.style.display = "none";
     this._contentElement.style.display = "none";
   },
 
-  _showProgressDelayed: function() {
-    this._win.setTimeout(function() {
+  _showProgressDelayed() {
+    this._win.setTimeout(() => {
       // No need to show progress if the article has been loaded,
       // if the window has been unloaded, or if there was an error
       // trying to load the article.
@@ -823,20 +826,20 @@ AboutReader.prototype = {
 
       this._messageElement.textContent = gStrings.GetStringFromName("aboutReader.loading2");
       this._messageElement.style.display = "block";
-    }.bind(this), 300);
+    }, 300);
   },
 
   /**
    * Returns the original article URL for this about:reader view.
    */
-  _getOriginalUrl: function(win) {
+  _getOriginalUrl(win) {
     let url = win ? win.location.href : this._win.location.href;
     return ReaderMode.getOriginalUrl(url) || url;
   },
 
-  _setupSegmentedButton: function(id, options, initialValue, callback) {
+  _setupSegmentedButton(id, options, initialValue, callback) {
     let doc = this._doc;
-    let segmentedButton = doc.getElementById(id);
+    let segmentedButton = doc.getElementsByClassName(id)[0];
 
     for (let i = 0; i < options.length; i++) {
       let option = options[i];
@@ -867,10 +870,6 @@ AboutReader.prototype = {
 
         aEvent.stopPropagation();
 
-        // Just pass the ID of the button as an extra and hope the ID doesn't change
-        // unless the context changes
-        UITelemetry.addEvent("action.1", "button", null, id);
-
         let items = segmentedButton.children;
         for (let j = items.length - 1; j >= 0; j--) {
           items[j].classList.remove("selected");
@@ -878,19 +877,19 @@ AboutReader.prototype = {
 
         item.classList.add("selected");
         callback(option.value);
-      }.bind(this), true);
+      }, true);
 
       if (option.value === initialValue)
         item.classList.add("selected");
     }
   },
 
-  _setupButton: function(id, callback, titleEntity, textEntity) {
+  _setupButton(id, callback, titleEntity, textEntity) {
     if (titleEntity) {
       this._setButtonTip(id, titleEntity);
     }
 
-    let button = this._doc.getElementById(id);
+    let button = this._doc.getElementsByClassName(id)[0];
     if (textEntity) {
       button.textContent = gStrings.GetStringFromName(textEntity);
     }
@@ -910,17 +909,17 @@ AboutReader.prototype = {
    * and dynamically as button state changes.
    * @param   Localizable string providing UI element usage tip.
    */
-  _setButtonTip: function(id, titleEntity) {
-    let button = this._doc.getElementById(id);
+  _setButtonTip(id, titleEntity) {
+    let button = this._doc.getElementsByClassName(id)[0];
     button.setAttribute("title", gStrings.GetStringFromName(titleEntity));
   },
 
-  _setupStyleDropdown: function() {
-    let dropdownToggle = this._doc.querySelector("#style-dropdown .dropdown-toggle");
+  _setupStyleDropdown() {
+    let dropdownToggle = this._doc.querySelector(".style-dropdown .dropdown-toggle");
     dropdownToggle.setAttribute("title", gStrings.GetStringFromName("aboutReader.toolbar.typeControls"));
   },
 
-  _updatePopupPosition: function(dropdown) {
+  _updatePopupPosition(dropdown) {
     let dropdownToggle = dropdown.querySelector(".dropdown-toggle");
     let dropdownPopup = dropdown.querySelector(".dropdown-popup");
 
@@ -931,8 +930,8 @@ AboutReader.prototype = {
     dropdownPopup.style.top = popupTop + "px";
   },
 
-  _toggleDropdownClicked: function(event) {
-    let dropdown = event.target.closest('.dropdown');
+  _toggleDropdownClicked(event) {
+    let dropdown = event.target.closest(".dropdown");
 
     if (!dropdown)
       return;
@@ -952,16 +951,13 @@ AboutReader.prototype = {
   /*
    * If the ReaderView banner font-dropdown is closed, open it.
    */
-  _openDropdown: function(dropdown) {
+  _openDropdown(dropdown) {
     if (dropdown.classList.contains("open")) {
       return;
     }
 
     this._closeDropdowns();
-
-    // Trigger BackPressListener initialization in Android.
     dropdown.classList.add("open");
-    this._mm.sendAsyncMessage("Reader:DropdownOpened", this.viewId);
   },
 
   /*
@@ -969,7 +965,7 @@ AboutReader.prototype = {
    * dropdowns because the page is scrolling, allow popups to stay open with
    * the keep-open class.
    */
-  _closeDropdowns: function(scrolling) {
+  _closeDropdowns(scrolling) {
     let selector = ".dropdown.open";
     if (scrolling) {
       selector += ":not(.keep-open)";
@@ -979,10 +975,17 @@ AboutReader.prototype = {
     for (let dropdown of openDropdowns) {
       dropdown.classList.remove("open");
     }
+  },
 
-    // Trigger BackPressListener cleanup in Android.
-    if (openDropdowns.length) {
-      this._mm.sendAsyncMessage("Reader:DropdownClosed", this.viewId);
+  // Override link handling for same-page references so we don't exit Reader
+  // View. Anchors without an href have no URL to compare and are left alone.
+  _linkClicked(event) {
+    let href = event.target.href;
+    let targetUrl = href ? Services.io.newURI(href) : null;
+    let originalUrl = Services.io.newURI(this._getOriginalUrl());
+    if (targetUrl && originalUrl.specIgnoringRef == targetUrl.specIgnoringRef) {
+      event.preventDefault();
+      this._goToReference(targetUrl.ref);
     }
   },
 
diff --git a/toolkit/components/reader/JSDOMParser.js b/toolkit/components/reader/JSDOMParser.js
index 853649775..38f59c4ea 100644
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@@ -1017,46 +1017,6 @@
       }
     },
 
-    readScript: function (node) {
-      while (this.currentChar < this.html.length) {
-        var c = this.nextChar();
-        var nextC = this.peekNext();
-        if (c === "<") {
-          if (nextC === "!" || nextC === "?") {
-            // We're still before the ! or ? that is starting this comment:
-            this.currentChar++;
-            node.appendChild(this.discardNextComment());
-            continue;
-          }
-          if (nextC === "/" && this.html.substr(this.currentChar, 8 /*"/script>".length */).toLowerCase() == "/script>") {
-            // Go back before the '<' so we find the end tag.
-            this.currentChar--;
-            // Done with this script tag, the caller will close:
-            return;
-          }
-        }
-        // Either c wasn't a '<' or it was but we couldn't find either a comment
-        // or a closing script tag, so we should just parse as text until the next one
-        // comes along:
-
-        var haveTextNode = node.lastChild && node.lastChild.nodeType === Node.TEXT_NODE;
-        var textNode = haveTextNode ? node.lastChild : new Text();
-        var n = this.html.indexOf("<", this.currentChar);
-        // Decrement this to include the current character *afterwards* so we don't get stuck
-        // looking for the same < all the time.
-        this.currentChar--;
-        if (n === -1) {
-          textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
-          this.currentChar = this.html.length;
-        } else {
-          textNode.innerHTML += this.html.substring(this.currentChar, n);
-          this.currentChar = n;
-        }
-        if (!haveTextNode)
-          node.appendChild(textNode);
-      }
-    },
-
     discardNextComment: function() {
       if (this.match("--")) {
         this.discardTo("-->");
@@ -1131,11 +1091,7 @@
 
       // If this isn't a void Element, read its child nodes
       if (!closed) {
-        if (localName == "script") {
-          this.readScript(node);
-        } else {
-          this.readChildren(node);
-        }
+        this.readChildren(node);
         var closingTag = "</" + localName + ">";
         if (!this.match(closingTag)) {
           this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js
index 491461a8e..04949dc61 100644
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -38,32 +38,22 @@ function Readability(uri, doc, options) {
 
   this._uri = uri;
   this._doc = doc;
-  this._biggestFrame = false;
+  this._articleTitle = null;
   this._articleByline = null;
   this._articleDir = null;
 
-  // Configureable options
+  // Configurable options
   this._debug = !!options.debug;
   this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
   this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
-  this._maxPages = options.maxPages || this.DEFAULT_MAX_PAGES;
+  this._wordThreshold = options.wordThreshold || this.DEFAULT_WORD_THRESHOLD;
+  this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
 
   // Start with all flags set
   this._flags = this.FLAG_STRIP_UNLIKELYS |
                 this.FLAG_WEIGHT_CLASSES |
                 this.FLAG_CLEAN_CONDITIONALLY;
 
-  // The list of pages we've parsed in this call of readability,
-  // for autopaging. As a key store for easier searching.
-  this._parsedPages = {};
-
-  // A list of the ETag headers of pages we've parsed, in case they happen to match,
-  // we'll know it's a duplicate.
-  this._pageETags = {};
-
-  // Make an AJAX request for each page and append it to the document.
-  this._curPageNum = 1;
-
   var logEl;
 
   // Control whether log messages are sent to the console
@@ -82,12 +72,12 @@ function Readability(uri, doc, options) {
       return rv + elDesc;
     };
     this.log = function () {
-      if (typeof dump !== undefined) {
+      if (typeof dump !== "undefined") {
         var msg = Array.prototype.map.call(arguments, function(x) {
           return (x && x.nodeName) ? logEl(x) : x;
         }).join(" ");
         dump("Reader: (Readability) " + msg + "\n");
-      } else if (typeof console !== undefined) {
+      } else if (typeof console !== "undefined") {
         var args = ["Reader: (Readability) "].concat(arguments);
         console.log.apply(console, args);
       }
@@ -109,20 +99,19 @@ Readability.prototype = {
   // tight the competition is among candidates.
   DEFAULT_N_TOP_CANDIDATES: 5,
 
-  // The maximum number of pages to loop through before we call
-  // it quits and just show a link.
-  DEFAULT_MAX_PAGES: 5,
-
   // Element tags to score by default.
   DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
 
+  // The default number of characters (not words) an article must have in order to return a result
+  DEFAULT_WORD_THRESHOLD: 500,
+
   // All of the regular expressions in use within readability.
   // Defined up here so we don't instantiate them repeatedly in loops.
   REGEXPS: {
-    unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
+    unlikelyCandidates: /banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
-    negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
+    negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby|p-author/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
@@ -138,6 +127,13 @@ Readability.prototype = {
 
   ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"],
 
+  PRESENTATIONAL_ATTRIBUTES: [ "align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace" ],
+
+  DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [ "TABLE", "TH", "TD", "HR", "PRE" ],
+
+  // These are the classes that readability sets itself.
+  CLASSES_TO_PRESERVE: [ "readability-styled", "page" ],
+
   /**
    * Run any post-process modifications to article content as necessary.
    *
@@ -147,6 +143,9 @@ Readability.prototype = {
   _postProcessContent: function(articleContent) {
     // Readability cannot open relative uris so we convert them to absolute uris.
     this._fixRelativeUris(articleContent);
+
+    // Remove classes.
+    this._cleanClasses(articleContent);
   },
 
   /**
@@ -155,8 +154,8 @@ Readability.prototype = {
    *
    * If function is not passed, removes all the nodes in node list.
    *
-   * @param NodeList nodeList The no
-   * @param Function filterFn
+   * @param NodeList nodeList The nodes to operate on
+   * @param Function filterFn the function to use as a filter
    * @return void
    */
   _removeNodes: function(nodeList, filterFn) {
@@ -172,6 +171,20 @@ Readability.prototype = {
   },
 
   /**
+   * Iterates over a NodeList, and calls _setNodeTag for each node.
+   *
+   * @param NodeList nodeList The nodes to operate on
+   * @param String newTagName the new tag name to use
+   * @return void
+   */
+  _replaceNodeTags: function(nodeList, newTagName) {
+    for (var i = nodeList.length - 1; i >= 0; i--) {
+      var node = nodeList[i];
+      this._setNodeTag(node, newTagName);
+    }
+  },
+
+  /**
    * Iterate over a NodeList, which doesn't natively fully implement the Array
    * interface.
    *
@@ -180,10 +193,9 @@ Readability.prototype = {
    *
    * @param  NodeList nodeList The NodeList.
    * @param  Function fn       The iterate function.
-   * @param  Boolean  backward Whether to use backward iteration.
    * @return void
    */
-  _forEachNode: function(nodeList, fn, backward) {
+  _forEachNode: function(nodeList, fn) {
     Array.prototype.forEach.call(nodeList, fn, this);
   },
 
@@ -228,6 +240,34 @@ Readability.prototype = {
   },
 
   /**
+   * Strips every class name from the class="" attribute of each element in
+   * the given subtree, except names listed in CLASSES_TO_PRESERVE or in the
+   * classesToPreserve array from the options object.
+   *
+   * @param Element
+   * @return void
+   */
+  _cleanClasses: function(node) {
+    var classesToPreserve = this._classesToPreserve;
+    var className = (node.getAttribute("class") || "")
+      .split(/\s+/)
+      .filter(function(cls) {
+        return classesToPreserve.indexOf(cls) != -1;
+      })
+      .join(" ");
+
+    if (className) {
+      node.setAttribute("class", className);
+    } else {
+      node.removeAttribute("class");
+    }
+
+    for (node = node.firstElementChild; node; node = node.nextElementSibling) {
+      this._cleanClasses(node);
+    }
+  },
+
+  /**
    * Converts each <a> and <img> uri in the given element to an absolute URI,
    * ignoring #ref URIs.
    *
@@ -307,11 +347,20 @@ Readability.prototype = {
         curTitle = origTitle = this._getInnerText(doc.getElementsByTagName('title')[0]);
     } catch (e) {/* ignore exceptions setting the title. */}
 
-    if (curTitle.match(/ [\|\-] /)) {
-      curTitle = origTitle.replace(/(.*)[\|\-] .*/gi, '$1');
+    var titleHadHierarchicalSeparators = false;
+    function wordCount(str) {
+      return str.split(/\s+/).length;
+    }
+
+    // If there's a separator in the title, first remove the final part
+    if ((/ [\|\-\\\/>»] /).test(curTitle)) {
+      titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle);
+      curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, '$1');
 
-      if (curTitle.split(' ').length < 3)
-        curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi, '$1');
+      // If the resulting title is too short (fewer than 3 words), remove
+      // the first part instead:
+      if (wordCount(curTitle) < 3)
+        curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, '$1');
     } else if (curTitle.indexOf(': ') !== -1) {
       // Check if we have an heading containing this exact string, so we
       // could assume it's the full title.
@@ -328,8 +377,13 @@ Readability.prototype = {
         curTitle = origTitle.substring(origTitle.lastIndexOf(':') + 1);
 
         // If the title is now too short, try the first colon instead:
-        if (curTitle.split(' ').length < 3)
+        if (wordCount(curTitle) < 3) {
           curTitle = origTitle.substring(origTitle.indexOf(':') + 1);
+          // But if we have too many words before the colon there's something weird
+          // with the titles and the H tags so let's just use the original title instead
+        } else if (wordCount(origTitle.substr(0, origTitle.indexOf(':'))) > 5) {
+          curTitle = origTitle;
+        }
       }
     } else if (curTitle.length > 150 || curTitle.length < 15) {
       var hOnes = doc.getElementsByTagName('h1');
@@ -339,9 +393,16 @@ Readability.prototype = {
     }
 
     curTitle = curTitle.trim();
-
-    if (curTitle.split(' ').length <= 4)
+    // If we now have 4 words or fewer as our title, and either no
+    // 'hierarchical' separators (\, /, > or ») were found in the original
+    // title or the word count did not drop by exactly 1 word, use
+    // the original title.
+    var curTitleWordCount = wordCount(curTitle);
+    if (curTitleWordCount <= 4 &&
+        (!titleHadHierarchicalSeparators ||
+         curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) {
       curTitle = origTitle;
+    }
 
     return curTitle;
   },
@@ -362,9 +423,7 @@ Readability.prototype = {
       this._replaceBrs(doc.body);
     }
 
-    this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
-      this._setNodeTag(fontNode, "SPAN");
-    });
+    this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
   },
 
   /**
@@ -464,19 +523,49 @@ Readability.prototype = {
   _prepArticle: function(articleContent) {
     this._cleanStyles(articleContent);
 
+    // Check for data tables before we continue, to avoid removing items in
+    // those tables, which will often be isolated even though they're
+    // visually linked to other content-ful elements (text, images, etc.).
+    this._markDataTables(articleContent);
+
     // Clean out junk from the article content
     this._cleanConditionally(articleContent, "form");
+    this._cleanConditionally(articleContent, "fieldset");
     this._clean(articleContent, "object");
     this._clean(articleContent, "embed");
     this._clean(articleContent, "h1");
     this._clean(articleContent, "footer");
 
-    // If there is only one h2, they are probably using it as a header
-    // and not a subheader, so remove it since we already have a header.
-    if (articleContent.getElementsByTagName('h2').length === 1)
-      this._clean(articleContent, "h2");
+    // Clean out elements that have "share" in their id/class combinations from within the final
+    // top candidates; the top candidates themselves are kept even if they match "share".
+    this._forEachNode(articleContent.children, function(topCandidate) {
+      this._cleanMatchedNodes(topCandidate, /share/);
+    });
+
+    // If there is only one h2 and its text content substantially equals article title,
+    // they are probably using it as a header and not a subheader,
+    // so remove it since we already extract the title separately.
+    var h2 = articleContent.getElementsByTagName('h2');
+    if (h2.length === 1) {
+      var lengthSimilarRate = (h2[0].textContent.length - this._articleTitle.length) / this._articleTitle.length;
+      if (Math.abs(lengthSimilarRate) < 0.5) {
+        var titlesMatch = false;
+        if (lengthSimilarRate > 0) {
+          titlesMatch = h2[0].textContent.includes(this._articleTitle);
+        } else {
+          titlesMatch = this._articleTitle.includes(h2[0].textContent);
+        }
+        if (titlesMatch) {
+          this._clean(articleContent, "h2");
+        }
+      }
+    }
 
     this._clean(articleContent, "iframe");
+    this._clean(articleContent, "input");
+    this._clean(articleContent, "textarea");
+    this._clean(articleContent, "select");
+    this._clean(articleContent, "button");
     this._cleanHeaders(articleContent);
 
     // Do these last as the previous stuff may have removed junk
@@ -662,9 +751,6 @@ Readability.prototype = {
 
     var pageCacheHtml = page.innerHTML;
 
-    // Check if any "dir" is set on the toplevel document element
-    this._articleDir = doc.documentElement.getAttribute("dir");
-
     while (true) {
       var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS);
 
@@ -695,6 +781,15 @@ Readability.prototype = {
           }
         }
 
+        // Remove DIV, SECTION, HEADER, and H1-H6 nodes without any content (e.g. text, image, video, or iframe).
+        if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" ||
+             node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" ||
+             node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") &&
+            this._isElementWithoutContent(node)) {
+          node = this._removeAndGetNext(node);
+          continue;
+        }
+
         if (this.DEFAULT_TAGS_TO_SCORE.indexOf(node.tagName) !== -1) {
           elementsToScore.push(node);
         }
@@ -709,13 +804,14 @@ Readability.prototype = {
             var newNode = node.children[0];
             node.parentNode.replaceChild(newNode, node);
             node = newNode;
+            elementsToScore.push(node);
           } else if (!this._hasChildBlockElement(node)) {
             node = this._setNodeTag(node, "P");
             elementsToScore.push(node);
           } else {
             // EXPERIMENTAL
             this._forEachNode(node.childNodes, function(childNode) {
-              if (childNode.nodeType === Node.TEXT_NODE) {
+              if (childNode.nodeType === Node.TEXT_NODE && childNode.textContent.trim().length > 0) {
                 var p = doc.createElement('p');
                 p.textContent = childNode.textContent;
                 p.style.display = 'inline';
@@ -812,6 +908,7 @@ Readability.prototype = {
 
       var topCandidate = topCandidates[0] || null;
       var neededToCreateTopCandidate = false;
+      var parentOfTopCandidate;
 
       // If we still have no top candidate, just use the body as a last resort.
       // We also have to copy the body node so it is something we can modify.
@@ -831,6 +928,33 @@ Readability.prototype = {
 
         this._initializeNode(topCandidate);
       } else if (topCandidate) {
+        // Find a better top candidate node if it contains (at least three) nodes which belong to the `topCandidates` array
+        // and whose scores are quite close to (at least 75% of) the current `topCandidate` node's score.
+        var alternativeCandidateAncestors = [];
+        for (var i = 1; i < topCandidates.length; i++) {
+          if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) {
+            alternativeCandidateAncestors.push(this._getNodeAncestors(topCandidates[i]));
+          }
+        }
+        var MINIMUM_TOPCANDIDATES = 3;
+        if (alternativeCandidateAncestors.length >= MINIMUM_TOPCANDIDATES) {
+          parentOfTopCandidate = topCandidate.parentNode;
+          while (parentOfTopCandidate.tagName !== "BODY") {
+            var listsContainingThisAncestor = 0;
+            for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) {
+              listsContainingThisAncestor += Number(alternativeCandidateAncestors[ancestorIndex].includes(parentOfTopCandidate));
+            }
+            if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) {
+              topCandidate = parentOfTopCandidate;
+              break;
+            }
+            parentOfTopCandidate = parentOfTopCandidate.parentNode;
+          }
+        }
+        if (!topCandidate.readability) {
+          this._initializeNode(topCandidate);
+        }
+
         // Because of our bonus system, parents of candidates might have scores
         // themselves. They get half of the node. There won't be nodes with higher
         // scores than our topCandidate, but if we see the score going *up* in the first
@@ -838,11 +962,15 @@ Readability.prototype = {
         // lurking in other places that we want to unify in. The sibling stuff
         // below does some of that - but only if we've looked high enough up the DOM
         // tree.
-        var parentOfTopCandidate = topCandidate.parentNode;
+        parentOfTopCandidate = topCandidate.parentNode;
         var lastScore = topCandidate.readability.contentScore;
         // The scores shouldn't get too low.
         var scoreThreshold = lastScore / 3;
-        while (parentOfTopCandidate && parentOfTopCandidate.readability) {
+        while (parentOfTopCandidate.tagName !== "BODY") {
+          if (!parentOfTopCandidate.readability) {
+            parentOfTopCandidate = parentOfTopCandidate.parentNode;
+            continue;
+          }
           var parentScore = parentOfTopCandidate.readability.contentScore;
           if (parentScore < scoreThreshold)
             break;
@@ -854,6 +982,17 @@ Readability.prototype = {
           lastScore = parentOfTopCandidate.readability.contentScore;
           parentOfTopCandidate = parentOfTopCandidate.parentNode;
         }
+
+        // If the top candidate is the only child, use parent instead. This will help sibling
+        // joining logic when adjacent content is actually located in parent's sibling node.
+        parentOfTopCandidate = topCandidate.parentNode;
+        while (parentOfTopCandidate.tagName != "BODY" && parentOfTopCandidate.children.length == 1) {
+          topCandidate = parentOfTopCandidate;
+          parentOfTopCandidate = topCandidate.parentNode;
+        }
+        if (!topCandidate.readability) {
+          this._initializeNode(topCandidate);
+        }
       }
 
       // Now that we have the top candidate, look through its siblings for content
@@ -864,7 +1003,9 @@ Readability.prototype = {
         articleContent.id = "readability-content";
 
       var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
-      var siblings = topCandidate.parentNode.children;
+      // Keep potential top candidate's parent node to try to get text direction of it later.
+      parentOfTopCandidate = topCandidate.parentNode;
+      var siblings = parentOfTopCandidate.children;
 
       for (var s = 0, sl = siblings.length; s < sl; s++) {
         var sibling = siblings[s];
@@ -927,24 +1068,22 @@ Readability.prototype = {
       if (this._debug)
         this.log("Article content post-prep: " + articleContent.innerHTML);
 
-      if (this._curPageNum === 1) {
-        if (neededToCreateTopCandidate) {
-          // We already created a fake div thing, and there wouldn't have been any siblings left
-          // for the previous loop, so there's no point trying to create a new div, and then
-          // move all the children over. Just assign IDs and class names here. No need to append
-          // because that already happened anyway.
-          topCandidate.id = "readability-page-1";
-          topCandidate.className = "page";
-        } else {
-          var div = doc.createElement("DIV");
-          div.id = "readability-page-1";
-          div.className = "page";
-          var children = articleContent.childNodes;
-          while (children.length) {
-            div.appendChild(children[0]);
-          }
-          articleContent.appendChild(div);
+      if (neededToCreateTopCandidate) {
+        // We already created a fake div thing, and there wouldn't have been any siblings left
+        // for the previous loop, so there's no point trying to create a new div, and then
+        // move all the children over. Just assign IDs and class names here. No need to append
+        // because that already happened anyway.
+        topCandidate.id = "readability-page-1";
+        topCandidate.className = "page";
+      } else {
+        var div = doc.createElement("DIV");
+        div.id = "readability-page-1";
+        div.className = "page";
+        var children = articleContent.childNodes;
+        while (children.length) {
+          div.appendChild(children[0]);
         }
+        articleContent.appendChild(div);
       }
 
       if (this._debug)
@@ -955,7 +1094,7 @@ Readability.prototype = {
       // grabArticle with different flags set. This gives us a higher likelihood of
       // finding the content, and the sieve approach gives us a higher likelihood of
       // finding the -right- content.
-      if (this._getInnerText(articleContent, true).length < 500) {
+      if (this._getInnerText(articleContent, true).length < this._wordThreshold) {
         page.innerHTML = pageCacheHtml;
 
         if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
@@ -968,6 +1107,18 @@ Readability.prototype = {
           return null;
         }
       } else {
+        // Find out text direction from ancestors of final top candidate.
+        var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate));
+        this._someNode(ancestors, function(ancestor) {
+          if (!ancestor.tagName)
+            return false;
+          var articleDir = ancestor.getAttribute("dir");
+          if (articleDir) {
+            this._articleDir = articleDir;
+            return true;
+          }
+          return false;
+        });
         return articleContent;
       }
     }
@@ -1044,12 +1195,15 @@ Readability.prototype = {
       metadata.excerpt = values["twitter:description"];
     }
 
-    if ("og:title" in values) {
-      // Use facebook open graph title.
-      metadata.title = values["og:title"];
-    } else if ("twitter:title" in values) {
-      // Use twitter cards title.
-      metadata.title = values["twitter:title"];
+    metadata.title = this._getArticleTitle();
+    if (!metadata.title) {
+      if ("og:title" in values) {
+        // Use facebook open graph title.
+        metadata.title = values["og:title"];
+      } else if ("twitter:title" in values) {
+        // Use twitter cards title.
+        metadata.title = values["twitter:title"];
+      }
     }
 
     return metadata;
@@ -1089,6 +1243,13 @@ Readability.prototype = {
     });
   },
 
+  _isElementWithoutContent: function(node) {
+    return node.nodeType === Node.ELEMENT_NODE &&
+      node.textContent.trim().length == 0 &&
+      (node.children.length == 0 ||
+       node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
+  },
+
   /**
    * Determine whether element has any children block level elements.
    *
@@ -1139,26 +1300,25 @@ Readability.prototype = {
    * @return void
   **/
   _cleanStyles: function(e) {
-    e = e || this._doc;
-    if (!e)
+    if (!e || e.tagName.toLowerCase() === 'svg')
       return;
-    var cur = e.firstChild;
 
-    // Remove any root styles, if we're able.
-    if (typeof e.removeAttribute === 'function' && e.className !== 'readability-styled')
-      e.removeAttribute('style');
-
-    // Go until there are no more child nodes
-    while (cur !== null) {
-      if (cur.nodeType === cur.ELEMENT_NODE) {
-        // Remove style attribute(s) :
-        if (cur.className !== "readability-styled")
-          cur.removeAttribute("style");
+    if (e.className !== 'readability-styled') {
+      // Remove `style` and deprecated presentational attributes
+      for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) {
+        e.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[i]);
+      }
 
-        this._cleanStyles(cur);
+      if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e.tagName) !== -1) {
+        e.removeAttribute('width');
+        e.removeAttribute('height');
       }
+    }
 
-      cur = cur.nextSibling;
+    var cur = e.firstElementChild;
+    while (cur !== null) {
+      this._cleanStyles(cur);
+      cur = cur.nextElementSibling;
     }
   },
 
@@ -1185,368 +1345,6 @@ Readability.prototype = {
   },
 
   /**
-   * Find a cleaned up version of the current URL, to use for comparing links for possible next-pageyness.
-   *
-   * @author Dan Lacy
-   * @return string the base url
-  **/
-  _findBaseUrl: function() {
-    var uri = this._uri;
-    var noUrlParams = uri.path.split("?")[0];
-    var urlSlashes = noUrlParams.split("/").reverse();
-    var cleanedSegments = [];
-    var possibleType = "";
-
-    for (var i = 0, slashLen = urlSlashes.length; i < slashLen; i += 1) {
-      var segment = urlSlashes[i];
-
-      // Split off and save anything that looks like a file type.
-      if (segment.indexOf(".") !== -1) {
-        possibleType = segment.split(".")[1];
-
-        // If the type isn't alpha-only, it's probably not actually a file extension.
-        if (!possibleType.match(/[^a-zA-Z]/))
-          segment = segment.split(".")[0];
-      }
-
-      // EW-CMS specific segment replacement. Ugly.
-      // Example: http://www.ew.com/ew/article/0,,20313460_20369436,00.html
-      if (segment.indexOf(',00') !== -1)
-        segment = segment.replace(',00', '');
-
-      // If our first or second segment has anything looking like a page number, remove it.
-      if (segment.match(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i) && ((i === 1) || (i === 0)))
-        segment = segment.replace(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i, "");
-
-      var del = false;
-
-      // If this is purely a number, and it's the first or second segment,
-      // it's probably a page number. Remove it.
-      if (i < 2 && segment.match(/^\d{1,2}$/))
-        del = true;
-
-      // If this is the first segment and it's just "index", remove it.
-      if (i === 0 && segment.toLowerCase() === "index")
-        del = true;
-
-      // If our first or second segment is smaller than 3 characters,
-      // and the first segment was purely alphas, remove it.
-      if (i < 2 && segment.length < 3 && !urlSlashes[0].match(/[a-z]/i))
-        del = true;
-
-      // If it's not marked for deletion, push it to cleanedSegments.
-      if (!del)
-        cleanedSegments.push(segment);
-    }
-
-    // This is our final, cleaned, base article URL.
-    return uri.scheme + "://" + uri.host + cleanedSegments.reverse().join("/");
-  },
-
-  /**
-   * Look for any paging links that may occur within the document.
-   *
-   * @param body
-   * @return object (array)
-  **/
-  _findNextPageLink: function(elem) {
-    var uri = this._uri;
-    var possiblePages = {};
-    var allLinks = elem.getElementsByTagName('a');
-    var articleBaseUrl = this._findBaseUrl();
-
-    // Loop through all links, looking for hints that they may be next-page links.
-    // Things like having "page" in their textContent, className or id, or being a child
-    // of a node with a page-y className or id.
-    //
-    // Also possible: levenshtein distance? longest common subsequence?
-    //
-    // After we do that, assign each page a score, and
-    for (var i = 0, il = allLinks.length; i < il; i += 1) {
-      var link = allLinks[i];
-      var linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, '');
-
-      // If we've already seen this page, ignore it.
-      if (linkHref === "" ||
-        linkHref === articleBaseUrl ||
-        linkHref === uri.spec ||
-        linkHref in this._parsedPages) {
-        continue;
-      }
-
-      // If it's on a different domain, skip it.
-      if (uri.host !== linkHref.split(/\/+/g)[1])
-        continue;
-
-      var linkText = this._getInnerText(link);
-
-      // If the linkText looks like it's not the next page, skip it.
-      if (linkText.match(this.REGEXPS.extraneous) || linkText.length > 25)
-        continue;
-
-      // If the leftovers of the URL after removing the base URL don't contain
-      // any digits, it's certainly not a next page link.
-      var linkHrefLeftover = linkHref.replace(articleBaseUrl, '');
-      if (!linkHrefLeftover.match(/\d/))
-        continue;
-
-      if (!(linkHref in possiblePages)) {
-        possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};
-      } else {
-        possiblePages[linkHref].linkText += ' | ' + linkText;
-      }
-
-      var linkObj = possiblePages[linkHref];
-
-      // If the articleBaseUrl isn't part of this URL, penalize this link. It could
-      // still be the link, but the odds are lower.
-      // Example: http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
-      if (linkHref.indexOf(articleBaseUrl) !== 0)
-        linkObj.score -= 25;
-
-      var linkData = linkText + ' ' + link.className + ' ' + link.id;
-      if (linkData.match(this.REGEXPS.nextLink))
-        linkObj.score += 50;
-
-      if (linkData.match(/pag(e|ing|inat)/i))
-        linkObj.score += 25;
-
-      if (linkData.match(/(first|last)/i)) {
-        // -65 is enough to negate any bonuses gotten from a > or » in the text,
-        // If we already matched on "next", last is probably fine.
-        // If we didn't, then it's bad. Penalize.
-        if (!linkObj.linkText.match(this.REGEXPS.nextLink))
-          linkObj.score -= 65;
-      }
-
-      if (linkData.match(this.REGEXPS.negative) || linkData.match(this.REGEXPS.extraneous))
-        linkObj.score -= 50;
-
-      if (linkData.match(this.REGEXPS.prevLink))
-        linkObj.score -= 200;
-
-      // If a parentNode contains page or paging or paginat
-      var parentNode = link.parentNode;
-      var positiveNodeMatch = false;
-      var negativeNodeMatch = false;
-
-      while (parentNode) {
-        var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id;
-
-        if (!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) {
-          positiveNodeMatch = true;
-          linkObj.score += 25;
-        }
-
-        if (!negativeNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(this.REGEXPS.negative)) {
-          // If this is just something like "footer", give it a negative.
-          // If it's something like "body-and-footer", leave it be.
-          if (!parentNodeClassAndId.match(this.REGEXPS.positive)) {
-            linkObj.score -= 25;
-            negativeNodeMatch = true;
-          }
-        }
-
-        parentNode = parentNode.parentNode;
-      }
-
-      // If the URL looks like it has paging in it, add to the score.
-      // Things like /page/2/, /pagenum/2, ?p=3, ?page=11, ?pagination=34
-      if (linkHref.match(/p(a|g|ag)?(e|ing|ination)?(=|\/)[0-9]{1,2}/i) || linkHref.match(/(page|paging)/i))
-        linkObj.score += 25;
-
-      // If the URL contains negative values, give a slight decrease.
-      if (linkHref.match(this.REGEXPS.extraneous))
-        linkObj.score -= 15;
-
-      /**
-       * Minor punishment to anything that doesn't match our current URL.
-       * NOTE: I'm finding this to cause more harm than good where something is exactly 50 points.
-       *     Dan, can you show me a counterexample where this is necessary?
-       * if (linkHref.indexOf(window.location.href) !== 0) {
-       *  linkObj.score -= 1;
-       * }
-      **/
-
-      // If the link text can be parsed as a number, give it a minor bonus, with a slight
-      // bias towards lower numbered pages. This is so that pages that might not have 'next'
-      // in their text can still get scored, and sorted properly by score.
-      var linkTextAsNumber = parseInt(linkText, 10);
-      if (linkTextAsNumber) {
-        // Punish 1 since we're either already there, or it's probably
-        // before what we want anyways.
-        if (linkTextAsNumber === 1) {
-          linkObj.score -= 10;
-        } else {
-          linkObj.score += Math.max(0, 10 - linkTextAsNumber);
-        }
-      }
-    }
-
-    // Loop thrugh all of our possible pages from above and find our top
-    // candidate for the next page URL. Require at least a score of 50, which
-    // is a relatively high confidence that this page is the next link.
-    var topPage = null;
-    for (var page in possiblePages) {
-      if (possiblePages.hasOwnProperty(page)) {
-        if (possiblePages[page].score >= 50 &&
-          (!topPage || topPage.score < possiblePages[page].score))
-          topPage = possiblePages[page];
-      }
-    }
-
-    var nextHref = null;
-    if (topPage) {
-      nextHref = topPage.href.replace(/\/$/, '');
-
-      this.log('NEXT PAGE IS ' + nextHref);
-      this._parsedPages[nextHref] = true;
-    }
-    return nextHref;
-  },
-
-  _successfulRequest: function(request) {
-    return (request.status >= 200 && request.status < 300) ||
-        request.status === 304 ||
-         (request.status === 0 && request.responseText);
-  },
-
-  _ajax: function(url, options) {
-    var request = new XMLHttpRequest();
-
-    function respondToReadyState(readyState) {
-      if (request.readyState === 4) {
-        if (this._successfulRequest(request)) {
-          if (options.success)
-            options.success(request);
-        } else if (options.error) {
-          options.error(request);
-        }
-      }
-    }
-
-    if (typeof options === 'undefined')
-      options = {};
-
-    request.onreadystatechange = respondToReadyState;
-
-    request.open('get', url, true);
-    request.setRequestHeader('Accept', 'text/html');
-
-    try {
-      request.send(options.postBody);
-    } catch (e) {
-      if (options.error)
-        options.error();
-    }
-
-    return request;
-  },
-
-  _appendNextPage: function(nextPageLink) {
-    var doc = this._doc;
-    this._curPageNum += 1;
-
-    var articlePage = doc.createElement("DIV");
-    articlePage.id = 'readability-page-' + this._curPageNum;
-    articlePage.className = 'page';
-    articlePage.innerHTML = '<p class="page-separator" title="Page ' + this._curPageNum + '">&sect;</p>';
-
-    doc.getElementById("readability-content").appendChild(articlePage);
-
-    if (this._curPageNum > this._maxPages) {
-      var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>";
-      articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
-      return;
-    }
-
-    // Now that we've built the article page DOM element, get the page content
-    // asynchronously and load the cleaned content into the div we created for it.
-    (function(pageUrl, thisPage) {
-      this._ajax(pageUrl, {
-        success: function(r) {
-
-          // First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page.
-          var eTag = r.getResponseHeader('ETag');
-          if (eTag) {
-            if (eTag in this._pageETags) {
-              this.log("Exact duplicate page found via ETag. Aborting.");
-              articlePage.style.display = 'none';
-              return;
-            }
-            this._pageETags[eTag] = 1;
-          }
-
-          // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
-          var page = doc.createElement("DIV");
-
-          // Do some preprocessing to our HTML to make it ready for appending.
-          // - Remove any script tags. Swap and reswap newlines with a unicode
-          //   character because multiline regex doesn't work in javascript.
-          // - Turn any noscript tags into divs so that we can parse them. This
-          //   allows us to find any next page links hidden via javascript.
-          // - Turn all double br's into p's - was handled by prepDocument in the original view.
-          //   Maybe in the future abstract out prepDocument to work for both the original document
-          //   and AJAX-added pages.
-          var responseHtml = r.responseText.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
-          responseHtml = responseHtml.replace(/\n/g, '\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
-          responseHtml = responseHtml.replace(/\uffff/g, '\n').replace(/<(\/?)noscript/gi, '<$1div');
-          responseHtml = responseHtml.replace(this.REGEXPS.replaceFonts, '<$1span>');
-
-          page.innerHTML = responseHtml;
-          this._replaceBrs(page);
-
-          // Reset all flags for the next page, as they will search through it and
-          // disable as necessary at the end of grabArticle.
-          this._flags = 0x1 | 0x2 | 0x4;
-
-          var secondNextPageLink = this._findNextPageLink(page);
-
-          // NOTE: if we end up supporting _appendNextPage(), we'll need to
-          // change this call to be async
-          var content = this._grabArticle(page);
-
-          if (!content) {
-            this.log("No content found in page to append. Aborting.");
-            return;
-          }
-
-          // Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
-          // Compare it against all of the the previous document's we've gotten. If the previous
-          // document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
-          var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null;
-          if (firstP && firstP.innerHTML.length > 100) {
-            for (var i = 1; i <= this._curPageNum; i += 1) {
-              var rPage = doc.getElementById('readability-page-' + i);
-              if (rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) {
-                this.log('Duplicate of page ' + i + ' - skipping.');
-                articlePage.style.display = 'none';
-                this._parsedPages[pageUrl] = true;
-                return;
-              }
-            }
-          }
-
-          this._removeScripts(content);
-
-          thisPage.innerHTML = thisPage.innerHTML + content.innerHTML;
-
-          // After the page has rendered, post process the content. This delay is necessary because,
-          // in webkit at least, offsetWidth is not set in time to determine image width. We have to
-          // wait a little bit for reflow to finish before we can fix floating images.
-          setTimeout((function() {
-            this._postProcessContent(thisPage);
-          }).bind(this), 500);
-
-
-          if (secondNextPageLink)
-            this._appendNextPage(secondNextPageLink);
-        }
-      });
-    }).bind(this)(nextPageLink, articlePage);
-  },
-
-  /**
    * Get an elements class/id weight. Uses regular expressions to tell if this
    * element looks good or bad.
    *
@@ -1617,16 +1415,17 @@ Readability.prototype = {
    * @param  HTMLElement node
    * @param  String      tagName
    * @param  Number      maxDepth
+   * @param  Function    filterFn a filter to invoke to determine whether this node 'counts'
    * @return Boolean
    */
-  _hasAncestorTag: function(node, tagName, maxDepth) {
+  _hasAncestorTag: function(node, tagName, maxDepth, filterFn) {
     maxDepth = maxDepth || 3;
     tagName = tagName.toUpperCase();
     var depth = 0;
     while (node.parentNode) {
-      if (depth > maxDepth)
+      if (maxDepth > 0 && depth > maxDepth)
         return false;
-      if (node.parentNode.tagName === tagName)
+      if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode)))
         return true;
       node = node.parentNode;
       depth++;
@@ -1635,6 +1434,93 @@ Readability.prototype = {
   },
 
   /**
+   * Return an object indicating how many rows and columns this table has.
+   */
+  _getRowAndColumnCount: function(table) {
+    // Returns {rows, columns}: rows is the sum of per-<tr> spans, columns is
+    // the largest per-row sum of <td> colspans.
+    // Read a span attribute as a base-10 integer; missing, empty, zero or
+    // non-numeric values all count as a span of 1.
+    var spanOf = function(elem, attrName) {
+      return parseInt(elem.getAttribute(attrName), 10) || 1;
+    };
+    var rowTotal = 0;
+    var widest = 0;
+    var rowElems = table.getElementsByTagName("tr");
+    for (var r = 0; r < rowElems.length; r++) {
+      var row = rowElems[r];
+      rowTotal += spanOf(row, "rowspan");
+
+      // Sum the colspans of this row's <td> cells and keep the running maximum.
+      var width = 0;
+      var tds = row.getElementsByTagName("td");
+      for (var c = 0; c < tds.length; c++) {
+        width += spanOf(tds[c], "colspan");
+      }
+      widest = Math.max(widest, width);
+    }
+    return {rows: rowTotal, columns: widest};
+  },
+
+  /**
+   * Look for 'data' (as opposed to 'layout') tables, for which we use
+   * similar checks as
+   * https://dxr.mozilla.org/mozilla-central/rev/71224049c0b52ab190564d3ea0eab089a159a4cf/accessible/html/HTMLTableAccessible.cpp#920
+   */
+  _markDataTables: function(root) {
+    // Sets _readabilityDataTable (true = data, false = layout) on every
+    // <table> under root. Nothing is returned; the flag is read back off the
+    // table elements by later cleanup code.
+    var self = this;
+    // Tag names whose presence anywhere inside a table marks it as a data table.
+    var dataTags = ["col", "colgroup", "tfoot", "thead", "th"];
+
+    // Decide whether one table holds data (true) or is layout-only (false).
+    // The checks run in order and the first one that matches wins.
+    var classify = function(table) {
+      // Explicit opt-outs: role="presentation" or datatable="0".
+      if (table.getAttribute("role") == "presentation" ||
+          table.getAttribute("datatable") == "0") {
+        return false;
+      }
+
+      // A non-empty summary attribute marks a data table.
+      if (table.getAttribute("summary")) {
+        return true;
+      }
+
+      // So does a <caption> that has at least one child node.
+      var captionElem = table.getElementsByTagName("caption")[0];
+      if (captionElem && captionElem.childNodes.length > 0) {
+        return true;
+      }
+
+      for (var t = 0; t < dataTags.length; t++) {
+        if (table.getElementsByTagName(dataTags[t])[0]) {
+          self.log("Data table because found data-y descendant");
+          return true;
+        }
+      }
+
+      // A table nested inside this one indicates a layout table.
+      if (table.getElementsByTagName("table")[0]) {
+        return false;
+      }
+
+      // Otherwise go by size: many rows, many columns, or enough cells overall.
+      var dims = self._getRowAndColumnCount(table);
+      return dims.rows >= 10 ||
+        dims.columns > 4 ||
+        dims.rows * dims.columns > 10;
+    };
+
+    var allTables = root.getElementsByTagName("table");
+    for (var i = 0; i < allTables.length; i++) {
+      allTables[i]._readabilityDataTable = classify(allTables[i]);
+    }
+  },
+
+  /**
    * Clean an element of all tags of type "tag" if they look fishy.
    * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
    *
@@ -1652,6 +1538,15 @@ Readability.prototype = {
     //
     // TODO: Consider taking into account original contentScore here.
     this._removeNodes(e.getElementsByTagName(tag), function(node) {
+      // First check if we're in a data table, in which case don't remove us.
+      var isDataTable = function(t) {
+        return t._readabilityDataTable;
+      };
+
+      if (this._hasAncestorTag(node, "table", -1, isDataTable)) {
+        return false;
+      }
+
       var weight = this._getClassWeight(node);
       var contentScore = 0;
 
@@ -1667,7 +1562,7 @@ Readability.prototype = {
         // ominous signs, remove the element.
         var p = node.getElementsByTagName("p").length;
         var img = node.getElementsByTagName("img").length;
-        var li = node.getElementsByTagName("li").length-100;
+        var li = node.getElementsByTagName("li").length - 100;
         var input = node.getElementsByTagName("input").length;
 
         var embedCount = 0;
@@ -1681,11 +1576,10 @@ Readability.prototype = {
         var contentLength = this._getInnerText(node).length;
 
         var haveToRemove =
-          // Make an exception for elements with no p's and exactly 1 img.
-          (img > p && !this._hasAncestorTag(node, "figure")) ||
+          (img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure")) ||
           (!isList && li > p) ||
           (input > Math.floor(p/3)) ||
-          (!isList && contentLength < 25 && (img === 0 || img > 2)) ||
+          (!isList && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure")) ||
           (!isList && weight < 25 && linkDensity > 0.2) ||
           (weight >= 25 && linkDensity > 0.5) ||
           ((embedCount === 1 && contentLength < 75) || embedCount > 1);
@@ -1696,6 +1590,25 @@ Readability.prototype = {
   },
 
   /**
+   * Clean out elements whose id/class combinations match specific string.
+   *
+   * @param Element
+   * @param RegExp match id/class combination.
+   * @return void
+   **/
+  _cleanMatchedNodes: function(e, regex) {
+    // Walk the nodes after e in _getNextNode order, dropping each one whose
+    // "className id" string matches regex. The walk ends at the node that
+    // _getNextNode(e, true) yields (presumably the first node past e's subtree).
+    var stopNode = this._getNextNode(e, true);
+    var node = this._getNextNode(e);
+    while (node && node != stopNode) {
+      var isMatch = regex.test(node.className + " " + node.id);
+      node = isMatch ? this._removeAndGetNext(node) : this._getNextNode(node);
+    }
+  },
+
+  /**
    * Clean out spurious headers from an Element. Checks things like classnames and link density.
    *
    * @param Element
@@ -1713,10 +1626,6 @@ Readability.prototype = {
     return (this._flags & flag) > 0;
   },
 
-  _addFlag: function(flag) {
-    this._flags = this._flags | flag;
-  },
-
   _removeFlag: function(flag) {
     this._flags = this._flags & ~flag;
   },
@@ -1807,20 +1716,10 @@ Readability.prototype = {
     // Remove script tags from the document.
     this._removeScripts(this._doc);
 
-    // FIXME: Disabled multi-page article support for now as it
-    // needs more work on infrastructure.
-
-    // Make sure this document is added to the list of parsed pages first,
-    // so we don't double up on the first page.
-    // this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
-
-    // Pull out any possible next page link first.
-    // var nextPageLink = this._findNextPageLink(doc.body);
-
     this._prepDocument();
 
     var metadata = this._getArticleMetadata();
-    var articleTitle = metadata.title || this._getArticleTitle();
+    this._articleTitle = metadata.title;
 
     var articleContent = this._grabArticle();
     if (!articleContent)
@@ -1830,14 +1729,6 @@ Readability.prototype = {
 
     this._postProcessContent(articleContent);
 
-    // if (nextPageLink) {
-    //   // Append any additional pages after a small timeout so that people
-    //   // can start reading without having to wait for this to finish processing.
-    //   setTimeout((function() {
-    //     this._appendNextPage(nextPageLink);
-    //   }).bind(this), 500);
-    // }
-
     // If we haven't found an excerpt in the article's metadata, use the article's
     // first paragraph as the excerpt. This is used for displaying a preview of
     // the article's content.
@@ -1851,7 +1742,7 @@ Readability.prototype = {
     var textContent = articleContent.textContent;
     return {
       uri: this._uri,
-      title: articleTitle,
+      title: this._articleTitle,
       byline: metadata.byline || this._articleByline,
       dir: this._articleDir,
       content: articleContent.innerHTML,
@@ -1861,3 +1752,7 @@ Readability.prototype = {
     };
   }
 };
+
+if (typeof module === "object") {
+  module.exports = Readability;
+}
diff --git a/toolkit/components/reader/ReaderMode.jsm b/toolkit/components/reader/ReaderMode.jsm
index 033a02489..e9eb83154 100644
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -8,15 +8,18 @@ this.EXPORTED_SYMBOLS = ["ReaderMode"];
 
 const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
 
-// Constants for telemetry.
-const DOWNLOAD_SUCCESS = 0;
-const DOWNLOAD_ERROR_XHR = 1;
-const DOWNLOAD_ERROR_NO_DOC = 2;
-
-const PARSE_SUCCESS = 0;
-const PARSE_ERROR_TOO_MANY_ELEMENTS = 1;
-const PARSE_ERROR_WORKER = 2;
-const PARSE_ERROR_NO_ARTICLE = 3;
+// Class names to preserve in the readerized output. We preserve these class
+// names so that rules in aboutReader.css can match them.
+const CLASSES_TO_PRESERVE = [
+  "caption",
+  "hidden",
+  "invisible",
+  "sr-only",
+  "visually-hidden",
+  "visuallyhidden",
+  "wp-caption",
+  "wp-caption-text",
+];
 
 Cu.import("resource://gre/modules/Services.jsm");
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
@@ -24,17 +27,15 @@ Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.importGlobalProperties(["XMLHttpRequest"]);
 
 XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-common/utils.js");
-XPCOMUtils.defineLazyModuleGetter(this, "Messaging", "resource://gre/modules/Messaging.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "EventDispatcher", "resource://gre/modules/Messaging.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "TelemetryStopwatch", "resource://gre/modules/TelemetryStopwatch.jsm");
 
 XPCOMUtils.defineLazyGetter(this, "Readability", function() {
   let scope = {};
   scope.dump = this.dump;
   Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
-  return scope["Readability"];
+  return scope.Readability;
 });
 
 this.ReaderMode = {
@@ -61,21 +62,13 @@ this.ReaderMode = {
     return this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
   },
 
-  get isOnLowMemoryPlatform() {
-    let memory = Cc["@mozilla.org/xpcom/memory-service;1"].getService(Ci.nsIMemory);
-    delete this.isOnLowMemoryPlatform;
-    return this.isOnLowMemoryPlatform = memory.isLowMemoryPlatform();
-  },
-
-  _getStateForParseOnLoad: function () {
+  _getStateForParseOnLoad() {
     let isEnabled = Services.prefs.getBoolPref("reader.parse-on-load.enabled");
     let isForceEnabled = Services.prefs.getBoolPref("reader.parse-on-load.force-enabled");
-    // For low-memory devices, don't allow reader mode since it takes up a lot of memory.
-    // See https://bugzilla.mozilla.org/show_bug.cgi?id=792603 for details.
-    return isForceEnabled || (isEnabled && !this.isOnLowMemoryPlatform);
+    return isForceEnabled || isEnabled;
   },
 
-  observe: function(aMessage, aTopic, aData) {
+  observe(aMessage, aTopic, aData) {
     switch (aTopic) {
       case "nsPref:changed":
         if (aData.startsWith("reader.parse-on-load.")) {
@@ -91,7 +84,7 @@ this.ReaderMode = {
    * Enter the reader mode by going forward one step in history if applicable,
    * if not, append the about:reader page in the history instead.
    */
-  enterReaderMode: function(docShell, win) {
+  enterReaderMode(docShell, win) {
     let url = win.document.location.href;
     let readerURL = "about:reader?url=" + encodeURIComponent(url);
     let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
@@ -112,7 +105,7 @@ this.ReaderMode = {
    * Exit the reader mode by going back one step in history if applicable,
    * if not, append the original page in the history instead.
    */
-  leaveReaderMode: function(docShell, win) {
+  leaveReaderMode(docShell, win) {
     let url = win.document.location.href;
     let originalURL = this.getOriginalUrl(url);
     let webNav = docShell.QueryInterface(Ci.nsIWebNavigation);
@@ -136,14 +129,14 @@ this.ReaderMode = {
    * @return The original URL for the article, or null if we did not find
    *         a properly formatted about:reader URL.
    */
-  getOriginalUrl: function(url) {
+  getOriginalUrl(url) {
     if (!url.startsWith("about:reader?")) {
       return null;
     }
 
     let outerHash = "";
     try {
-      let uriObj = Services.io.newURI(url, null, null);
+      let uriObj = Services.io.newURI(url);
       url = uriObj.specIgnoringRef;
       outerHash = uriObj.ref;
     } catch (ex) { /* ignore, use the raw string */ }
@@ -155,27 +148,45 @@ this.ReaderMode = {
     let originalUrl = searchParams.get("url");
     if (outerHash) {
       try {
-        let uriObj = Services.io.newURI(originalUrl, null, null);
-        uriObj = Services.io.newURI('#' + outerHash, null, uriObj);
+        let uriObj = Services.io.newURI(originalUrl);
+        uriObj = Services.io.newURI("#" + outerHash, null, uriObj);
         originalUrl = uriObj.spec;
       } catch (ex) {}
     }
     return originalUrl;
   },
 
+  getOriginalUrlObjectForDisplay(url) {
+    // Map an about:reader URL to the exposable URI object for its original
+    // article, or null when there is no original URL or a fixup step throws.
+    let articleUrl = this.getOriginalUrl(url);
+    if (!articleUrl) {
+      return null;
+    }
+    try {
+      // Both fixup calls stay inside the try so a failure in either one
+      // yields null instead of propagating.
+      let fixup = Services.uriFixup;
+      let fixedUri = fixup.createFixupURI(articleUrl, fixup.FIXUP_FLAG_NONE);
+      return fixup.createExposableURI(fixedUri);
+    } catch (ex) {
+      return null;
+    }
+  },
+
   /**
    * Decides whether or not a document is reader-able without parsing the whole thing.
    *
    * @param doc A document to parse.
    * @return boolean Whether or not we should show the reader mode button.
    */
-  isProbablyReaderable: function(doc) {
+  isProbablyReaderable(doc) {
     // Only care about 'real' HTML documents:
     if (doc.mozSyntheticDocument || !(doc instanceof doc.defaultView.HTMLDocument)) {
       return false;
     }
 
-    let uri = Services.io.newURI(doc.location.href, null, null);
+    let uri = Services.io.newURI(doc.location.href);
     if (!this._shouldCheckUri(uri)) {
       return false;
     }
@@ -187,12 +198,12 @@ this.ReaderMode = {
     return new Readability(uri, doc).isProbablyReaderable(this.isNodeVisible.bind(this, utils));
   },
 
-  isNodeVisible: function(utils, node) {
+  isNodeVisible(utils, node) {
     let bounds = utils.getBoundsWithoutFlushing(node);
     return bounds.height > 0 && bounds.width > 0;
   },
 
-  getUtilsForWin: function(win) {
+  getUtilsForWin(win) {
     return win.QueryInterface(Ci.nsIInterfaceRequestor).getInterface(Ci.nsIDOMWindowUtils);
   },
 
@@ -204,16 +215,14 @@ this.ReaderMode = {
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
-  parseDocument: Task.async(function* (doc) {
-    let documentURI = Services.io.newURI(doc.documentURI, null, null);
-    let baseURI = Services.io.newURI(doc.baseURI, null, null);
-    if (!this._shouldCheckUri(documentURI) || !this._shouldCheckUri(baseURI, true)) {
+  parseDocument(doc) {
+    if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
       this.log("Reader mode disabled for URI");
       return null;
     }
 
-    return yield this._readerParse(baseURI, doc);
-  }),
+    return this._readerParse(doc);
+  },
 
   /**
    * Downloads and parses a document from a URL.
@@ -222,19 +231,28 @@ this.ReaderMode = {
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
-  downloadAndParseDocument: Task.async(function* (url) {
-    let doc = yield this._downloadDocument(url);
-    let uri = Services.io.newURI(doc.baseURI, null, null);
-    if (!this._shouldCheckUri(uri, true)) {
+  async downloadAndParseDocument(url) {
+    let doc = await this._downloadDocument(url);
+    if (!doc) {
+      return null;
+    }
+    if (!this._shouldCheckUri(doc.documentURIObject) || !this._shouldCheckUri(doc.baseURIObject, true)) {
       this.log("Reader mode disabled for URI");
       return null;
     }
 
-    return yield this._readerParse(uri, doc);
-  }),
+    return await this._readerParse(doc);
+  },
 
-  _downloadDocument: function (url) {
-    let histogram = Services.telemetry.getHistogramById("READER_MODE_DOWNLOAD_RESULT");
+  _downloadDocument(url) {
+    try {
+      if (!this._shouldCheckUri(Services.io.newURI(url))) {
+        return null;
+      }
+    } catch (ex) {
+      Cu.reportError(new Error(`Couldn't create URI from ${url} to download: ${ex}`));
+      return null;
+    }
     return new Promise((resolve, reject) => {
       let xhr = new XMLHttpRequest();
       xhr.open("GET", url, true);
@@ -243,14 +261,12 @@ this.ReaderMode = {
       xhr.onload = evt => {
         if (xhr.status !== 200) {
           reject("Reader mode XHR failed with status: " + xhr.status);
-          histogram.add(DOWNLOAD_ERROR_XHR);
           return;
         }
 
         let doc = xhr.responseXML;
         if (!doc) {
           reject("Reader mode XHR didn't return a document");
-          histogram.add(DOWNLOAD_ERROR_NO_DOC);
           return;
         }
 
@@ -261,7 +277,7 @@ this.ReaderMode = {
           if (content) {
             let urlIndex = content.toUpperCase().indexOf("URL=");
             if (urlIndex > -1) {
-              let baseURI = Services.io.newURI(url, null, null);
+              let baseURI = Services.io.newURI(url);
               let newURI = Services.io.newURI(content.substring(urlIndex + 4), null, baseURI);
               let newURL = newURI.spec;
               let ssm = Services.scriptSecurityManager;
@@ -290,10 +306,10 @@ this.ReaderMode = {
         // Convert these to real URIs to make sure the escaping (or lack
         // thereof) is identical:
         try {
-          responseURL = Services.io.newURI(responseURL, null, null).specIgnoringRef;
+          responseURL = Services.io.newURI(responseURL).specIgnoringRef;
         } catch (ex) { /* Ignore errors - we'll use what we had before */ }
         try {
-          givenURL = Services.io.newURI(givenURL, null, null).specIgnoringRef;
+          givenURL = Services.io.newURI(givenURL).specIgnoringRef;
         } catch (ex) { /* Ignore errors - we'll use what we had before */ }
 
         if (responseURL != givenURL) {
@@ -303,7 +319,6 @@ this.ReaderMode = {
           return;
         }
         resolve(doc);
-        histogram.add(DOWNLOAD_SUCCESS);
       };
       xhr.send();
     });
@@ -318,17 +333,17 @@ this.ReaderMode = {
    * @resolves JS object representing the article, or null if no article is found.
    * @rejects OS.File.Error
    */
-  getArticleFromCache: Task.async(function* (url) {
+  async getArticleFromCache(url) {
     let path = this._toHashedPath(url);
     try {
-      let array = yield OS.File.read(path);
+      let array = await OS.File.read(path);
       return JSON.parse(new TextDecoder().decode(array));
     } catch (e) {
       if (!(e instanceof OS.File.Error) || !e.becauseNoSuchFile)
         throw e;
       return null;
     }
-  }),
+  },
 
   /**
    * Stores an article in the cache.
@@ -338,14 +353,14 @@ this.ReaderMode = {
    * @resolves When the article is stored.
    * @rejects OS.File.Error
    */
-  storeArticleInCache: Task.async(function* (article) {
+  async storeArticleInCache(article) {
     let array = new TextEncoder().encode(JSON.stringify(article));
     let path = this._toHashedPath(article.url);
-    yield this._ensureCacheDir();
+    await this._ensureCacheDir();
     return OS.File.writeAtomic(path, array, { tmpPath: path + ".tmp" })
       .then(success => {
         OS.File.stat(path).then(info => {
-          return Messaging.sendRequest({
+          return EventDispatcher.instance.sendRequest({
             type: "Reader:AddedToCache",
             url: article.url,
             size: info.size,
@@ -353,7 +368,7 @@ this.ReaderMode = {
           });
         });
       });
-  }),
+  },
 
   /**
    * Removes an article from the cache given an article URI.
@@ -363,26 +378,29 @@ this.ReaderMode = {
    * @resolves When the article is removed.
    * @rejects OS.File.Error
    */
-  removeArticleFromCache: Task.async(function* (url) {
+  async removeArticleFromCache(url) {
     let path = this._toHashedPath(url);
-    yield OS.File.remove(path);
-  }),
+    await OS.File.remove(path);
+  },
 
-  log: function(msg) {
+  log(msg) {
     if (this.DEBUG)
       dump("Reader: " + msg);
   },
 
   _blockedHosts: [
-    "mail.google.com",
+    "amazon.com",
+    "basilisk-browser.org",
     "github.com",
+    "mail.google.com",
+    "palemoon.org",
     "pinterest.com",
     "reddit.com",
     "twitter.com",
     "youtube.com",
   ],
 
-  _shouldCheckUri: function (uri, isBaseUri = false) {
+  _shouldCheckUri(uri, isBaseUri = false) {
     if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
       this.log("Not parsing URI scheme: " + uri.scheme);
       return false;
@@ -412,59 +430,77 @@ this.ReaderMode = {
    * Attempts to parse a document into an article. Heavy lifting happens
    * in readerWorker.js.
    *
-   * @param uri The base URI of the article.
    * @param doc The document to parse.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
-  _readerParse: Task.async(function* (uri, doc) {
-    let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
+  async _readerParse(doc) {
     if (this.parseNodeLimit) {
       let numTags = doc.getElementsByTagName("*").length;
       if (numTags > this.parseNodeLimit) {
-        this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
-        histogram.add(PARSE_ERROR_TOO_MANY_ELEMENTS);
+        this.log("Aborting parse for " + doc.baseURIObject.spec + "; " + numTags + " elements found");
         return null;
       }
     }
 
+    // Fetch this here before we send `doc` off to the worker thread, as later on the
+    // document might be nuked but we will still want the URI.
+    let {documentURI} = doc;
+
     let uriParam = {
-      spec: uri.spec,
-      host: uri.host,
-      prePath: uri.prePath,
-      scheme: uri.scheme,
-      pathBase: Services.io.newURI(".", null, uri).spec
+      spec: doc.baseURIObject.spec,
+      host: doc.baseURIObject.host,
+      prePath: doc.baseURIObject.prePath,
+      scheme: doc.baseURIObject.scheme,
+      pathBase: Services.io.newURI(".", null, doc.baseURIObject).spec
+    };
+
+    let langAttributes = {
+      charset: doc.characterSet,
+      lang: doc.documentElement.lang
     };
 
     let serializer = Cc["@mozilla.org/xmlextras/xmlserializer;1"].
                      createInstance(Ci.nsIDOMSerializer);
     let serializedDoc = serializer.serializeToString(doc);
 
+    let options = {
+      classesToPreserve: CLASSES_TO_PRESERVE,
+    };
+
     let article = null;
     try {
-      article = yield ReaderWorker.post("parseDocument", [uriParam, serializedDoc]);
+      article = await ReaderWorker.post("parseDocument", [uriParam, serializedDoc, options]);
     } catch (e) {
       Cu.reportError("Error in ReaderWorker: " + e);
-      histogram.add(PARSE_ERROR_WORKER);
     }
 
+    // Explicitly null out doc to make it clear it might not be available from this
+    // point on.
+    doc = null;
+
     if (!article) {
       this.log("Worker did not return an article");
-      histogram.add(PARSE_ERROR_NO_ARTICLE);
       return null;
     }
 
-    // Readability returns a URI object, but we only care about the URL.
-    article.url = article.uri.spec;
+    // Readability returns a URI object based on the baseURI, but we only care
+    // about the original document's URL from now on. This also avoids spoofing
+    // attempts where the baseURI doesn't match the domain of the documentURI
+    article.url = documentURI;
     delete article.uri;
 
     let flags = Ci.nsIDocumentEncoder.OutputSelectionOnly | Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
     article.title = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils)
                                                     .convertToPlainText(article.title, flags, 0);
 
-    histogram.add(PARSE_SUCCESS);
+    await this._assignLanguage(article, langAttributes);
+    this._maybeAssignTextDirection(article);
+
+    this._assignReadTime(article);
+
     return article;
-  }),
+  },
 
   get _cryptoHash() {
     delete this._cryptoHash;
@@ -485,7 +521,7 @@ this.ReaderMode = {
    * @param url The article URL. This should have referrers removed.
    * @return The file path to the cached article.
    */
-  _toHashedPath: function (url) {
+  _toHashedPath(url) {
     let value = this._unicodeConverter.convertToByteArray(url);
     this._cryptoHash.init(this._cryptoHash.MD5);
     this._cryptoHash.update(value, value.length);
@@ -502,7 +538,7 @@ this.ReaderMode = {
    * @resolves When the cache directory exists.
    * @rejects OS.File.Error
    */
-  _ensureCacheDir: function () {
+  _ensureCacheDir() {
     let dir = OS.Path.join(OS.Constants.Path.profileDir, "readercache");
     return OS.File.exists(dir).then(exists => {
       if (!exists) {
@@ -510,5 +546,107 @@ this.ReaderMode = {
       }
       return undefined;
     });
-  }
+  },
+
+  /**
+   * Sets a global language string value if possible. If language detection is
+   * available, use that. Otherwise, revert to a simpler mechanism using the
+   * document's lang attribute or charset.
+   *
+   * @return Promise
+   * @resolves when the language is detected
+   */
+  _assignLanguage(article, attributes) {
+    try {
+      Cu.import("resource://modules/translation/LanguageDetector.jsm");
+      return LanguageDetector.detectLanguage(article.textContent).then(result => {
+        article.language = result.confident ? result.language : null;
+      });
+    } catch(ex) {
+      return new Promise((resolve) => {
+        resolve(this._assignSimpleLanguage(attributes));
+      }).then(result => {
+        article.language = result;
+      });
+    }
+  },
+
+  // Guesses a language from the lang attribute or, failing that, the charset.
+  // Returns a short language code, or undefined when neither source is usable.
+  _assignSimpleLanguage(attributes) {
+    const lang = (attributes.lang || "").substring(0, 2);
+    if (lang) {
+      return lang;
+    }
+    // No usable lang attribute: try charsets that are specific to one language.
+    const charsetLang = new Map([
+      [ "us-ascii",    "en" ],
+      [ "iso-8859-6",  "ar" ],
+      [ "iso-8859-7",  "el" ],
+      [ "iso-8859-8",  "he" ],
+      [ "iso-8859-9",  "tr" ],
+      [ "iso-8859-11", "th" ],
+      [ "jis_x0201",   "ja" ],
+      [ "shift_jis",   "ja" ],
+      [ "euc-jp",      "ja" ]
+    ]);
+    // The charset may arrive in mixed case (e.g. "Shift_JIS"); keys are lowercase.
+    return charsetLang.get((attributes.charset || "").toLowerCase());
+  },
+
+  _maybeAssignTextDirection(article) {
+    // TODO: Remove the hardcoded language codes below once bug 1320265 is resolved.
+    if (!article.dir && ["ar", "fa", "he", "ug", "ur"].includes(article.language)) {
+      article.dir = "rtl";
+    }
+  },
+
+  /**
+   * Assigns the estimated reading time range of the article to the article object.
+   *
+   * @param article the article object to assign the reading time estimate to.
+   */
+  _assignReadTime(article) {
+    let lang = article.language || "en";
+    const readingSpeed = this._getReadingSpeedForLanguage(lang);
+    const charactersPerMinuteLow = readingSpeed.cpm - readingSpeed.variance;
+    const charactersPerMinuteHigh = readingSpeed.cpm + readingSpeed.variance;
+    const length = article.length;
+
+    article.readingTimeMinsSlow = Math.ceil(length / charactersPerMinuteLow);
+    article.readingTimeMinsFast  = Math.ceil(length / charactersPerMinuteHigh);
+  },
+
+  /**
+   * Returns the reading speed for a language code, with its likely variance.
+   * The figures are estimated from a study of reading speeds in various
+   * languages: http://iovs.arvojournals.org/article.aspx?articleid=2166061
+   *
+   * @param lang language code to look up, e.g. "en".
+   * @return object with characters per minute and variance. Defaults to English
+   *         if no suitable language is found in the collection.
+   */
+  _getReadingSpeedForLanguage(lang) {
+    const readingSpeed = new Map([
+      [ "en", {cpm: 987,  variance: 118 } ],
+      [ "ar", {cpm: 612,  variance: 88 } ],
+      [ "de", {cpm: 920,  variance: 86 } ],
+      [ "es", {cpm: 1025, variance: 127 } ],
+      [ "fi", {cpm: 1078, variance: 121 } ],
+      [ "fr", {cpm: 998,  variance: 126 } ],
+      [ "he", {cpm: 833,  variance: 130 } ],
+      [ "it", {cpm: 950,  variance: 140 } ],
+      [ "ja", {cpm: 357,  variance: 56 } ], // Same "ja" code _assignSimpleLanguage returns.
+      [ "nl", {cpm: 978,  variance: 143 } ],
+      [ "pl", {cpm: 916,  variance: 126 } ],
+      [ "pt", {cpm: 913,  variance: 145 } ],
+      [ "ru", {cpm: 986,  variance: 175 } ],
+      [ "sk", {cpm: 885,  variance: 145 } ], // NOTE(review): possibly meant "sl" (Slovenian) - confirm.
+      [ "sv", {cpm: 917,  variance: 156 } ],
+      [ "tr", {cpm: 1054, variance: 156 } ],
+      [ "zh", {cpm: 255,  variance: 29 } ],
+    ]);
+
+    return readingSpeed.get(lang) || readingSpeed.get("en");
+  },
 };
diff --git a/toolkit/components/reader/ReaderWorker.js b/toolkit/components/reader/ReaderWorker.js
index 20023d4e0..9ae589d7d 100644
--- a/toolkit/components/reader/ReaderWorker.js
+++ b/toolkit/components/reader/ReaderWorker.js
@@ -2,6 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+/* eslint-env mozilla/chrome-worker */
+
 "use strict";
 
 /**
@@ -40,11 +42,12 @@ var Agent = {
    *
    * @param {object} uri URI data for the document.
    * @param {string} serializedDoc The serialized document.
+   * @param {object} options Options object to pass to Readability.
    *
    * @return {object} Article object returned from Readability.
    */
-  parseDocument: function (uri, serializedDoc) {
+  parseDocument(uri, serializedDoc, options) {
     let doc = new JSDOMParser().parse(serializedDoc);
-    return new Readability(uri, doc).parse();
+    return new Readability(uri, doc, options).parse();
   },
 };
diff --git a/toolkit/components/reader/content/aboutReader.html b/toolkit/components/reader/content/aboutReader.html
index b9c1139f6..1aa644474 100644
--- a/toolkit/components/reader/content/aboutReader.html
+++ b/toolkit/components/reader/content/aboutReader.html
@@ -7,63 +7,56 @@
 
   <link rel="stylesheet" href="chrome://global/skin/aboutReader.css" type="text/css"/>
 
-  <script type="text/javascript;version=1.8" src="chrome://global/content/reader/aboutReader.js"></script>
+  <script type="text/javascript" src="chrome://global/content/reader/aboutReader.js"></script>
 </head>
 
 <body>
-  <div id="container" class="container">
-    <div id="reader-header" class="header">
-      <style scoped>
-        @import url("chrome://global/skin/aboutReaderControls.css");
-      </style>
-      <a id="reader-domain" class="domain"></a>
+  <div class="container">
+    <div class="header reader-header">
+      <a class="domain reader-domain"></a>
       <div class="domain-border"></div>
-      <h1 id="reader-title"></h1>
-      <div id="reader-credits" class="credits"></div>
+      <h1 class="reader-title"></h1>
+      <div class="credits reader-credits"></div>
+      <div class="meta-data">
+        <div class="reader-estimated-time"></div>
+      </div>
     </div>
 
+    <hr>
+
     <div class="content">
-      <style scoped>
-        @import url("chrome://global/skin/aboutReaderContent.css");
-      </style>
-      <div id="moz-reader-content"></div>
+      <div class="moz-reader-content"></div>
     </div>
 
     <div>
-      <style scoped>
-        @import url("chrome://global/skin/aboutReaderControls.css");
-      </style>
-      <div id="reader-message"></div>
+      <div class="reader-message"></div>
     </div>
   </div>
 
-  <ul id="reader-toolbar" class="toolbar">
-    <style scoped>
-      @import url("chrome://global/skin/aboutReaderControls.css");
-    </style>
-    <li><button id="close-button" class="button close-button"/></li>
-    <ul id="style-dropdown" class="dropdown">
+  <ul class="toolbar reader-toolbar">
+    <li><button class="button close-button"/></li>
+    <ul class="dropdown style-dropdown">
       <li><button class="dropdown-toggle button style-button"/></li>
-      <li id="reader-popup" class="dropdown-popup">
-        <div id="font-type-buttons"></div>
-        <hr></hr>
-        <div id="font-size-buttons">
-          <button id="font-size-minus" class="minus-button"/>
-          <button id="font-size-sample"/>
-          <button id="font-size-plus" class="plus-button"/>
+      <li class="dropdown-popup">
+        <div class="font-type-buttons"></div>
+        <hr>
+        <div class="font-size-buttons">
+          <button class="minus-button"/>
+          <button class="font-size-sample"/>
+          <button class="plus-button"/>
         </div>
-        <hr></hr>
-        <div id="content-width-buttons">
-          <button id="content-width-minus" class="content-width-minus-button"/>
-          <button id="content-width-plus" class="content-width-plus-button"/>
+        <hr>
+        <div class="content-width-buttons">
+          <button class="content-width-minus-button"/>
+          <button class="content-width-plus-button"/>
         </div>
-        <hr></hr>
-        <div id="line-height-buttons">
-          <button id="line-height-minus" class="line-height-minus-button"/>
-          <button id="line-height-plus" class="line-height-plus-button"/>
+        <hr>
+        <div class="line-height-buttons">
+          <button class="line-height-minus-button"/>
+          <button class="line-height-plus-button"/>
         </div>
-        <hr></hr>
-        <div id="color-scheme-buttons"></div>
+        <hr>
+        <div class="color-scheme-buttons"></div>
         <div class="dropdown-arrow"/>
       </li>
     </ul>
diff --git a/toolkit/components/reader/content/aboutReader.js b/toolkit/components/reader/content/aboutReader.js
index 17133e69d..6c963382e 100644
--- a/toolkit/components/reader/content/aboutReader.js
+++ b/toolkit/components/reader/content/aboutReader.js
@@ -4,6 +4,6 @@
 
 "use strict";
 
-window.addEventListener("DOMContentLoaded", function () {
+window.addEventListener("DOMContentLoaded", function() {
   document.dispatchEvent(new CustomEvent("AboutReaderContentLoaded", { bubbles: true }));
 });
author	Moonchild <mcwerewolf@gmail.com>	2018-05-16 17:10:38 +0200
committer	GitHub <noreply@github.com>	2018-05-16 17:10:38 +0200
commit	90942a2af0cabb9345cf04fa6113e12197504fcf (patch)
tree	e16c71be5a1343abe0489863f84ed271b6ebd3d7 /toolkit/components/reader
parent	819ca50f163a9113772a7dbfd617d97151893337 (diff)
parent	9ef464a5ac0a17135a0f7b4fef070bb4f7fbe44c (diff)
download	UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.gz UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.lz UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.tar.xz UXP-90942a2af0cabb9345cf04fa6113e12197504fcf.zip