From 0ddd00f1959c78ce37c14fef3c83401408fca3bf Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Tue, 25 Feb 2020 15:07:00 -0500 Subject: Issue #439 - Remove tests from toolkit/ --- toolkit/components/reader/moz.build | 11 +- toolkit/components/reader/test/browser.ini | 15 -- .../test/browser_bug1124271_readerModePinnedTab.js | 47 ----- .../components/reader/test/browser_readerMode.js | 220 --------------------- .../reader/test/browser_readerMode_hidden_nodes.js | 53 ----- .../reader/test/browser_readerMode_with_anchor.js | 21 -- toolkit/components/reader/test/head.js | 126 ------------ .../components/reader/test/readerModeArticle.html | 25 --- .../reader/test/readerModeArticleHiddenNodes.html | 22 --- 9 files changed, 1 insertion(+), 539 deletions(-) delete mode 100644 toolkit/components/reader/test/browser.ini delete mode 100644 toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js delete mode 100644 toolkit/components/reader/test/browser_readerMode.js delete mode 100644 toolkit/components/reader/test/browser_readerMode_hidden_nodes.js delete mode 100644 toolkit/components/reader/test/browser_readerMode_with_anchor.js delete mode 100644 toolkit/components/reader/test/head.js delete mode 100644 toolkit/components/reader/test/readerModeArticle.html delete mode 100644 toolkit/components/reader/test/readerModeArticleHiddenNodes.html (limited to 'toolkit/components/reader') diff --git a/toolkit/components/reader/moz.build b/toolkit/components/reader/moz.build index d49bda14f..4ffca0421 100644 --- a/toolkit/components/reader/moz.build +++ b/toolkit/components/reader/moz.build @@ -11,9 +11,7 @@ EXTRA_JS_MODULES += [ 'ReaderMode.jsm' ] -EXTRA_PP_JS_MODULES += [ - 'Readerable.jsm' -] +EXTRA_PP_JS_MODULES += ['Readerable.jsm'] EXTRA_JS_MODULES.reader = [ 'JSDOMParser.js', @@ -21,10 +19,3 @@ EXTRA_JS_MODULES.reader = [ 'ReaderWorker.js', 'ReaderWorker.jsm' ] - -BROWSER_CHROME_MANIFESTS += [ - 'test/browser.ini' -] - -with Files('**'): - BUG_COMPONENT = ('Toolkit', 'Reader Mode') diff --git a/toolkit/components/reader/test/browser.ini b/toolkit/components/reader/test/browser.ini deleted file mode 100644 index 4f9df23b3..000000000 --- a/toolkit/components/reader/test/browser.ini +++ /dev/null @@ -1,15 +0,0 @@ -[DEFAULT] -support-files = head.js -[browser_readerMode.js] -support-files = - readerModeArticle.html - readerModeArticleHiddenNodes.html -[browser_readerMode_hidden_nodes.js] -support-files = - readerModeArticleHiddenNodes.html -[browser_readerMode_with_anchor.js] -support-files = - readerModeArticle.html -[browser_bug1124271_readerModePinnedTab.js] -support-files = - readerModeArticle.html diff --git a/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js b/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js deleted file mode 100644 index 39913aa3e..000000000 --- a/toolkit/components/reader/test/browser_bug1124271_readerModePinnedTab.js +++ /dev/null @@ -1,47 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -// Test that the reader mode button won't open in a new tab when clicked from a pinned tab - -const PREF = "reader.parse-on-load.enabled"; - -const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); - -var readerButton = document.getElementById("reader-mode-button"); - -add_task(function* () { - registerCleanupFunction(function() { - Services.prefs.clearUserPref(PREF); - while (gBrowser.tabs.length > 1) { - gBrowser.removeCurrentTab(); - } - }); - - // Enable the reader mode button. - Services.prefs.setBoolPref(PREF, true); - - let tab = gBrowser.selectedTab = gBrowser.addTab(); - gBrowser.pinTab(tab); - - let initialTabsCount = gBrowser.tabs.length; - - // Point tab to a test page that is reader-able. - let url = TEST_PATH + "readerModeArticle.html"; - yield promiseTabLoadEvent(tab, url); - yield promiseWaitForCondition(() => !readerButton.hidden); - - readerButton.click(); - yield promiseTabLoadEvent(tab); - - // Ensure no new tabs are opened when exiting reader mode in a pinned tab - is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); - - let pageShownPromise = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - readerButton.click(); - yield pageShownPromise; - // Ensure no new tabs are opened when exiting reader mode in a pinned tab - is(gBrowser.tabs.length, initialTabsCount, "No additional tabs were opened."); - - gBrowser.removeCurrentTab(); -}); diff --git a/toolkit/components/reader/test/browser_readerMode.js b/toolkit/components/reader/test/browser_readerMode.js deleted file mode 100644 index 70290c3b5..000000000 --- a/toolkit/components/reader/test/browser_readerMode.js +++ /dev/null @@ -1,220 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/** - * Test that the reader mode button appears and works properly on - * reader-able content. - */ -const TEST_PREFS = [ - ["reader.parse-on-load.enabled", true], -]; - -const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); - -var readerButton = document.getElementById("reader-mode-button"); - -add_task(function* test_reader_button() { - registerCleanupFunction(function() { - // Reset test prefs. - TEST_PREFS.forEach(([name, value]) => { - Services.prefs.clearUserPref(name); - }); - while (gBrowser.tabs.length > 1) { - gBrowser.removeCurrentTab(); - } - }); - - // Set required test prefs. - TEST_PREFS.forEach(([name, value]) => { - Services.prefs.setBoolPref(name, value); - }); - Services.prefs.setBoolPref("browser.reader.detectedFirstArticle", false); - - let tab = gBrowser.selectedTab = gBrowser.addTab(); - is_element_hidden(readerButton, "Reader mode button is not present on a new tab"); - ok(!UITour.isInfoOnTarget(window, "readerMode-urlBar"), - "Info panel shouldn't appear without the reader mode button"); - ok(!Services.prefs.getBoolPref("browser.reader.detectedFirstArticle"), - "Shouldn't have detected the first article"); - - // We're going to show the reader mode intro popup, make sure we wait for it: - let tourPopupShownPromise = - BrowserTestUtils.waitForEvent(document.getElementById("UITourTooltip"), "popupshown"); - // Point tab to a test page that is reader-able. - let url = TEST_PATH + "readerModeArticle.html"; - yield promiseTabLoadEvent(tab, url); - yield promiseWaitForCondition(() => !readerButton.hidden); - yield tourPopupShownPromise; - is_element_visible(readerButton, "Reader mode button is present on a reader-able page"); - ok(UITour.isInfoOnTarget(window, "readerMode-urlBar"), - "Info panel should be anchored at the reader mode button"); - ok(Services.prefs.getBoolPref("browser.reader.detectedFirstArticle"), - "Should have detected the first article"); - - // Switch page into reader mode. - readerButton.click(); - yield promiseTabLoadEvent(tab); - ok(!UITour.isInfoOnTarget(window, "readerMode-urlBar"), "Info panel should have closed"); - - let readerUrl = gBrowser.selectedBrowser.currentURI.spec; - ok(readerUrl.startsWith("about:reader"), "about:reader loaded after clicking reader mode button"); - is_element_visible(readerButton, "Reader mode button is present on about:reader"); - - is(gURLBar.value, readerUrl, "gURLBar value is about:reader URL"); - is(gURLBar.textValue, url.substring("http://".length), "gURLBar is displaying original article URL"); - - // Check selected value for URL bar - yield new Promise((resolve, reject) => { - waitForClipboard(url, function () { - gURLBar.focus(); - gURLBar.select(); - goDoCommand("cmd_copy"); - }, resolve, reject); - }); - - info("Got correct URL when copying"); - - // Switch page back out of reader mode. - let promisePageShow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - readerButton.click(); - yield promisePageShow; - is(gBrowser.selectedBrowser.currentURI.spec, url, - "Back to the original page after clicking active reader mode button"); - ok(gBrowser.selectedBrowser.canGoForward, - "Moved one step back in the session history."); - - // Load a new tab that is NOT reader-able. - let newTab = gBrowser.selectedTab = gBrowser.addTab(); - yield promiseTabLoadEvent(newTab, "about:robots"); - yield promiseWaitForCondition(() => readerButton.hidden); - is_element_hidden(readerButton, "Reader mode button is not present on a non-reader-able page"); - - // Switch back to the original tab to make sure reader mode button is still visible. - gBrowser.removeCurrentTab(); - yield promiseWaitForCondition(() => !readerButton.hidden); - is_element_visible(readerButton, "Reader mode button is present on a reader-able page"); -}); - -add_task(function* test_getOriginalUrl() { - let { ReaderMode } = Cu.import("resource://gre/modules/ReaderMode.jsm", {}); - let url = "http://foo.com/article.html"; - - is(ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(url)), url, "Found original URL from encoded URL"); - is(ReaderMode.getOriginalUrl("about:reader?foobar"), null, "Did not find original URL from malformed reader URL"); - is(ReaderMode.getOriginalUrl(url), null, "Did not find original URL from non-reader URL"); - - let badUrl = "http://foo.com/?;$%^^"; - is(ReaderMode.getOriginalUrl("about:reader?url=" + encodeURIComponent(badUrl)), badUrl, "Found original URL from encoded malformed URL"); - is(ReaderMode.getOriginalUrl("about:reader?url=" + badUrl), badUrl, "Found original URL from non-encoded malformed URL"); -}); - -add_task(function* test_reader_view_element_attribute_transform() { - registerCleanupFunction(function() { - while (gBrowser.tabs.length > 1) { - gBrowser.removeCurrentTab(); - } - }); - - function observeAttribute(element, attribute, triggerFn, checkFn) { - return new Promise(resolve => { - let observer = new MutationObserver((mutations) => { - mutations.forEach( mu => { - if (element.getAttribute(attribute) !== mu.oldValue) { - checkFn(); - resolve(); - observer.disconnect(); - } - }); - }); - - observer.observe(element, { - attributes: true, - attributeOldValue: true, - attributeFilter: [attribute] - }); - - triggerFn(); - }); - } - - let command = document.getElementById("View:ReaderView"); - let tab = yield BrowserTestUtils.openNewForegroundTab(gBrowser); - is(command.hidden, true, "Command element should have the hidden attribute"); - - info("Navigate a reader-able page"); - let waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(command, "hidden", - () => { - let url = TEST_PATH + "readerModeArticle.html"; - tab.linkedBrowser.loadURI(url); - }, - () => { - is(command.hidden, false, "Command's hidden attribute should be false on a reader-able page"); - } - ); - yield waitForPageshow; - - info("Navigate a non-reader-able page"); - waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(command, "hidden", - () => { - let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; - tab.linkedBrowser.loadURI(url); - }, - () => { - is(command.hidden, true, "Command's hidden attribute should be true on a non-reader-able page"); - } - ); - yield waitForPageshow; - - info("Navigate a reader-able page"); - waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(command, "hidden", - () => { - let url = TEST_PATH + "readerModeArticle.html"; - tab.linkedBrowser.loadURI(url); - }, - () => { - is(command.hidden, false, "Command's hidden attribute should be false on a reader-able page"); - } - ); - yield waitForPageshow; - - info("Enter Reader Mode"); - waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(readerButton, "readeractive", - () => { - readerButton.click(); - }, - () => { - is(readerButton.getAttribute("readeractive"), "true", "readerButton's readeractive attribute should be true when entering reader mode"); - } - ); - yield waitForPageshow; - - info("Exit Reader Mode"); - waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(readerButton, "readeractive", - () => { - readerButton.click(); - }, - () => { - is(readerButton.getAttribute("readeractive"), "", "readerButton's readeractive attribute should be empty when reader mode is exited"); - } - ); - yield waitForPageshow; - - info("Navigate a non-reader-able page"); - waitForPageshow = BrowserTestUtils.waitForContentEvent(tab.linkedBrowser, "pageshow"); - yield observeAttribute(command, "hidden", - () => { - let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; - tab.linkedBrowser.loadURI(url); - }, - () => { - is(command.hidden, true, "Command's hidden attribute should be true on a non-reader-able page"); - } - ); - yield waitForPageshow; -}); diff --git a/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js b/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js deleted file mode 100644 index b73eab58d..000000000 --- a/toolkit/components/reader/test/browser_readerMode_hidden_nodes.js +++ /dev/null @@ -1,53 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/** - * Test that the reader mode button appears and works properly on - * reader-able content. - */ -const TEST_PREFS = [ - ["reader.parse-on-load.enabled", true], - ["browser.reader.detectedFirstArticle", false], -]; - -const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); - -var readerButton = document.getElementById("reader-mode-button"); - -add_task(function* test_reader_button() { - registerCleanupFunction(function() { - // Reset test prefs. - TEST_PREFS.forEach(([name, value]) => { - Services.prefs.clearUserPref(name); - }); - while (gBrowser.tabs.length > 1) { - gBrowser.removeCurrentTab(); - } - }); - - // Set required test prefs. - TEST_PREFS.forEach(([name, value]) => { - Services.prefs.setBoolPref(name, value); - }); - - let tab = gBrowser.selectedTab = gBrowser.addTab(); - is_element_hidden(readerButton, "Reader mode button is not present on a new tab"); - // Point tab to a test page that is not reader-able due to hidden nodes. - let url = TEST_PATH + "readerModeArticleHiddenNodes.html"; - let paintPromise = ContentTask.spawn(tab.linkedBrowser, "", function() { - return new Promise(resolve => { - addEventListener("DOMContentLoaded", function onDCL() { - removeEventListener("DOMContentLoaded", onDCL); - addEventListener("MozAfterPaint", function onPaint() { - removeEventListener("MozAfterPaint", onPaint); - resolve(); - }); - }); - }); - }); - tab.linkedBrowser.loadURI(url); - yield paintPromise; - - is_element_hidden(readerButton, "Reader mode button is still not present on tab with unreadable content."); -}); diff --git a/toolkit/components/reader/test/browser_readerMode_with_anchor.js b/toolkit/components/reader/test/browser_readerMode_with_anchor.js deleted file mode 100644 index 24c23c49f..000000000 --- a/toolkit/components/reader/test/browser_readerMode_with_anchor.js +++ /dev/null @@ -1,21 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -"use strict"; - -const TEST_PATH = getRootDirectory(gTestPath).replace("chrome://mochitests/content", "http://example.com"); - -add_task(function* () { - yield BrowserTestUtils.withNewTab(TEST_PATH + "readerModeArticle.html#foo", function* (browser) { - let pageShownPromise = BrowserTestUtils.waitForContentEvent(browser, "AboutReaderContentReady"); - let readerButton = document.getElementById("reader-mode-button"); - readerButton.click(); - yield pageShownPromise; - yield ContentTask.spawn(browser, null, function* () { - // Check if offset != 0 - ok(content.document.getElementById("foo") !== null, "foo element should be in document"); - ok(content.pageYOffset != 0, "pageYOffset should be > 0"); - }); - }); -}); diff --git a/toolkit/components/reader/test/head.js b/toolkit/components/reader/test/head.js deleted file mode 100644 index 3d8d989bc..000000000 --- a/toolkit/components/reader/test/head.js +++ /dev/null @@ -1,126 +0,0 @@ -XPCOMUtils.defineLazyModuleGetter(this, "Promise", - "resource://gre/modules/Promise.jsm"); - -/* exported promiseTabLoadEvent, promiseWaitForCondition, is_element_visible, is_element_hidden */ - -/** - * Waits for a load (or custom) event to finish in a given tab. If provided - * load an uri into the tab. - * - * @param tab - * The tab to load into. - * @param [optional] url - * The url to load, or the current url. - * @return {Promise} resolved when the event is handled. - * @resolves to the received event - * @rejects if a valid load event is not received within a meaningful interval - */ -function promiseTabLoadEvent(tab, url) { - let deferred = Promise.defer(); - info("Wait tab event: load"); - - function handle(loadedUrl) { - if (loadedUrl === "about:blank" || (url && loadedUrl !== url)) { - info(`Skipping spurious load event for ${loadedUrl}`); - return false; - } - - info("Tab event received: load"); - return true; - } - - // Create two promises: one resolved from the content process when the page - // loads and one that is rejected if we take too long to load the url. - let loaded = BrowserTestUtils.browserLoaded(tab.linkedBrowser, false, handle); - - let timeout = setTimeout(() => { - deferred.reject(new Error("Timed out while waiting for a 'load' event")); - }, 30000); - - loaded.then(() => { - clearTimeout(timeout); - deferred.resolve(); - }); - - if (url) - BrowserTestUtils.loadURI(tab.linkedBrowser, url); - - // Promise.all rejects if either promise rejects (i.e. if we time out) and - // if our loaded promise resolves before the timeout, then we resolve the - // timeout promise as well, causing the all promise to resolve. - return Promise.all([deferred.promise, loaded]); -} - -function waitForCondition(condition, nextTest, errorMsg, retryTimes) { - retryTimes = typeof retryTimes !== 'undefined' ? retryTimes : 30; - var tries = 0; - var interval = setInterval(function() { - if (tries >= retryTimes) { - ok(false, errorMsg); - moveOn(); - } - var conditionPassed; - try { - conditionPassed = condition(); - } catch (e) { - ok(false, e + "\n" + e.stack); - conditionPassed = false; - } - if (conditionPassed) { - moveOn(); - } - tries++; - }, 100); - var moveOn = function() { - clearInterval(interval); - nextTest(); - }; -} - -function promiseWaitForCondition(aConditionFn) { - let deferred = Promise.defer(); - waitForCondition(aConditionFn, deferred.resolve, "Condition didn't pass."); - return deferred.promise; -} - -function is_element_visible(element, msg) { - isnot(element, null, "Element should not be null, when checking visibility"); - ok(is_visible(element), msg || "Element should be visible"); - -} -function is_element_hidden(element, msg) { - isnot(element, null, "Element should not be null, when checking visibility"); - ok(is_hidden(element), msg || "Element should be hidden"); -} - -function is_visible(element) { - var style = element.ownerGlobal.getComputedStyle(element); - if (style.display == "none") - return false; - if (style.visibility != "visible") - return false; - if (style.display == "-moz-popup" && element.state != "open") - return false; - - // Hiding a parent element will hide all its children - if (element.parentNode != element.ownerDocument) - return is_visible(element.parentNode); - - return true; -} - -function is_hidden(element) { - var style = element.ownerGlobal.getComputedStyle(element); - if (style.display == "none") - return true; - if (style.visibility != "visible") - return true; - if (style.display == "-moz-popup") - return ["hiding", "closed"].indexOf(element.state) != -1; - - // Hiding a parent element will hide all its children - if (element.parentNode != element.ownerDocument) - return is_hidden(element.parentNode); - - return false; -} diff --git a/toolkit/components/reader/test/readerModeArticle.html b/toolkit/components/reader/test/readerModeArticle.html deleted file mode 100644 index 7c5033d5b..000000000 --- a/toolkit/components/reader/test/readerModeArticle.html +++ /dev/null @@ -1,25 +0,0 @@ - - - -Article title - - - -
Site header
-
-

Article title

-

by Jane Doe

-

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-
by John Doe
-

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-
- - diff --git a/toolkit/components/reader/test/readerModeArticleHiddenNodes.html b/toolkit/components/reader/test/readerModeArticleHiddenNodes.html deleted file mode 100644 index 92441b797..000000000 --- a/toolkit/components/reader/test/readerModeArticleHiddenNodes.html +++ /dev/null @@ -1,22 +0,0 @@ - - - -Article title - - - - -
Site header
-
-

Article title

-

by Jane Doe

-

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis. Curabitur dapibus enim sit amet elit pharetra tincidunt feugiat nisl imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed odio eros. Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at sem facilisis semper ac in est.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-

Vivamus fermentum semper porta. Nunc diam velit, adipiscing ut tristique vitae, sagittis vel odio. Maecenas convallis ullamcorper ultricies. Curabitur ornare, ligula semper consectetur sagittis, nisi diam iaculis velit, id fringilla sem nunc vel mi. Nam dictum, odio nec pretium volutpat, arcu ante placerat erat, non tristique elit urna et turpis. Quisque mi metus, ornare sit amet fermentum et, tincidunt et orci. Fusce eget orci a orci congue vestibulum. Ut dolor diam, elementum et vestibulum eu, porttitor vel elit. Curabitur venenatis pulvinar tellus gravida ornare. Sed et erat faucibus nunc euismod ultricies ut id justo. Nullam cursus suscipit nisi, et ultrices justo sodales nec. Fusce venenatis facilisis lectus ac semper. Aliquam at massa ipsum. Quisque bibendum purus convallis nulla ultrices ultricies. Nullam aliquam, mi eu aliquam tincidunt, purus velit laoreet tortor, viverra pretium nisi quam vitae mi. Fusce vel volutpat elit. Nam sagittis nisi dui.

-
- - -- cgit v1.2.3 From 09bdffde5ed2c3d2eee455d3937bbba11da64eef Mon Sep 17 00:00:00 2001 From: Ascrod <32915892+Ascrod@users.noreply.github.com> Date: Sat, 23 May 2020 22:12:01 -0400 Subject: Issue #361 - Update Readability from upstream. (git rev 52ab9b5c8916c306a47b2119270dcdabebf9d203) --- toolkit/components/reader/JSDOMParser.js | 34 +- .../components/reader/Readability-readerable.js | 11 +- toolkit/components/reader/Readability.js | 390 ++++++++++++++++++--- 3 files changed, 367 insertions(+), 68 deletions(-) (limited to 'toolkit/components/reader') diff --git a/toolkit/components/reader/JSDOMParser.js b/toolkit/components/reader/JSDOMParser.js index ab2f503e1..2d3d6f156 100644 --- a/toolkit/components/reader/JSDOMParser.js +++ b/toolkit/components/reader/JSDOMParser.js @@ -315,6 +315,7 @@ } } getElems(this); + elems._isLiveNodeList = true; return elems; } @@ -503,17 +504,9 @@ }, setValue: function(newValue) { this._value = newValue; - delete this._decodedValue; }, - setDecodedValue: function(newValue) { - this._value = encodeHTML(newValue); - this._decodedValue = newValue; - }, - getDecodedValue: function() { - if (typeof this._decodedValue === "undefined") { - this._decodedValue = (this._value && decodeHTML(this._value)) || ""; - } - return this._decodedValue; + getEncodedValue: function() { + return encodeHTML(this._value); }, }; @@ -673,6 +666,14 @@ this.setAttribute("src", str); }, + get srcset() { + return this.getAttribute("srcset") || ""; + }, + + set srcset(str) { + this.setAttribute("srcset", str); + }, + get nodeName() { return this.tagName; }, @@ -689,7 +690,7 @@ for (var j = 0; j < child.attributes.length; j++) { var attr = child.attributes[j]; // the attribute value will be HTML escaped. - var val = attr.value; + var val = attr.getEncodedValue(); var quote = (val.indexOf('"') === -1 ? '"' : "'"); arr.push(" " + attr.name + "=" + quote + val + quote); } @@ -767,8 +768,9 @@ getAttribute: function (name) { for (var i = this.attributes.length; --i >= 0;) { var attr = this.attributes[i]; - if (attr.name === name) - return attr.getDecodedValue(); + if (attr.name === name) { + return attr.value; + } } return undefined; }, @@ -777,11 +779,11 @@ for (var i = this.attributes.length; --i >= 0;) { var attr = this.attributes[i]; if (attr.name === name) { - attr.setDecodedValue(value); + attr.setValue(value); return; } } - this.attributes.push(new Attribute(name, encodeHTML(value))); + this.attributes.push(new Attribute(name, value)); }, removeAttribute: function (name) { @@ -945,7 +947,7 @@ // Read the attribute value (and consume the matching quote) var value = this.readString(c); - node.attributes.push(new Attribute(name, value)); + node.attributes.push(new Attribute(name, decodeHTML(value))); return; }, diff --git a/toolkit/components/reader/Readability-readerable.js b/toolkit/components/reader/Readability-readerable.js index d0e1b8164..839d9fbf7 100644 --- a/toolkit/components/reader/Readability-readerable.js +++ b/toolkit/components/reader/Readability-readerable.js @@ -31,13 +31,16 @@ var REGEXPS = { // NOTE: These two regular expressions are duplicated in // Readability.js. Please keep both copies in sync. - unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, - okMaybeItsACandidate: /and|article|body|column|main|shadow/i, + unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, }; function isNodeVisible(node) { - // Have to null-check node.style to deal with SVG and MathML nodes. - return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden"); + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. + return (!node.style || node.style.display != "none") + && !node.hasAttribute("hidden") + //check for "fallback-image" so that wikimedia math images are displayed + && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); } /** diff --git a/toolkit/components/reader/Readability.js b/toolkit/components/reader/Readability.js index 69fb53f86..4a3689885 100644 --- a/toolkit/components/reader/Readability.js +++ b/toolkit/components/reader/Readability.js @@ -43,6 +43,7 @@ function Readability(doc, options) { options = options || {}; this._doc = doc; + this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__; this._articleTitle = null; this._articleByline = null; this._articleDir = null; @@ -55,6 +56,7 @@ function Readability(doc, options) { this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES; this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD; this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []); + this._keepClasses = !!options.keepClasses; // Start with all flags set this._flags = this.FLAG_STRIP_UNLIKELYS | @@ -121,20 +123,23 @@ Readability.prototype = { REGEXPS: { // NOTE: These two regular expressions are duplicated in // Readability-readerable.js. Please keep both copies in sync. - unlikelyCandidates: /-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, - okMaybeItsACandidate: /and|article|body|column|main|shadow/i, + unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, - negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, + negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, byline: /byline|author|dateline|writtenby|p-author/i, replaceFonts: /<(\/?)font[^>]*>/gi, normalize: /\s{2,}/g, videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, + shareElements: /(\b|_)(share|sharedaddy)(\b|_)/i, nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, prevLink: /(prev|earl|old|new|<|«)/i, whitespace: /^\s*$/, hasContent: /\S$/, + srcsetUrl: /(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g, + b64DataUrl: /^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i }, DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ], @@ -159,6 +164,15 @@ Readability.prototype = { // These are the classes that readability sets itself. CLASSES_TO_PRESERVE: [ "page" ], + // These are the list of HTML entities that need to be escaped. + HTML_ESCAPE_MAP: { + "lt": "<", + "gt": ">", + "amp": "&", + "quot": '"', + "apos": "'", + }, + /** * Run any post-process modifications to article content as necessary. * @@ -169,8 +183,10 @@ Readability.prototype = { // Readability cannot open relative uris so we convert them to absolute uris. this._fixRelativeUris(articleContent); - // Remove classes. - this._cleanClasses(articleContent); + if (!this._keepClasses) { + // Remove classes. + this._cleanClasses(articleContent); + } }, /** @@ -184,6 +200,10 @@ Readability.prototype = { * @return void */ _removeNodes: function(nodeList, filterFn) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _removeNodes"); + } for (var i = nodeList.length - 1; i >= 0; i--) { var node = nodeList[i]; var parentNode = node.parentNode; @@ -203,6 +223,10 @@ Readability.prototype = { * @return void */ _replaceNodeTags: function(nodeList, newTagName) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _replaceNodeTags"); + } for (var i = nodeList.length - 1; i >= 0; i--) { var node = nodeList[i]; this._setNodeTag(node, newTagName); @@ -322,6 +346,7 @@ Readability.prototype = { if (baseURI == documentURI && uri.charAt(0) == "#") { return uri; } + // Otherwise, resolve against base URI: try { return new URL(uri, baseURI).href; @@ -335,22 +360,50 @@ Readability.prototype = { this._forEachNode(links, function(link) { var href = link.getAttribute("href"); if (href) { - // Replace links with javascript: URIs with text content, since + // Remove links with javascript: URIs, since // they won't work after scripts have been removed from the page. if (href.indexOf("javascript:") === 0) { - var text = this._doc.createTextNode(link.textContent); - link.parentNode.replaceChild(text, link); + // if the link only contains simple text content, it can be converted to a text node + if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) { + var text = this._doc.createTextNode(link.textContent); + link.parentNode.replaceChild(text, link); + } else { + // if the link has multiple children, they should all be preserved + var container = this._doc.createElement("span"); + while (link.childNodes.length > 0) { + container.appendChild(link.childNodes[0]); + } + link.parentNode.replaceChild(container, link); + } } else { link.setAttribute("href", toAbsoluteURI(href)); } } }); - var imgs = this._getAllNodesWithTag(articleContent, ["img"]); - this._forEachNode(imgs, function(img) { - var src = img.getAttribute("src"); + var medias = this._getAllNodesWithTag(articleContent, [ + "img", "picture", "figure", "video", "audio", "source" + ]); + + this._forEachNode(medias, function(media) { + var src = media.getAttribute("src"); + var poster = media.getAttribute("poster"); + var srcset = media.getAttribute("srcset"); + if (src) { - img.setAttribute("src", toAbsoluteURI(src)); + media.setAttribute("src", toAbsoluteURI(src)); + } + + if (poster) { + media.setAttribute("poster", toAbsoluteURI(poster)); + } + + if (srcset) { + var newSrcset = srcset.replace(this.REGEXPS.srcsetUrl, function(_, p1, p2, p3) { + return toAbsoluteURI(p1) + (p2 || "") + p3; + }); + + media.setAttribute("srcset", newSrcset); } }); }, @@ -444,13 +497,13 @@ Readability.prototype = { var doc = this._doc; // Remove all style tags in head - this._removeNodes(doc.getElementsByTagName("style")); + this._removeNodes(this._getAllNodesWithTag(doc, ["style"])); if (doc.body) { this._replaceBrs(doc.body); } - this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN"); + this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN"); }, /** @@ -530,7 +583,7 @@ Readability.prototype = { _setNodeTag: function (node, tag) { this.log("_setNodeTag", node, tag); - if (node.__JSDOMParser__) { + if (this._docJSDOMParser) { node.localName = tag.toLowerCase(); node.tagName = tag.toUpperCase(); return node; @@ -545,7 +598,16 @@ Readability.prototype = { replacement.readability = node.readability; for (var i = 0; i < node.attributes.length; i++) { - replacement.setAttribute(node.attributes[i].name, node.attributes[i].value); + try { + replacement.setAttribute(node.attributes[i].name, node.attributes[i].value); + } catch (ex) { + /* it's possible for setAttribute() to throw if the attribute name + * isn't a valid XML Name. Such attributes can however be parsed from + * source in HTML docs, see https://github.com/whatwg/html/issues/4275, + * so we can hit them here and then throw. We don't care about such + * attributes so we ignore them. + */ + } } return replacement; }, @@ -565,6 +627,8 @@ Readability.prototype = { // visually linked to other content-ful elements (text, images, etc.). this._markDataTables(articleContent); + this._fixLazyImages(articleContent); + // Clean out junk from the article content this._cleanConditionally(articleContent, "form"); this._cleanConditionally(articleContent, "fieldset"); @@ -575,10 +639,15 @@ Readability.prototype = { this._clean(articleContent, "link"); this._clean(articleContent, "aside"); - // Clean out elements have "share" in their id/class combinations from final top candidates, + // Clean out elements with little content that have "share" in their id/class combinations from final top candidates, // which means we don't remove the top candidates even they have "share". - this._forEachNode(articleContent.children, function(topCandidate) { - this._cleanMatchedNodes(topCandidate, /share/); + + var shareElementThreshold = this.DEFAULT_CHAR_THRESHOLD; + + this._forEachNode(articleContent.children, function (topCandidate) { + this._cleanMatchedNodes(topCandidate, function (node, matchString) { + return this.REGEXPS.shareElements.test(matchString) && node.textContent.length < shareElementThreshold; + }); }); // If there is only one h2 and its text content substantially equals article title, @@ -614,7 +683,7 @@ Readability.prototype = { this._cleanConditionally(articleContent, "div"); // Remove extra paragraphs - this._removeNodes(articleContent.getElementsByTagName("p"), function (paragraph) { + this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) { var imgCount = paragraph.getElementsByTagName("img").length; var embedCount = paragraph.getElementsByTagName("embed").length; var objectCount = paragraph.getElementsByTagName("object").length; @@ -729,9 +798,10 @@ Readability.prototype = { if (node.getAttribute !== undefined) { var rel = node.getAttribute("rel"); + var itemprop = node.getAttribute("itemprop"); } - if ((rel === "author" || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) { + if ((rel === "author" || (itemprop && itemprop.indexOf("author") !== -1) || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) { this._articleByline = node.textContent.trim(); return true; } @@ -800,12 +870,19 @@ Readability.prototype = { if (stripUnlikelyCandidates) { if (this.REGEXPS.unlikelyCandidates.test(matchString) && !this.REGEXPS.okMaybeItsACandidate.test(matchString) && + !this._hasAncestorTag(node, "table") && node.tagName !== "BODY" && node.tagName !== "A") { this.log("Removing unlikely candidate - " + matchString); node = this._removeAndGetNext(node); continue; } + + if (node.getAttribute("role") == "complementary") { + this.log("Removing complementary content - " + matchString); + node = this._removeAndGetNext(node); + continue; + } } // Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe). @@ -1199,6 +1276,26 @@ Readability.prototype = { return false; }, + /** + * Converts some of the common HTML entities in string to their corresponding characters. + * + * @param str {string} - a string to unescape. + * @return string without HTML entity. + */ + _unescapeHtmlEntities: function(str) { + if (!str) { + return str; + } + + var htmlEscapeMap = this.HTML_ESCAPE_MAP; + return str.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag) { + return htmlEscapeMap[tag]; + }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(_, hex, numStr) { + var num = parseInt(hex || numStr, hex ? 16 : 10); + return String.fromCharCode(num); + }); + }, + /** * Attempts to get excerpt and byline metadata for the article. * @@ -1220,6 +1317,9 @@ Readability.prototype = { var elementName = element.getAttribute("name"); var elementProperty = element.getAttribute("property"); var content = element.getAttribute("content"); + if (!content) { + return; + } var matches = null; var name = null; @@ -1276,21 +1376,123 @@ Readability.prototype = { // get site name metadata.siteName = values["og:site_name"]; + // in many sites the meta value is escaped with HTML entities, + // so here we need to unescape it + metadata.title = this._unescapeHtmlEntities(metadata.title); + metadata.byline = this._unescapeHtmlEntities(metadata.byline); + metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt); + metadata.siteName = this._unescapeHtmlEntities(metadata.siteName); + return metadata; }, + /** + * Check if node is image, or if node contains exactly only one image + * whether as a direct child or as its descendants. + * + * @param Element + **/ + _isSingleImage: function(node) { + if (node.tagName === "IMG") { + return true; + } + + if (node.children.length !== 1 || node.textContent.trim() !== "") { + return false; + } + + return this._isSingleImage(node.children[0]); + }, + + /** + * Find all