diff options
Diffstat (limited to 'browser/components/translation/TranslationDocument.jsm')
-rw-r--r-- | browser/components/translation/TranslationDocument.jsm | 683 |
1 files changed, 0 insertions, 683 deletions
diff --git a/browser/components/translation/TranslationDocument.jsm b/browser/components/translation/TranslationDocument.jsm deleted file mode 100644 index 058d07a49..000000000 --- a/browser/components/translation/TranslationDocument.jsm +++ /dev/null @@ -1,683 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -"use strict"; - -const {classes: Cc, interfaces: Ci, utils: Cu} = Components; - -this.EXPORTED_SYMBOLS = [ "TranslationDocument" ]; - -const SHOW_ELEMENT = Ci.nsIDOMNodeFilter.SHOW_ELEMENT; -const SHOW_TEXT = Ci.nsIDOMNodeFilter.SHOW_TEXT; -const TEXT_NODE = Ci.nsIDOMNode.TEXT_NODE; - -Cu.import("resource://services-common/utils.js"); -Cu.import("resource://gre/modules/Task.jsm"); - -/** - * This class represents a document that is being translated, - * and it is responsible for parsing the document, - * generating the data structures translation (the list of - * translation items and roots), and managing the original - * and translated texts on the translation items. - * - * @param document The document to be translated - */ -this.TranslationDocument = function(document) { - this.itemsMap = new Map(); - this.roots = []; - this._init(document); -}; - -this.TranslationDocument.prototype = { - translatedFrom: "", - translatedTo: "", - translationError: false, - originalShown: true, - - /** - * Initializes the object and populates - * the roots lists. - * - * @param document The document to be translated - */ - _init: function(document) { - let window = document.defaultView; - let winUtils = window.QueryInterface(Ci.nsIInterfaceRequestor) - .getInterface(Ci.nsIDOMWindowUtils); - - // Get all the translation nodes in the document's body: - // a translation node is a node from the document which - // contains useful content for translation, and therefore - // must be included in the translation process. - let nodeList = winUtils.getTranslationNodes(document.body); - - let length = nodeList.length; - - for (let i = 0; i < length; i++) { - let node = nodeList.item(i); - let isRoot = nodeList.isTranslationRootAtIndex(i); - - // Create a TranslationItem object for this node. - // This function will also add it to the this.roots array. - this._createItemForNode(node, i, isRoot); - } - - // At first all roots are stored in the roots list, and only after - // the process has finished we're able to determine which roots are - // simple, and which ones are not. - - // A simple root is defined by a root with no children items, which - // basically represents an element from a page with only text content - // inside. - - // This distinction is useful for optimization purposes: we treat a - // simple root as plain-text in the translation process and with that - // we are able to reduce their data payload sent to the translation service. - - for (let root of this.roots) { - if (root.children.length == 0 && - root.nodeRef.childElementCount == 0) { - root.isSimpleRoot = true; - } - } - }, - - /** - * Creates a TranslationItem object, which should be called - * for each node returned by getTranslationNodes. - * - * @param node The DOM node for this item. - * @param id A unique, numeric id for this item. - * @parem isRoot A boolean saying whether this item is a root. - * - * @returns A TranslationItem object. - */ - _createItemForNode: function(node, id, isRoot) { - if (this.itemsMap.has(node)) { - return this.itemsMap.get(node); - } - - let item = new TranslationItem(node, id, isRoot); - - if (isRoot) { - // Root items do not have a parent item. - this.roots.push(item); - } else { - let parentItem = this.itemsMap.get(node.parentNode); - if (parentItem) { - parentItem.children.push(item); - } - } - - this.itemsMap.set(node, item); - return item; - }, - - /** - * Generate the text string that represents a TranslationItem object. - * Besides generating the string, it's also stored in the "original" - * field of the TranslationItem object, which needs to be stored for - * later to be used in the "Show Original" functionality. - * If this function had already been called for the given item (determined - * by the presence of the "original" array in the item), the text will - * be regenerated from the "original" data instead of from the related - * DOM nodes (because the nodes might contain translated data). - * - * @param item A TranslationItem object - * - * @returns A string representation of the TranslationItem. - */ - generateTextForItem: function(item) { - if (item.original) { - return regenerateTextFromOriginalHelper(item); - } - - if (item.isSimpleRoot) { - let text = item.nodeRef.firstChild.nodeValue.trim(); - item.original = [text]; - return text; - } - - let str = ""; - item.original = []; - let wasLastItemPlaceholder = false; - - for (let child of item.nodeRef.childNodes) { - if (child.nodeType == TEXT_NODE) { - let x = child.nodeValue.trim(); - if (x != "") { - item.original.push(x); - str += x; - wasLastItemPlaceholder = false; - } - continue; - } - - let objInMap = this.itemsMap.get(child); - if (objInMap && !objInMap.isRoot) { - // If this childNode is present in the itemsMap, it means - // it's a translation node: it has useful content for translation. - // In this case, we need to stringify this node. - // However, if this item is a root, we should skip it here in this - // object's child list (and just add a placeholder for it), because - // it will be stringfied separately for being a root. - item.original.push(objInMap); - str += this.generateTextForItem(objInMap); - wasLastItemPlaceholder = false; - } else if (!wasLastItemPlaceholder) { - // Otherwise, if this node doesn't contain any useful content, - // or if it is a root itself, we can replace it with a placeholder node. - // We can't simply eliminate this node from our string representation - // because that could change the HTML structure (e.g., it would - // probably merge two separate text nodes). - // It's not necessary to add more than one placeholder in sequence; - // we can optimize them away. - item.original.push(TranslationItem_NodePlaceholder); - str += '<br>'; - wasLastItemPlaceholder = true; - } - } - - return generateTranslationHtmlForItem(item, str); - }, - - /** - * Changes the document to display its translated - * content. - */ - showTranslation: function() { - this.originalShown = false; - this._swapDocumentContent("translation"); - }, - - /** - * Changes the document to display its original - * content. - */ - showOriginal: function() { - this.originalShown = true; - this._swapDocumentContent("original"); - }, - - /** - * Swap the document with the resulting translation, - * or back with the original content. - * - * @param target A string that is either "translation" - * or "original". - */ - _swapDocumentContent: function(target) { - Task.spawn(function *() { - // Let the event loop breath on every 100 nodes - // that are replaced. - const YIELD_INTERVAL = 100; - let count = YIELD_INTERVAL; - - for (let root of this.roots) { - root.swapText(target); - if (count-- == 0) { - count = YIELD_INTERVAL; - yield CommonUtils.laterTickResolvingPromise(); - } - } - }.bind(this)); - } -}; - -/** - * This class represents an item for translation. It's basically our - * wrapper class around a node returned by getTranslationNode, with - * more data and structural information on it. - * - * At the end of the translation process, besides the properties below, - * a TranslationItem will contain two other properties: one called "original" - * and one called "translation". They are twin objects, one which reflect - * the structure of that node in its original state, and the other in its - * translated state. - * - * The "original" array is generated in the generateTextForItem function, - * and the "translation" array is generated when the translation results - * are parsed. - * - * They are both arrays, which contain a mix of strings and references to - * child TranslationItems. The references in both arrays point to the * same * - * TranslationItem object, but they might appear in different orders between the - * "original" and "translation" arrays. - * - * An example: - * - * English: <div id="n1">Welcome to <b id="n2">Mozilla's</b> website</div> - * Portuguese: <div id="n1">Bem vindo a pagina <b id="n2">da Mozilla</b></div> - * - * TranslationItem n1 = { - * id: 1, - * original: ["Welcome to", ptr to n2, "website"] - * translation: ["Bem vindo a pagina", ptr to n2] - * } - * - * TranslationItem n2 = { - * id: 2, - * original: ["Mozilla's"], - * translation: ["da Mozilla"] - * } - */ -function TranslationItem(node, id, isRoot) { - this.nodeRef = node; - this.id = id; - this.isRoot = isRoot; - this.children = []; -} - -TranslationItem.prototype = { - isRoot: false, - isSimpleRoot: false, - - toString: function() { - let rootType = ""; - if (this.isRoot) { - if (this.isSimpleRoot) { - rootType = " (simple root)"; - } - else { - rootType = " (non simple root)"; - } - } - return "[object TranslationItem: <" + this.nodeRef.localName + ">" - + rootType + "]"; - }, - - /** - * This function will parse the result of the translation of one translation - * item. If this item was a simple root, all we sent was a plain-text version - * of it, so the result is also straightforward text. - * - * For non-simple roots, we sent a simplified HTML representation of that - * node, and we'll first parse that into an HTML doc and then call the - * parseResultNode helper function to parse it. - * - * While parsing, the result is stored in the "translation" field of the - * TranslationItem, which will be used to display the final translation when - * all items are finished. It remains stored too to allow back-and-forth - * switching between the "Show Original" and "Show Translation" functions. - * - * @param result A string with the textual result received from the server, - * which can be plain-text or a serialized HTML doc. - */ - parseResult: function(result) { - if (this.isSimpleRoot) { - this.translation = [result]; - return; - } - - let domParser = Cc["@mozilla.org/xmlextras/domparser;1"] - .createInstance(Ci.nsIDOMParser); - - let doc = domParser.parseFromString(result, "text/html"); - parseResultNode(this, doc.body.firstChild); - }, - - /** - * This function finds a child TranslationItem - * with the given id. - * @param id The id to look for, in the format "n#" - * @returns A TranslationItem with the given id, or null if - * it was not found. - */ - getChildById: function(id) { - for (let child of this.children) { - if (("n" + child.id) == id) { - return child; - } - } - return null; - }, - - /** - * Swap the text of this TranslationItem between - * its original and translated states. - * - * @param target A string that is either "translation" - * or "original". - */ - swapText: function(target) { - swapTextForItem(this, target); - } -}; - -/** - * This object represents a placeholder item for translation. It's similar to - * the TranslationItem class, but it represents nodes that have no meaningful - * content for translation. These nodes will be replaced by "<br>" in a - * translation request. It's necessary to keep them to use it as a mark - * for correct positioning and spliting of text nodes. - */ -const TranslationItem_NodePlaceholder = { - toString: function() { - return "[object TranslationItem_NodePlaceholder]"; - } -}; - -/** - * Generate the outer HTML representation for a given item. - * - * @param item A TranslationItem object. - * param content The inner content for this item. - * @returns string The outer HTML needed for translation - * of this item. - */ -function generateTranslationHtmlForItem(item, content) { - let localName = item.isRoot ? "div" : "b"; - return '<' + localName + ' id=n' + item.id + '>' + - content + - "</" + localName + ">"; -} - - /** - * Regenerate the text string that represents a TranslationItem object, - * with data from its "original" array. The array must have already - * been created by TranslationDocument.generateTextForItem(). - * - * @param item A TranslationItem object - * - * @returns A string representation of the TranslationItem. - */ -function regenerateTextFromOriginalHelper(item) { - if (item.isSimpleRoot) { - return item.original[0]; - } - - let str = ""; - for (let child of item.original) { - if (child instanceof TranslationItem) { - str += regenerateTextFromOriginalHelper(child); - } else if (child === TranslationItem_NodePlaceholder) { - str += "<br>"; - } else { - str += child; - } - } - - return generateTranslationHtmlForItem(item, str); -} - -/** - * Helper function to parse a HTML doc result. - * How it works: - * - * An example result string is: - * - * <div id="n1">Hello <b id="n2">World</b> of Mozilla.</div> - * - * For an element node, we look at its id and find the corresponding - * TranslationItem that was associated with this node, and then we - * walk down it repeating the process. - * - * For text nodes we simply add it as a string. - */ -function parseResultNode(item, node) { - item.translation = []; - for (let child of node.childNodes) { - if (child.nodeType == TEXT_NODE) { - item.translation.push(child.nodeValue); - } else if (child.localName == "br") { - item.translation.push(TranslationItem_NodePlaceholder); - } else { - let translationItemChild = item.getChildById(child.id); - - if (translationItemChild) { - item.translation.push(translationItemChild); - parseResultNode(translationItemChild, child); - } - } - } -} - -/** - * Helper function to swap the text of a TranslationItem - * between its original and translated states. - * How it works: - * - * The function iterates through the target array (either the `original` or - * `translation` array from the TranslationItem), while also keeping a pointer - * to a current position in the child nodes from the actual DOM node that we - * are modifying. This pointer is moved forward after each item of the array - * is translated. If, at any given time, the pointer doesn't match the expected - * node that was supposed to be seen, it means that the original and translated - * contents have a different ordering, and thus we need to adjust that. - * - * A full example of the reordering process, swapping from Original to - * Translation: - * - * Original (en): <div>I <em>miss</em> <b>you</b></div> - * - * Translation (fr): <div><b>Tu</b> me <em>manques</em></div> - * - * Step 1: - * pointer points to firstChild of the DOM node, textnode "I " - * first item in item.translation is [object TranslationItem <b>] - * - * pointer does not match the expected element, <b>. So let's move <b> to the - * pointer position. - * - * Current state of the DOM: - * <div><b>you</b>I <em>miss</em> </div> - * - * Step 2: - * pointer moves forward to nextSibling, textnode "I " again. - * second item in item.translation is the string " me " - * - * pointer points to a text node, and we were expecting a text node. Match! - * just replace the text content. - * - * Current state of the DOM: - * <div><b>you</b> me <em>miss</em> </div> - * - * Step 3: - * pointer moves forward to nextSibling, <em>miss</em> - * third item in item.translation is [object TranslationItem <em>] - * - * pointer points to the expected node. Match! Nothing to do. - * - * Step 4: - * all items in this item.translation were transformed. The remaining - * text nodes are cleared to "", and domNode.normalize() removes them. - * - * Current state of the DOM: - * <div><b>you</b> me <em>miss</em></div> - * - * Further steps: - * After that, the function will visit the child items (from the visitStack), - * and the text inside the <b> and <em> nodes will be swapped as well, - * yielding the final result: - * - * <div><b>Tu</b> me <em>manques</em></div> - * - * - * @param item A TranslationItem object - * @param target A string that is either "translation" - * or "original". - */ -function swapTextForItem(item, target) { - // visitStack is the stack of items that we still need to visit. - // Let's start the process by adding the root item. - let visitStack = [ item ]; - - while (visitStack.length > 0) { - let curItem = visitStack.shift(); - - let domNode = curItem.nodeRef; - if (!domNode) { - // Skipping this item due to a missing node. - continue; - } - - if (!curItem[target]) { - // Translation not found for this item. This could be due to - // an error in the server response. For example, if a translation - // was broken in various chunks, and one of the chunks failed, - // the items from that chunk will be missing its "translation" - // field. - continue; - } - - domNode.normalize(); - - // curNode points to the child nodes of the DOM node that we are - // modifying. During most of the process, while the target array is - // being iterated (in the for loop below), it should walk together with - // the array and be pointing to the correct node that needs to modified. - // If it's not pointing to it, that means some sort of node reordering - // will be necessary to produce the correct translation. - // Note that text nodes don't need to be reordered, as we can just replace - // the content of one text node with another. - // - // curNode starts in the firstChild... - let curNode = domNode.firstChild; - - // ... actually, let's make curNode start at the first useful node (either - // a non-blank text node or something else). This is not strictly necessary, - // as the reordering algorithm would correctly handle this case. However, - // this better aligns the resulting translation with the DOM content of the - // page, avoiding cases that would need to be unecessarily reordered. - // - // An example of how this helps: - // - // ---- Original: <div> <b>Hello </b> world.</div> - // ^textnode 1 ^item 1 ^textnode 2 - // - // - Translation: <div><b>Hallo </b> Welt.</div> - // - // Transformation process without this optimization: - // 1 - start pointer at textnode 1 - // 2 - move item 1 to first position inside the <div> - // - // Node now looks like: <div><b>Hello </b>[ ][ world.]</div> - // textnode 1^ ^textnode 2 - // - // 3 - replace textnode 1 with " Welt." - // 4 - clear remaining text nodes (in this case, textnode 2) - // - // Transformation process with this optimization: - // 1 - start pointer at item 1 - // 2 - item 1 is already in position - // 3 - replace textnode 2 with " Welt." - // - // which completely avoids any node reordering, and requires only one - // text change instead of two (while also leaving the page closer to - // its original state). - while (curNode && - curNode.nodeType == TEXT_NODE && - curNode.nodeValue.trim() == "") { - curNode = curNode.nextSibling; - } - - // Now let's walk through all items in the `target` array of the - // TranslationItem. This means either the TranslationItem.original or - // TranslationItem.translation array. - for (let targetItem of curItem[target]) { - - if (targetItem instanceof TranslationItem) { - // If the array element is another TranslationItem object, let's - // add it to the stack to be visited. - visitStack.push(targetItem); - - let targetNode = targetItem.nodeRef; - - // If the node is not in the expected position, let's reorder - // it into position... - if (curNode != targetNode && - // ...unless the page has reparented this node under a totally - // different node (or removed it). In this case, all bets are off - // on being able to do anything correctly, so it's better not to - // bring back the node to this parent. - targetNode.parentNode == domNode) { - - // We don't need to null-check curNode because insertBefore(..., null) - // does what we need in that case: reorder this node to the end - // of child nodes. - domNode.insertBefore(targetNode, curNode); - curNode = targetNode; - } - - // Move pointer forward. Since we do not add empty text nodes to the - // list of translation items, we must skip them here too while - // traversing the DOM in order to get better alignment between the - // text nodes and the translation items. - if (curNode) { - curNode = getNextSiblingSkippingEmptyTextNodes(curNode); - } - - } else if (targetItem === TranslationItem_NodePlaceholder) { - // If the current item is a placeholder node, we need to move - // our pointer "past" it, jumping from one side of a block of - // elements + empty text nodes to the other side. Even if - // non-placeholder elements exists inside the jumped block, - // they will be pulled correctly later in the process when the - // targetItem for those nodes are handled. - - while (curNode && - (curNode.nodeType != TEXT_NODE || - curNode.nodeValue.trim() == "")) { - curNode = curNode.nextSibling; - } - - } else { - // Finally, if it's a text item, we just need to find the next - // text node to use. Text nodes don't need to be reordered, so - // the first one found can be used. - while (curNode && curNode.nodeType != TEXT_NODE) { - curNode = curNode.nextSibling; - } - - // If none was found and we reached the end of the child nodes, - // let's create a new one. - if (!curNode) { - // We don't know if the original content had a space or not, - // so the best bet is to create the text node with " " which - // will add one space at the beginning and one at the end. - curNode = domNode.appendChild(domNode.ownerDocument.createTextNode(" ")); - } - - // A trailing and a leading space must be preserved because - // they are meaningful in HTML. - let preSpace = /^\s/.test(curNode.nodeValue) ? " " : ""; - let endSpace = /\s$/.test(curNode.nodeValue) ? " " : ""; - - curNode.nodeValue = preSpace + targetItem + endSpace; - curNode = getNextSiblingSkippingEmptyTextNodes(curNode); - } - } - - // The translated version of a node might have less text nodes than its - // original version. If that's the case, let's clear the remaining nodes. - if (curNode) { - clearRemainingNonEmptyTextNodesFromElement(curNode); - } - - // And remove any garbage "" nodes left after clearing. - domNode.normalize(); - } -} - -function getNextSiblingSkippingEmptyTextNodes(startSibling) { - let item = startSibling.nextSibling; - while (item && - item.nodeType == TEXT_NODE && - item.nodeValue.trim() == "") { - item = item.nextSibling; - } - return item; -} - -function clearRemainingNonEmptyTextNodesFromElement(startSibling) { - let item = startSibling; - while (item) { - if (item.nodeType == TEXT_NODE && - item.nodeValue != "") { - item.nodeValue = ""; - } - item = item.nextSibling; - } -} |