diff options
Diffstat (limited to 'extensions/spellcheck/src/mozInlineSpellWordUtil.h')
-rw-r--r-- | extensions/spellcheck/src/mozInlineSpellWordUtil.h | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/extensions/spellcheck/src/mozInlineSpellWordUtil.h b/extensions/spellcheck/src/mozInlineSpellWordUtil.h new file mode 100644 index 000000000..b28d24ae5 --- /dev/null +++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h @@ -0,0 +1,179 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozInlineSpellWordUtil_h +#define mozInlineSpellWordUtil_h + +#include "nsCOMPtr.h" +#include "nsIDOMDocument.h" +#include "nsIDocument.h" +#include "nsString.h" +#include "nsTArray.h" + +//#define DEBUG_SPELLCHECK + +class nsRange; +class nsINode; + +/** + * This class extracts text from the DOM and builds it into a single string. + * The string includes whitespace breaks wherever non-inline elements begin + * and end. This string is broken into "real words", following somewhat + * complex rules; for example substrings that look like URLs or + * email addresses are treated as single words, but otherwise many kinds of + * punctuation are treated as word separators. GetNextWord provides a way + * to iterate over these "real words". + * + * The basic operation is: + * + * 1. Call Init with the weak pointer to the editor that you're using. + * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop + * at the word boundary after that. If SetEnd is not called, we'll stop + * at the end of the document's root element. + * 3. Call SetPosition to initialize the current position inside the + * previously given range. + * 4. Call GetNextWord over and over until it returns an empty word + * and a null range. 
+ */ + +class mozInlineSpellWordUtil +{ +public: + struct NodeOffset { + nsINode* mNode; + int32_t mOffset; + + NodeOffset(nsINode* aNode, int32_t aOffset) : + mNode(aNode), mOffset(aOffset) {} + + bool operator==(const NodeOffset& aOther) const { + return mNode == aOther.mNode && mOffset == aOther.mOffset; + } + + bool operator!=(const NodeOffset& aOther) const { + return !(*this == aOther); + } + }; + + mozInlineSpellWordUtil() + : mRootNode(nullptr), + mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0), + mNextWordIndex(-1), mSoftTextValid(false) {} + + nsresult Init(nsWeakPtr aWeakEditor); + + nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); + + // sets the current position, this should be inside the range. If we are in + // the middle of a word, we'll move to its start. + nsresult SetPosition(nsINode* aNode, int32_t aOffset); + + // Given a point inside or immediately following a word, this returns the + // DOM range that exactly encloses that word's characters. The current + // position will be at the end of the word. This will find the previous + // word if the current position is space, so if you care that the point is + // inside the word, you should check the range. + // + // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called + // before you actually generate the range you are interested in and iterate + // the words in it. + nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset, + nsRange** aRange); + + // Moves to the the next word in the range, and retrieves it's text and range. + // An empty word and a nullptr range are returned when we are done checking. + // aSkipChecking will be set if the word is "special" and shouldn't be + // checked (e.g., an email address). + nsresult GetNextWord(nsAString& aText, nsRange** aRange, + bool* aSkipChecking); + + // Call to normalize some punctuation. This function takes an autostring + // so we can access characters directly. 
+ static void NormalizeWord(nsSubstring& aWord); + + nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; } + nsIDocument* GetDocument() const { return mDocument; } + nsINode* GetRootNode() { return mRootNode; } + +private: + + // cached stuff for the editor, set by Init + nsCOMPtr<nsIDOMDocument> mDOMDocument; + nsCOMPtr<nsIDocument> mDocument; + + // range to check, see SetPosition and SetEnd + nsINode* mRootNode; + NodeOffset mSoftBegin; + NodeOffset mSoftEnd; + + // DOM text covering the soft range, with newlines added at block boundaries + nsString mSoftText; + // A list of where we extracted text from, ordered by mSoftTextOffset. A given + // DOM node appears at most once in this list. + struct DOMTextMapping { + NodeOffset mNodeOffset; + int32_t mSoftTextOffset; + int32_t mLength; + + DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength) + : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset), + mLength(aLength) {} + }; + nsTArray<DOMTextMapping> mSoftTextDOMMapping; + + // A list of the "real words" in mSoftText, ordered by mSoftTextOffset + struct RealWord { + int32_t mSoftTextOffset; + uint32_t mLength : 31; + uint32_t mCheckableWord : 1; + + RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable) + : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) + { + static_assert(sizeof(RealWord) == 8, "RealWord should be limited to 8 bytes"); + MOZ_ASSERT(aLength < INT32_MAX, "Word length is too large to fit in the bitfield"); + } + + int32_t EndOffset() const { return mSoftTextOffset + mLength; } + }; + nsTArray<RealWord> mRealWords; + int32_t mNextWordIndex; + + bool mSoftTextValid; + + void InvalidateWords() { mSoftTextValid = false; } + nsresult EnsureWords(); + + int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); + // Map an offset into mSoftText to a DOM position. Note that two DOM positions + // can map to the same mSoftText offset, e.g. 
given nodes A=aaaa and B=bbbb + // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, + // aHintBefore controls which position we return ... if aHint is eEnd + // then the position indicates the END of a range so we return (A,4). Otherwise + // the position indicates the START of a range so we return (B,0). + enum DOMMapHint { HINT_BEGIN, HINT_END }; + NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, + DOMMapHint aHint); + // Finds the index of the real word containing aSoftTextOffset, or -1 if none + // If it's exactly between two words, then if aHint is HINT_BEGIN, return the + // later word (favouring the assumption that it's the BEGINning of a word), + // otherwise return the earlier word (assuming it's the END of a word). + // If aSearchForward is true, then if we don't find a word at the given + // position, search forward until we do find a word and return that (if found). + int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, + bool aSearchForward); + + // build mSoftText and mSoftTextDOMMapping + void BuildSoftText(); + // Build mRealWords array + nsresult BuildRealWords(); + + nsresult SplitDOMWord(int32_t aStart, int32_t aEnd); + + // Convenience functions, object must be initialized + nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); + nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); +}; + +#endif |