diff options
Diffstat (limited to 'embedding/components/find/nsFind.cpp')
-rw-r--r-- | embedding/components/find/nsFind.cpp | 1383 |
1 files changed, 1383 insertions, 0 deletions
diff --git a/embedding/components/find/nsFind.cpp b/embedding/components/find/nsFind.cpp new file mode 100644 index 000000000..cbc42298b --- /dev/null +++ b/embedding/components/find/nsFind.cpp @@ -0,0 +1,1383 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//#define DEBUG_FIND 1 + +#include "nsFind.h" +#include "nsContentCID.h" +#include "nsIContent.h" +#include "nsIDOMNode.h" +#include "nsIDOMNodeList.h" +#include "nsISelection.h" +#include "nsISelectionController.h" +#include "nsIFrame.h" +#include "nsITextControlFrame.h" +#include "nsIFormControl.h" +#include "nsIEditor.h" +#include "nsIPlaintextEditor.h" +#include "nsTextFragment.h" +#include "nsString.h" +#include "nsIAtom.h" +#include "nsServiceManagerUtils.h" +#include "nsUnicharUtils.h" +#include "nsIDOMElement.h" +#include "nsIWordBreaker.h" +#include "nsCRT.h" +#include "nsRange.h" +#include "nsContentUtils.h" +#include "mozilla/DebugOnly.h" + +using namespace mozilla; + +// Yikes! Casting a char to unichar can fill with ones! +#define CHAR_TO_UNICHAR(c) ((char16_t)(const unsigned char)c) + +static NS_DEFINE_CID(kCContentIteratorCID, NS_CONTENTITERATOR_CID); +static NS_DEFINE_CID(kCPreContentIteratorCID, NS_PRECONTENTITERATOR_CID); + +#define CH_QUOTE ((char16_t)0x22) +#define CH_APOSTROPHE ((char16_t)0x27) +#define CH_LEFT_SINGLE_QUOTE ((char16_t)0x2018) +#define CH_RIGHT_SINGLE_QUOTE ((char16_t)0x2019) +#define CH_LEFT_DOUBLE_QUOTE ((char16_t)0x201C) +#define CH_RIGHT_DOUBLE_QUOTE ((char16_t)0x201D) + +#define CH_SHY ((char16_t)0xAD) + +// nsFind::Find casts CH_SHY to char before calling StripChars +// This works correctly if and only if CH_SHY <= 255 +static_assert(CH_SHY <= 255, "CH_SHY is not an ascii character"); + +// nsFindContentIterator is a special iterator that also goes through any +// existing <textarea>'s or text <input>'s editor to lookup the anonymous DOM +// content there. +// +// Details: +// 1) We use two iterators: The "outer-iterator" goes through the normal DOM. +// The "inner-iterator" goes through the anonymous DOM inside the editor. +// +// 2) [MaybeSetupInnerIterator] As soon as the outer-iterator's current node is +// changed, a check is made to see if the node is a <textarea> or a text <input> +// node. If so, an inner-iterator is created to lookup the anynomous contents of +// the editor underneath the text control. +// +// 3) When the inner-iterator is created, we position the outer-iterator 'after' +// (or 'before' in backward search) the text control to avoid revisiting that +// control. +// +// 4) As a consequence of searching through text controls, we can be called via +// FindNext with the current selection inside a <textarea> or a text <input>. +// This means that we can be given an initial search range that stretches across +// the anonymous DOM and the normal DOM. To cater for this situation, we split +// the anonymous part into the inner-iterator and then reposition the outer- +// iterator outside. +// +// 5) The implementation assumes that First() and Next() are only called in +// find-forward mode, while Last() and Prev() are used in find-backward. + +class nsFindContentIterator final : public nsIContentIterator +{ +public: + explicit nsFindContentIterator(bool aFindBackward) + : mStartOffset(0) + , mEndOffset(0) + , mFindBackward(aFindBackward) + { + } + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS(nsFindContentIterator) + + // nsIContentIterator + virtual nsresult Init(nsINode* aRoot) override + { + NS_NOTREACHED("internal error"); + return NS_ERROR_NOT_IMPLEMENTED; + } + virtual nsresult Init(nsIDOMRange* aRange) override + { + NS_NOTREACHED("internal error"); + return NS_ERROR_NOT_IMPLEMENTED; + } + // Not a range because one of the endpoints may be anonymous. + nsresult Init(nsIDOMNode* aStartNode, int32_t aStartOffset, + nsIDOMNode* aEndNode, int32_t aEndOffset); + virtual void First() override; + virtual void Last() override; + virtual void Next() override; + virtual void Prev() override; + virtual nsINode* GetCurrentNode() override; + virtual bool IsDone() override; + virtual nsresult PositionAt(nsINode* aCurNode) override; + +protected: + virtual ~nsFindContentIterator() {} + +private: + static already_AddRefed<nsIDOMRange> CreateRange(nsINode* aNode) + { + RefPtr<nsRange> range = new nsRange(aNode); + range->SetMaySpanAnonymousSubtrees(true); + return range.forget(); + } + + nsCOMPtr<nsIContentIterator> mOuterIterator; + nsCOMPtr<nsIContentIterator> mInnerIterator; + // Can't use a range here, since we want to represent part of the flattened + // tree, including native anonymous content. + nsCOMPtr<nsIDOMNode> mStartNode; + int32_t mStartOffset; + nsCOMPtr<nsIDOMNode> mEndNode; + int32_t mEndOffset; + + nsCOMPtr<nsIContent> mStartOuterContent; + nsCOMPtr<nsIContent> mEndOuterContent; + bool mFindBackward; + + void Reset(); + void MaybeSetupInnerIterator(); + void SetupInnerIterator(nsIContent* aContent); +}; + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFindContentIterator) + NS_INTERFACE_MAP_ENTRY(nsIContentIterator) + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFindContentIterator) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFindContentIterator) + +NS_IMPL_CYCLE_COLLECTION(nsFindContentIterator, mOuterIterator, mInnerIterator, + mStartOuterContent, mEndOuterContent, mEndNode, + mStartNode) + +nsresult +nsFindContentIterator::Init(nsIDOMNode* aStartNode, int32_t aStartOffset, + nsIDOMNode* aEndNode, int32_t aEndOffset) +{ + NS_ENSURE_ARG_POINTER(aStartNode); + NS_ENSURE_ARG_POINTER(aEndNode); + if (!mOuterIterator) { + if (mFindBackward) { + // Use post-order in the reverse case, so we get parents before children + // in case we want to prevent descending into a node. + mOuterIterator = do_CreateInstance(kCContentIteratorCID); + } else { + // Use pre-order in the forward case, so we get parents before children in + // case we want to prevent descending into a node. + mOuterIterator = do_CreateInstance(kCPreContentIteratorCID); + } + NS_ENSURE_ARG_POINTER(mOuterIterator); + } + + // Set up the search "range" that we will examine + mStartNode = aStartNode; + mStartOffset = aStartOffset; + mEndNode = aEndNode; + mEndOffset = aEndOffset; + + return NS_OK; +} + +void +nsFindContentIterator::First() +{ + Reset(); +} + +void +nsFindContentIterator::Last() +{ + Reset(); +} + +void +nsFindContentIterator::Next() +{ + if (mInnerIterator) { + mInnerIterator->Next(); + if (!mInnerIterator->IsDone()) { + return; + } + + // by construction, mOuterIterator is already on the next node + } else { + mOuterIterator->Next(); + } + MaybeSetupInnerIterator(); +} + +void +nsFindContentIterator::Prev() +{ + if (mInnerIterator) { + mInnerIterator->Prev(); + if (!mInnerIterator->IsDone()) { + return; + } + + // by construction, mOuterIterator is already on the previous node + } else { + mOuterIterator->Prev(); + } + MaybeSetupInnerIterator(); +} + +nsINode* +nsFindContentIterator::GetCurrentNode() +{ + if (mInnerIterator && !mInnerIterator->IsDone()) { + return mInnerIterator->GetCurrentNode(); + } + return mOuterIterator->GetCurrentNode(); +} + +bool +nsFindContentIterator::IsDone() +{ + if (mInnerIterator && !mInnerIterator->IsDone()) { + return false; + } + return mOuterIterator->IsDone(); +} + +nsresult +nsFindContentIterator::PositionAt(nsINode* aCurNode) +{ + nsINode* oldNode = mOuterIterator->GetCurrentNode(); + nsresult rv = mOuterIterator->PositionAt(aCurNode); + if (NS_SUCCEEDED(rv)) { + MaybeSetupInnerIterator(); + } else { + mOuterIterator->PositionAt(oldNode); + if (mInnerIterator) { + rv = mInnerIterator->PositionAt(aCurNode); + } + } + return rv; +} + +void +nsFindContentIterator::Reset() +{ + mInnerIterator = nullptr; + mStartOuterContent = nullptr; + mEndOuterContent = nullptr; + + // As a consequence of searching through text controls, we may have been + // initialized with a selection inside a <textarea> or a text <input>. + + // see if the start node is an anonymous text node inside a text control + nsCOMPtr<nsIContent> startContent(do_QueryInterface(mStartNode)); + if (startContent) { + mStartOuterContent = startContent->FindFirstNonChromeOnlyAccessContent(); + } + + // see if the end node is an anonymous text node inside a text control + nsCOMPtr<nsIContent> endContent(do_QueryInterface(mEndNode)); + if (endContent) { + mEndOuterContent = endContent->FindFirstNonChromeOnlyAccessContent(); + } + + // Note: OK to just set up the outer iterator here; if our range has a native + // anonymous endpoint we'll end up setting up an inner iterator, and reset the + // outer one in the process. + nsCOMPtr<nsINode> node = do_QueryInterface(mStartNode); + NS_ENSURE_TRUE_VOID(node); + + nsCOMPtr<nsIDOMRange> range = CreateRange(node); + range->SetStart(mStartNode, mStartOffset); + range->SetEnd(mEndNode, mEndOffset); + mOuterIterator->Init(range); + + if (!mFindBackward) { + if (mStartOuterContent != startContent) { + // the start node was an anonymous text node + SetupInnerIterator(mStartOuterContent); + if (mInnerIterator) { + mInnerIterator->First(); + } + } + if (!mOuterIterator->IsDone()) { + mOuterIterator->First(); + } + } else { + if (mEndOuterContent != endContent) { + // the end node was an anonymous text node + SetupInnerIterator(mEndOuterContent); + if (mInnerIterator) { + mInnerIterator->Last(); + } + } + if (!mOuterIterator->IsDone()) { + mOuterIterator->Last(); + } + } + + // if we didn't create an inner-iterator, the boundary node could still be + // a text control, in which case we also need an inner-iterator straightaway + if (!mInnerIterator) { + MaybeSetupInnerIterator(); + } +} + +void +nsFindContentIterator::MaybeSetupInnerIterator() +{ + mInnerIterator = nullptr; + + nsCOMPtr<nsIContent> content = + do_QueryInterface(mOuterIterator->GetCurrentNode()); + if (!content || !content->IsNodeOfType(nsINode::eHTML_FORM_CONTROL)) { + return; + } + + nsCOMPtr<nsIFormControl> formControl(do_QueryInterface(content)); + if (!formControl->IsTextControl(true)) { + return; + } + + SetupInnerIterator(content); + if (mInnerIterator) { + if (!mFindBackward) { + mInnerIterator->First(); + // finish setup: position mOuterIterator on the actual "next" node (this + // completes its re-init, @see SetupInnerIterator) + if (!mOuterIterator->IsDone()) { + mOuterIterator->First(); + } + } else { + mInnerIterator->Last(); + // finish setup: position mOuterIterator on the actual "previous" node + // (this completes its re-init, @see SetupInnerIterator) + if (!mOuterIterator->IsDone()) { + mOuterIterator->Last(); + } + } + } +} + +void +nsFindContentIterator::SetupInnerIterator(nsIContent* aContent) +{ + if (!aContent) { + return; + } + NS_ASSERTION(!aContent->IsRootOfNativeAnonymousSubtree(), "invalid call"); + + nsITextControlFrame* tcFrame = do_QueryFrame(aContent->GetPrimaryFrame()); + if (!tcFrame) { + return; + } + + nsCOMPtr<nsIEditor> editor; + tcFrame->GetEditor(getter_AddRefs(editor)); + if (!editor) { + return; + } + + // don't mess with disabled input fields + uint32_t editorFlags = 0; + editor->GetFlags(&editorFlags); + if (editorFlags & nsIPlaintextEditor::eEditorDisabledMask) { + return; + } + + nsCOMPtr<nsIDOMElement> rootElement; + editor->GetRootElement(getter_AddRefs(rootElement)); + + nsCOMPtr<nsIDOMRange> innerRange = CreateRange(aContent); + nsCOMPtr<nsIDOMRange> outerRange = CreateRange(aContent); + if (!innerRange || !outerRange) { + return; + } + + // now create the inner-iterator + mInnerIterator = do_CreateInstance(kCPreContentIteratorCID); + + if (mInnerIterator) { + innerRange->SelectNodeContents(rootElement); + + // fix up the inner bounds, we may have to only lookup a portion + // of the text control if the current node is a boundary point + if (aContent == mStartOuterContent) { + innerRange->SetStart(mStartNode, mStartOffset); + } + if (aContent == mEndOuterContent) { + innerRange->SetEnd(mEndNode, mEndOffset); + } + // Note: we just init here. We do First() or Last() later. + mInnerIterator->Init(innerRange); + + // make sure to place the outer-iterator outside the text control so that we + // don't go there again. + nsresult res1, res2; + nsCOMPtr<nsIDOMNode> outerNode(do_QueryInterface(aContent)); + if (!mFindBackward) { // find forward + // cut the outer-iterator after the current node + res1 = outerRange->SetEnd(mEndNode, mEndOffset); + res2 = outerRange->SetStartAfter(outerNode); + } else { // find backward + // cut the outer-iterator before the current node + res1 = outerRange->SetStart(mStartNode, mStartOffset); + res2 = outerRange->SetEndBefore(outerNode); + } + if (NS_FAILED(res1) || NS_FAILED(res2)) { + // we are done with the outer-iterator, the inner-iterator will traverse + // what we want + outerRange->Collapse(true); + } + + // Note: we just re-init here, using the segment of our search range that + // is yet to be visited. Thus when we later do mOuterIterator->First() [or + // mOuterIterator->Last()], we will effectively be on the next node [or + // the previous node] _with respect to_ the search range. + mOuterIterator->Init(outerRange); + } +} + +nsresult +NS_NewFindContentIterator(bool aFindBackward, nsIContentIterator** aResult) +{ + NS_ENSURE_ARG_POINTER(aResult); + if (!aResult) { + return NS_ERROR_NULL_POINTER; + } + + nsFindContentIterator* it = new nsFindContentIterator(aFindBackward); + if (!it) { + return NS_ERROR_OUT_OF_MEMORY; + } + return it->QueryInterface(NS_GET_IID(nsIContentIterator), (void**)aResult); +} + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFind) + NS_INTERFACE_MAP_ENTRY(nsIFind) + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFind) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFind) + +NS_IMPL_CYCLE_COLLECTION(nsFind, mLastBlockParent, mIterNode, mIterator) + +nsFind::nsFind() + : mFindBackward(false) + , mCaseSensitive(false) + , mIterOffset(0) +{ +} + +nsFind::~nsFind() +{ +} + +#ifdef DEBUG_FIND +static void +DumpNode(nsIDOMNode* aNode) +{ + if (!aNode) { + printf(">>>> Node: NULL\n"); + return; + } + nsAutoString nodeName; + aNode->GetNodeName(nodeName); + nsCOMPtr<nsIContent> textContent(do_QueryInterface(aNode)); + if (textContent && textContent->IsNodeOfType(nsINode::eTEXT)) { + nsAutoString newText; + textContent->AppendTextTo(newText); + printf(">>>> Text node (node name %s): '%s'\n", + NS_LossyConvertUTF16toASCII(nodeName).get(), + NS_LossyConvertUTF16toASCII(newText).get()); + } else { + printf(">>>> Node: %s\n", NS_LossyConvertUTF16toASCII(nodeName).get()); + } +} +#endif + +nsresult +nsFind::InitIterator(nsIDOMNode* aStartNode, int32_t aStartOffset, + nsIDOMNode* aEndNode, int32_t aEndOffset) +{ + if (!mIterator) { + mIterator = new nsFindContentIterator(mFindBackward); + NS_ENSURE_TRUE(mIterator, NS_ERROR_OUT_OF_MEMORY); + } + + NS_ENSURE_ARG_POINTER(aStartNode); + NS_ENSURE_ARG_POINTER(aEndNode); + +#ifdef DEBUG_FIND + printf("InitIterator search range:\n"); + printf(" -- start %d, ", aStartOffset); + DumpNode(aStartNode); + printf(" -- end %d, ", aEndOffset); + DumpNode(aEndNode); +#endif + + nsresult rv = mIterator->Init(aStartNode, aStartOffset, aEndNode, aEndOffset); + NS_ENSURE_SUCCESS(rv, rv); + if (mFindBackward) { + mIterator->Last(); + } else { + mIterator->First(); + } + return NS_OK; +} + +NS_IMETHODIMP +nsFind::GetFindBackwards(bool* aFindBackward) +{ + if (!aFindBackward) { + return NS_ERROR_NULL_POINTER; + } + + *aFindBackward = mFindBackward; + return NS_OK; +} + +NS_IMETHODIMP +nsFind::SetFindBackwards(bool aFindBackward) +{ + mFindBackward = aFindBackward; + return NS_OK; +} + +NS_IMETHODIMP +nsFind::GetCaseSensitive(bool* aCaseSensitive) +{ + if (!aCaseSensitive) { + return NS_ERROR_NULL_POINTER; + } + + *aCaseSensitive = mCaseSensitive; + return NS_OK; +} + +NS_IMETHODIMP +nsFind::SetCaseSensitive(bool aCaseSensitive) +{ + mCaseSensitive = aCaseSensitive; + return NS_OK; +} + +/* attribute boolean entireWord; */ +NS_IMETHODIMP +nsFind::GetEntireWord(bool *aEntireWord) +{ + if (!aEntireWord) + return NS_ERROR_NULL_POINTER; + + *aEntireWord = !!mWordBreaker; + return NS_OK; +} + +NS_IMETHODIMP +nsFind::SetEntireWord(bool aEntireWord) +{ + mWordBreaker = aEntireWord ? nsContentUtils::WordBreaker() : nullptr; + return NS_OK; +} + +// Here begins the find code. A ten-thousand-foot view of how it works: Find +// needs to be able to compare across inline (but not block) nodes, e.g. find +// for "abc" should match a<b>b</b>c. So after we've searched a node, we're not +// done with it; in the case of a partial match we may need to reset the +// iterator to go back to a previously visited node, so we always save the +// "match anchor" node and offset. +// +// Text nodes store their text in an nsTextFragment, which is effectively a +// union of a one-byte string or a two-byte string. Single and double strings +// are intermixed in the dom. We don't have string classes which can deal with +// intermixed strings, so all the handling is done explicitly here. + +nsresult +nsFind::NextNode(nsIDOMRange* aSearchRange, + nsIDOMRange* aStartPoint, nsIDOMRange* aEndPoint, + bool aContinueOk) +{ + nsresult rv; + + nsCOMPtr<nsIContent> content; + + if (!mIterator || aContinueOk) { + // If we are continuing, that means we have a match in progress. In that + // case, we want to continue from the end point (where we are now) to the + // beginning/end of the search range. + nsCOMPtr<nsIDOMNode> startNode; + nsCOMPtr<nsIDOMNode> endNode; + int32_t startOffset, endOffset; + if (aContinueOk) { +#ifdef DEBUG_FIND + printf("Match in progress: continuing past endpoint\n"); +#endif + if (mFindBackward) { + aSearchRange->GetStartContainer(getter_AddRefs(startNode)); + aSearchRange->GetStartOffset(&startOffset); + aEndPoint->GetStartContainer(getter_AddRefs(endNode)); + aEndPoint->GetStartOffset(&endOffset); + } else { // forward + aEndPoint->GetEndContainer(getter_AddRefs(startNode)); + aEndPoint->GetEndOffset(&startOffset); + aSearchRange->GetEndContainer(getter_AddRefs(endNode)); + aSearchRange->GetEndOffset(&endOffset); + } + } else { // Normal, not continuing + if (mFindBackward) { + aSearchRange->GetStartContainer(getter_AddRefs(startNode)); + aSearchRange->GetStartOffset(&startOffset); + aStartPoint->GetEndContainer(getter_AddRefs(endNode)); + aStartPoint->GetEndOffset(&endOffset); + // XXX Needs work: Problem with this approach: if there is a match which + // starts just before the current selection and continues into the + // selection, we will miss it, because our search algorithm only starts + // searching from the end of the word, so we would have to search the + // current selection but discount any matches that fall entirely inside + // it. + } else { // forward + aStartPoint->GetStartContainer(getter_AddRefs(startNode)); + aStartPoint->GetStartOffset(&startOffset); + aEndPoint->GetEndContainer(getter_AddRefs(endNode)); + aEndPoint->GetEndOffset(&endOffset); + } + } + + rv = InitIterator(startNode, startOffset, endNode, endOffset); + NS_ENSURE_SUCCESS(rv, rv); + if (!aStartPoint) { + aStartPoint = aSearchRange; + } + + content = do_QueryInterface(mIterator->GetCurrentNode()); +#ifdef DEBUG_FIND + nsCOMPtr<nsIDOMNode> dnode(do_QueryInterface(content)); + printf(":::::: Got the first node "); + DumpNode(dnode); +#endif + if (content && content->IsNodeOfType(nsINode::eTEXT) && + !SkipNode(content)) { + mIterNode = do_QueryInterface(content); + // Also set mIterOffset if appropriate: + nsCOMPtr<nsIDOMNode> node; + if (mFindBackward) { + aStartPoint->GetEndContainer(getter_AddRefs(node)); + if (mIterNode.get() == node.get()) { + aStartPoint->GetEndOffset(&mIterOffset); + } else { + mIterOffset = -1; // sign to start from end + } + } else { + aStartPoint->GetStartContainer(getter_AddRefs(node)); + if (mIterNode.get() == node.get()) { + aStartPoint->GetStartOffset(&mIterOffset); + } else { + mIterOffset = 0; + } + } +#ifdef DEBUG_FIND + printf("Setting initial offset to %d\n", mIterOffset); +#endif + return NS_OK; + } + } + + while (true) { + if (mFindBackward) { + mIterator->Prev(); + } else { + mIterator->Next(); + } + + content = do_QueryInterface(mIterator->GetCurrentNode()); + if (!content) { + break; + } + +#ifdef DEBUG_FIND + nsCOMPtr<nsIDOMNode> dnode(do_QueryInterface(content)); + printf(":::::: Got another node "); + DumpNode(dnode); +#endif + + // If we ever cross a block node, we might want to reset the match anchor: + // we don't match patterns extending across block boundaries. But we can't + // depend on this test here now, because the iterator doesn't give us the + // parent going in and going out, and we need it both times to depend on + // this. + //if (IsBlockNode(content)) + + // Now see if we need to skip this node -- e.g. is it part of a script or + // other invisible node? Note that we don't ask for CSS information; a node + // can be invisible due to CSS, and we'd still find it. + if (SkipNode(content)) { + continue; + } + + if (content->IsNodeOfType(nsINode::eTEXT)) { + break; + } +#ifdef DEBUG_FIND + dnode = do_QueryInterface(content); + printf("Not a text node: "); + DumpNode(dnode); +#endif + } + + if (content) { + mIterNode = do_QueryInterface(content); + } else { + mIterNode = nullptr; + } + mIterOffset = -1; + +#ifdef DEBUG_FIND + printf("Iterator gave: "); + DumpNode(mIterNode); +#endif + return NS_OK; +} + +class MOZ_STACK_CLASS PeekNextCharRestoreState final +{ +public: + explicit PeekNextCharRestoreState(nsFind* aFind) + : mIterOffset(aFind->mIterOffset), + mIterNode(aFind->mIterNode), + mCurrNode(aFind->mIterator->GetCurrentNode()), + mFind(aFind) + { + } + + ~PeekNextCharRestoreState() + { + mFind->mIterOffset = mIterOffset; + mFind->mIterNode = mIterNode; + mFind->mIterator->PositionAt(mCurrNode); + } + +private: + int32_t mIterOffset; + nsCOMPtr<nsIDOMNode> mIterNode; + nsCOMPtr<nsINode> mCurrNode; + RefPtr<nsFind> mFind; +}; + +char16_t +nsFind::PeekNextChar(nsIDOMRange* aSearchRange, + nsIDOMRange* aStartPoint, + nsIDOMRange* aEndPoint) +{ + // We need to restore the necessary member variables before this function + // returns. + PeekNextCharRestoreState restoreState(this); + + nsCOMPtr<nsIContent> tc; + nsresult rv; + const nsTextFragment *frag; + int32_t fragLen; + + // Loop through non-block nodes until we find one that's not empty. + do { + tc = nullptr; + NextNode(aSearchRange, aStartPoint, aEndPoint, false); + + // Get the text content: + tc = do_QueryInterface(mIterNode); + + // Get the block parent. + nsCOMPtr<nsIDOMNode> blockParent; + rv = GetBlockParent(mIterNode, getter_AddRefs(blockParent)); + if (NS_FAILED(rv)) + return L'\0'; + + // If out of nodes or in new parent. + if (!mIterNode || !tc || (blockParent != mLastBlockParent)) + return L'\0'; + + frag = tc->GetText(); + fragLen = frag->GetLength(); + } while (fragLen <= 0); + + const char16_t *t2b = nullptr; + const char *t1b = nullptr; + + if (frag->Is2b()) { + t2b = frag->Get2b(); + } else { + t1b = frag->Get1b(); + } + + // Index of char to return. + int32_t index = mFindBackward ? fragLen - 1 : 0; + + return t1b ? CHAR_TO_UNICHAR(t1b[index]) : t2b[index]; +} + +bool +nsFind::IsBlockNode(nsIContent* aContent) +{ + if (aContent->IsAnyOfHTMLElements(nsGkAtoms::img, + nsGkAtoms::hr, + nsGkAtoms::th, + nsGkAtoms::td)) { + return true; + } + + return nsContentUtils::IsHTMLBlock(aContent); +} + +bool +nsFind::IsTextNode(nsIDOMNode* aNode) +{ + uint16_t nodeType; + aNode->GetNodeType(&nodeType); + + return nodeType == nsIDOMNode::TEXT_NODE || + nodeType == nsIDOMNode::CDATA_SECTION_NODE; +} + +bool +nsFind::IsVisibleNode(nsIDOMNode* aDOMNode) +{ + nsCOMPtr<nsIContent> content(do_QueryInterface(aDOMNode)); + if (!content) { + return false; + } + + nsIFrame* frame = content->GetPrimaryFrame(); + if (!frame) { + // No frame! Not visible then. + return false; + } + + return frame->StyleVisibility()->IsVisible(); +} + +bool +nsFind::SkipNode(nsIContent* aContent) +{ +#ifdef HAVE_BIDI_ITERATOR + // We may not need to skip comment nodes, now that IsTextNode distinguishes + // them from real text nodes. + return aContent->IsNodeOfType(nsINode::eCOMMENT) || + aContent->IsAnyOfHTMLElements(sScriptAtom, sNoframesAtom, sSelectAtom); + +#else /* HAVE_BIDI_ITERATOR */ + // Temporary: eventually we will have an iterator to do this, but for now, we + // have to climb up the tree for each node and see whether any parent is a + // skipped node, and take the performance hit. + + nsIContent* content = aContent; + while (content) { + if (aContent->IsNodeOfType(nsINode::eCOMMENT) || + content->IsAnyOfHTMLElements(nsGkAtoms::script, + nsGkAtoms::noframes, + nsGkAtoms::select)) { +#ifdef DEBUG_FIND + printf("Skipping node: "); + nsCOMPtr<nsIDOMNode> node(do_QueryInterface(content)); + DumpNode(node); +#endif + + return true; + } + + // Only climb to the nearest block node + if (IsBlockNode(content)) { + return false; + } + + content = content->GetParent(); + } + + return false; +#endif /* HAVE_BIDI_ITERATOR */ +} + +nsresult +nsFind::GetBlockParent(nsIDOMNode* aNode, nsIDOMNode** aParent) +{ + while (aNode) { + nsCOMPtr<nsIDOMNode> parent; + nsresult rv = aNode->GetParentNode(getter_AddRefs(parent)); + NS_ENSURE_SUCCESS(rv, rv); + nsCOMPtr<nsIContent> content(do_QueryInterface(parent)); + if (content && IsBlockNode(content)) { + *aParent = parent; + NS_ADDREF(*aParent); + return NS_OK; + } + aNode = parent; + } + return NS_ERROR_FAILURE; +} + +// Call ResetAll before returning, to remove all references to external objects. +void +nsFind::ResetAll() +{ + mIterator = nullptr; + mLastBlockParent = nullptr; +} + +#define NBSP_CHARCODE (CHAR_TO_UNICHAR(160)) +#define IsSpace(c) (nsCRT::IsAsciiSpace(c) || (c) == NBSP_CHARCODE) +#define OVERFLOW_PINDEX (mFindBackward ? pindex < 0 : pindex > patLen) +#define DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen) +#define ALMOST_DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen - 1) + +// Take nodes out of the tree with NextNode, until null (NextNode will return 0 +// at the end of our range). +NS_IMETHODIMP +nsFind::Find(const char16_t* aPatText, nsIDOMRange* aSearchRange, + nsIDOMRange* aStartPoint, nsIDOMRange* aEndPoint, + nsIDOMRange** aRangeRet) +{ +#ifdef DEBUG_FIND + printf("============== nsFind::Find('%s'%s, %p, %p, %p)\n", + NS_LossyConvertUTF16toASCII(aPatText).get(), + mFindBackward ? " (backward)" : " (forward)", + (void*)aSearchRange, (void*)aStartPoint, (void*)aEndPoint); +#endif + + NS_ENSURE_ARG(aSearchRange); + NS_ENSURE_ARG(aStartPoint); + NS_ENSURE_ARG(aEndPoint); + NS_ENSURE_ARG_POINTER(aRangeRet); + *aRangeRet = 0; + + if (!aPatText) { + return NS_ERROR_NULL_POINTER; + } + + ResetAll(); + + nsAutoString patAutoStr(aPatText); + if (!mCaseSensitive) { + ToLowerCase(patAutoStr); + } + + // Ignore soft hyphens in the pattern + static const char kShy[] = { char(CH_SHY), 0 }; + patAutoStr.StripChars(kShy); + + const char16_t* patStr = patAutoStr.get(); + int32_t patLen = patAutoStr.Length() - 1; + + // current offset into the pattern -- reset to beginning/end: + int32_t pindex = (mFindBackward ? patLen : 0); + + // Current offset into the fragment + int32_t findex = 0; + + // Direction to move pindex and ptr* + int incr = (mFindBackward ? -1 : 1); + + nsCOMPtr<nsIContent> tc; + const nsTextFragment* frag = nullptr; + int32_t fragLen = 0; + + // Pointers into the current fragment: + const char16_t* t2b = nullptr; + const char* t1b = nullptr; + + // Keep track of when we're in whitespace: + // (only matters when we're matching) + bool inWhitespace = false; + // Keep track of whether the previous char was a word-breaking one. + bool wordBreakPrev = false; + + // Place to save the range start point in case we find a match: + nsCOMPtr<nsIDOMNode> matchAnchorNode; + int32_t matchAnchorOffset = 0; + + // Get the end point, so we know when to end searches: + nsCOMPtr<nsIDOMNode> endNode; + int32_t endOffset; + aEndPoint->GetEndContainer(getter_AddRefs(endNode)); + aEndPoint->GetEndOffset(&endOffset); + + char16_t c = 0; + char16_t patc = 0; + char16_t prevChar = 0; + char16_t prevCharInMatch = 0; + while (1) { +#ifdef DEBUG_FIND + printf("Loop ...\n"); +#endif + + // If this is our first time on a new node, reset the pointers: + if (!frag) { + + tc = nullptr; + NextNode(aSearchRange, aStartPoint, aEndPoint, false); + if (!mIterNode) { // Out of nodes + // Are we in the middle of a match? If so, try again with continuation. + if (matchAnchorNode) { + NextNode(aSearchRange, aStartPoint, aEndPoint, true); + } + + // Reset the iterator, so this nsFind will be usable if the user wants + // to search again (from beginning/end). + ResetAll(); + return NS_OK; + } + + // We have a new text content. If its block parent is different from the + // block parent of the last text content, then we need to clear the match + // since we don't want to find across block boundaries. + nsCOMPtr<nsIDOMNode> blockParent; + GetBlockParent(mIterNode, getter_AddRefs(blockParent)); +#ifdef DEBUG_FIND + printf("New node: old blockparent = %p, new = %p\n", + (void*)mLastBlockParent.get(), (void*)blockParent.get()); +#endif + if (blockParent != mLastBlockParent) { +#ifdef DEBUG_FIND + printf("Different block parent!\n"); +#endif + mLastBlockParent = blockParent; + // End any pending match: + matchAnchorNode = nullptr; + matchAnchorOffset = 0; + pindex = (mFindBackward ? patLen : 0); + inWhitespace = false; + } + + // Get the text content: + tc = do_QueryInterface(mIterNode); + if (!tc || !(frag = tc->GetText())) { // Out of nodes + mIterator = nullptr; + mLastBlockParent = nullptr; + ResetAll(); + return NS_OK; + } + + fragLen = frag->GetLength(); + + // Set our starting point in this node. If we're going back to the anchor + // node, which means that we just ended a partial match, use the saved + // offset: + if (mIterNode == matchAnchorNode) { + findex = matchAnchorOffset + (mFindBackward ? 1 : 0); + } + + // mIterOffset, if set, is the range's idea of an offset, and points + // between characters. But when translated to a string index, it points to + // a character. If we're going backward, this is one character too late + // and we'll match part of our previous pattern. + else if (mIterOffset >= 0) { + findex = mIterOffset - (mFindBackward ? 1 : 0); + } + + // Otherwise, just start at the appropriate end of the fragment: + else if (mFindBackward) { + findex = fragLen - 1; + } else { + findex = 0; + } + + // Offset can only apply to the first node: + mIterOffset = -1; + + // If this is outside the bounds of the string, then skip this node: + if (findex < 0 || findex > fragLen - 1) { +#ifdef DEBUG_FIND + printf("At the end of a text node -- skipping to the next\n"); +#endif + frag = 0; + continue; + } + +#ifdef DEBUG_FIND + printf("Starting from offset %d\n", findex); +#endif + if (frag->Is2b()) { + t2b = frag->Get2b(); + t1b = nullptr; +#ifdef DEBUG_FIND + nsAutoString str2(t2b, fragLen); + printf("2 byte, '%s'\n", NS_LossyConvertUTF16toASCII(str2).get()); +#endif + } else { + t1b = frag->Get1b(); + t2b = nullptr; +#ifdef DEBUG_FIND + nsAutoCString str1(t1b, fragLen); + printf("1 byte, '%s'\n", str1.get()); +#endif + } + } else { + // Still on the old node. Advance the pointers, then see if we need to + // pull a new node. + findex += incr; +#ifdef DEBUG_FIND + printf("Same node -- (%d, %d)\n", pindex, findex); +#endif + if (mFindBackward ? (findex < 0) : (findex >= fragLen)) { +#ifdef DEBUG_FIND + printf("Will need to pull a new node: mAO = %d, frag len=%d\n", + matchAnchorOffset, fragLen); +#endif + // Done with this node. Pull a new one. + frag = nullptr; + continue; + } + } + + // Have we gone past the endpoint yet? If we have, and we're not in the + // middle of a match, return. + if (mIterNode == endNode && + ((mFindBackward && findex < endOffset) || + (!mFindBackward && findex > endOffset))) { + ResetAll(); + return NS_OK; + } + + // Save the previous character for word boundary detection + prevChar = c; + // The two characters we'll be comparing: + c = (t2b ? t2b[findex] : CHAR_TO_UNICHAR(t1b[findex])); + patc = patStr[pindex]; + +#ifdef DEBUG_FIND + printf("Comparing '%c'=%x to '%c' (%d of %d), findex=%d%s\n", + (char)c, (int)c, patc, pindex, patLen, findex, + inWhitespace ? " (inWhitespace)" : ""); +#endif + + // Do we need to go back to non-whitespace mode? If inWhitespace, then this + // space in the pat str has already matched at least one space in the + // document. + if (inWhitespace && !IsSpace(c)) { + inWhitespace = false; + pindex += incr; +#ifdef DEBUG + // This shouldn't happen -- if we were still matching, and we were at the + // end of the pat string, then we should have caught it in the last + // iteration and returned success. + if (OVERFLOW_PINDEX) { + NS_ASSERTION(false, "Missed a whitespace match"); + } +#endif + patc = patStr[pindex]; + } + if (!inWhitespace && IsSpace(patc)) { + inWhitespace = true; + } else if (!inWhitespace && !mCaseSensitive && IsUpperCase(c)) { + c = ToLowerCase(c); + } + + if (c == CH_SHY) { + // ignore soft hyphens in the document + continue; + } + + if (!mCaseSensitive) { + switch (c) { + // treat curly and straight quotes as identical + case CH_LEFT_SINGLE_QUOTE: + case CH_RIGHT_SINGLE_QUOTE: + c = CH_APOSTROPHE; + break; + case CH_LEFT_DOUBLE_QUOTE: + case CH_RIGHT_DOUBLE_QUOTE: + c = CH_QUOTE; + break; + } + + switch (patc) { + // treat curly and straight quotes as identical + case CH_LEFT_SINGLE_QUOTE: + case CH_RIGHT_SINGLE_QUOTE: + patc = CH_APOSTROPHE; + break; + case CH_LEFT_DOUBLE_QUOTE: + case CH_RIGHT_DOUBLE_QUOTE: + patc = CH_QUOTE; + break; + } + } + + // a '\n' between CJ characters is ignored + if (pindex != (mFindBackward ? patLen : 0) && c != patc && !inWhitespace) { + if (c == '\n' && t2b && IS_CJ_CHAR(prevCharInMatch)) { + int32_t nindex = findex + incr; + if (mFindBackward ? (nindex >= 0) : (nindex < fragLen)) { + if (IS_CJ_CHAR(t2b[nindex])) { + continue; + } + } + } + } + + wordBreakPrev = false; + if (mWordBreaker) { + if (prevChar == NBSP_CHARCODE) + prevChar = CHAR_TO_UNICHAR(' '); + wordBreakPrev = mWordBreaker->BreakInBetween(&prevChar, 1, &c, 1); + } + + // Compare. Match if we're in whitespace and c is whitespace, or if the + // characters match and at least one of the following is true: + // a) we're not matching the entire word + // b) a match has already been stored + // c) the previous character is a different "class" than the current character. + if ((c == patc && (!mWordBreaker || matchAnchorNode || wordBreakPrev)) || + (inWhitespace && IsSpace(c))) + { + prevCharInMatch = c; +#ifdef DEBUG_FIND + if (inWhitespace) { + printf("YES (whitespace)(%d of %d)\n", pindex, patLen); + } else { + printf("YES! '%c' == '%c' (%d of %d)\n", c, patc, pindex, patLen); + } +#endif + + // Save the range anchors if we haven't already: + if (!matchAnchorNode) { + matchAnchorNode = mIterNode; + matchAnchorOffset = findex; + } + + // Are we done? + if (DONE_WITH_PINDEX) { + // Matched the whole string! +#ifdef DEBUG_FIND + printf("Found a match!\n"); +#endif + + // Make the range: + nsCOMPtr<nsIDOMNode> startParent; + nsCOMPtr<nsIDOMNode> endParent; + + // Check for word break (if necessary) + if (mWordBreaker) { + int32_t nextfindex = findex + incr; + + char16_t nextChar; + // If still in array boundaries, get nextChar. + if (mFindBackward ? (nextfindex >= 0) : (nextfindex < fragLen)) + nextChar = (t2b ? t2b[nextfindex] : CHAR_TO_UNICHAR(t1b[nextfindex])); + // Get next character from the next node. + else + nextChar = PeekNextChar(aSearchRange, aStartPoint, aEndPoint); + + if (nextChar == NBSP_CHARCODE) + nextChar = CHAR_TO_UNICHAR(' '); + + // If a word break isn't there when it needs to be, reset search. + if (!mWordBreaker->BreakInBetween(&c, 1, &nextChar, 1)) { + matchAnchorNode = nullptr; + continue; + } + } + + nsCOMPtr<nsIDOMRange> range = new nsRange(tc); + if (range) { + int32_t matchStartOffset, matchEndOffset; + // convert char index to range point: + int32_t mao = matchAnchorOffset + (mFindBackward ? 1 : 0); + if (mFindBackward) { + startParent = do_QueryInterface(tc); + endParent = matchAnchorNode; + matchStartOffset = findex; + matchEndOffset = mao; + } else { + startParent = matchAnchorNode; + endParent = do_QueryInterface(tc); + matchStartOffset = mao; + matchEndOffset = findex + 1; + } + if (startParent && endParent && + IsVisibleNode(startParent) && IsVisibleNode(endParent)) { + range->SetStart(startParent, matchStartOffset); + range->SetEnd(endParent, matchEndOffset); + *aRangeRet = range.get(); + NS_ADDREF(*aRangeRet); + } else { + // This match is no good -- invisible or bad range + startParent = nullptr; + } + } + + if (startParent) { + // If startParent == nullptr, we didn't successfully make range + // or, we didn't make a range because the start or end node were + // invisible. Reset the offset to the other end of the found string: + mIterOffset = findex + (mFindBackward ? 1 : 0); +#ifdef DEBUG_FIND + printf("mIterOffset = %d, mIterNode = ", mIterOffset); + DumpNode(mIterNode); +#endif + + ResetAll(); + return NS_OK; + } + // This match is no good, continue on in document + matchAnchorNode = nullptr; + } + + if (matchAnchorNode) { + // Not done, but still matching. Advance and loop around for the next + // characters. But don't advance from a space to a non-space: + if (!inWhitespace || DONE_WITH_PINDEX || + IsSpace(patStr[pindex + incr])) { + pindex += incr; + inWhitespace = false; +#ifdef DEBUG_FIND + printf("Advancing pindex to %d\n", pindex); +#endif + } + + continue; + } + } + +#ifdef DEBUG_FIND + printf("NOT: %c == %c\n", c, patc); +#endif + + // If we didn't match, go back to the beginning of patStr, and set findex + // back to the next char after we started the current match. + if (matchAnchorNode) { // we're ending a partial match + findex = matchAnchorOffset; + mIterOffset = matchAnchorOffset; + // +incr will be added to findex when we continue + + // Are we going back to a previous node? + if (matchAnchorNode != mIterNode) { + nsCOMPtr<nsIContent> content(do_QueryInterface(matchAnchorNode)); + DebugOnly<nsresult> rv = NS_ERROR_UNEXPECTED; + if (content) { + rv = mIterator->PositionAt(content); + } + frag = 0; + NS_ASSERTION(NS_SUCCEEDED(rv), "Text content wasn't nsIContent!"); +#ifdef DEBUG_FIND + printf("Repositioned anchor node\n"); +#endif + } +#ifdef DEBUG_FIND + printf("Ending a partial match; findex -> %d, mIterOffset -> %d\n", + findex, mIterOffset); +#endif + } + matchAnchorNode = nullptr; + matchAnchorOffset = 0; + inWhitespace = false; + pindex = (mFindBackward ? patLen : 0); +#ifdef DEBUG_FIND + printf("Setting findex back to %d, pindex to %d\n", findex, pindex); + +#endif + } + + // Out of nodes, and didn't match. + ResetAll(); + return NS_OK; +} |