1 files changed, 650 insertions, 0 deletions
diff --git a/parser/htmlparser/nsScannerString.cpp b/parser/htmlparser/nsScannerString.cpp
new file mode 100644
index 000000000..b492b9054
--- /dev/null
+++ b/parser/htmlparser/nsScannerString.cpp
@@ -0,0 +1,650 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdlib.h>
+#include "nsScannerString.h"
+#include "mozilla/CheckedInt.h"
+
+
+  /**
+   * nsScannerBufferList
+   */
+
+#define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
+                      (sizeof(Buffer) + sizeof(char16_t)))
+
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
+  {
+    uint32_t len = aString.Length();
+    Buffer* buf = AllocBuffer(len);
+
+    if (buf)
+      {
+        nsAString::const_iterator source;
+        aString.BeginReading(source);
+        nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
+      }
+    return buf;
+  }
+
+nsScannerBufferList::Buffer*
+nsScannerBufferList::AllocBuffer( uint32_t capacity )
+  {
+    if (capacity > MAX_CAPACITY)
+      return nullptr;
+
+    void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
+    if (!ptr)
+      return nullptr;
+
+    Buffer* buf = new (ptr) Buffer();
+
+    buf->mUsageCount = 0;
+    buf->mDataEnd = buf->DataStart() + capacity;
+
+    // XXX null terminate.  this shouldn't be required, but we do it because
+    // nsScanner erroneously thinks it can dereference DataEnd :-(
+    *buf->mDataEnd = char16_t(0);
+    return buf;
+  }
+
+void
+nsScannerBufferList::ReleaseAll()
+  {
+    while (!mBuffers.isEmpty())
+      {
+        Buffer* node = mBuffers.popFirst();
+        //printf(">>> freeing buffer @%p\n", node);
+        free(node);
+      }
+  }
+
+void
+nsScannerBufferList::SplitBuffer( const Position& pos )
+  {
+    // splitting to the right keeps the work string and any extant token
+    // pointing to and holding a reference count on the same buffer.
+
+    Buffer* bufferToSplit = pos.mBuffer;
+    NS_ASSERTION(bufferToSplit, "null pointer");
+
+    uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
+    NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
+                 splitOffset <= bufferToSplit->DataLength(),
+                 "split offset is outside buffer");
+    
+    uint32_t len = bufferToSplit->DataLength() - splitOffset;
+    Buffer* new_buffer = AllocBuffer(len);
+    if (new_buffer)
+      {
+        nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
+                                      bufferToSplit->DataStart() + splitOffset,
+                                      len);
+        InsertAfter(new_buffer, bufferToSplit);
+        bufferToSplit->SetDataLength(splitOffset);
+      }
+  }
+
+void
+nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
+  {
+    if (aBuf == Head())
+      {
+        while (!mBuffers.isEmpty() && !Head()->IsInUse())
+          {
+            Buffer* buffer = Head();
+            buffer->remove();
+            free(buffer);
+          }
+      }
+  }
+
+size_t
+nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
+  {
+    size_t result = 0;
+    if (aStart.mBuffer == aEnd.mBuffer)
+      {
+        result = aEnd.mPosition - aStart.mPosition;
+      }
+    else
+      {
+        result = aStart.mBuffer->DataEnd() - aStart.mPosition;
+        for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
+          result += b->DataLength();
+        result += aEnd.mPosition - aEnd.mBuffer->DataStart();
+      }
+    return result;
+  }
+
+
+/**
+ * nsScannerSubstring
+ */
+
+nsScannerSubstring::nsScannerSubstring()
+  : mStart(nullptr, nullptr)
+  , mEnd(nullptr, nullptr)
+  , mBufferList(nullptr)
+  , mLength(0)
+  , mIsDirty(true)
+  {
+  }
+
+nsScannerSubstring::nsScannerSubstring( const nsAString& s )
+  : mBufferList(nullptr)
+  , mIsDirty(true)
+  {
+    Rebind(s);
+  }
+
+nsScannerSubstring::~nsScannerSubstring()
+  {
+    release_ownership_of_buffer_list();
+  }
+
+int32_t
+nsScannerSubstring::CountChar( char16_t c ) const
+  {
+      /*
+        re-write this to use a counting sink
+       */
+
+    size_type result = 0;
+    size_type lengthToExamine = Length();
+
+    nsScannerIterator iter;
+    for ( BeginReading(iter); ; )
+      {
+        int32_t lengthToExamineInThisFragment = iter.size_forward();
+        const char16_t* fromBegin = iter.get();
+        result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
+        if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
+          return result;
+        iter.advance(lengthToExamineInThisFragment);
+      }
+      // never reached; quiets warnings
+    return 0;
+  }
+
+void
+nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
+                            const nsScannerIterator& aStart, 
+                            const nsScannerIterator& aEnd )
+  {
+    // allow for the case where &aString == this
+
+    aString.acquire_ownership_of_buffer_list();
+    release_ownership_of_buffer_list();
+
+    mStart      = aStart;
+    mEnd        = aEnd;
+    mBufferList = aString.mBufferList;
+    mLength     = Distance(aStart, aEnd);
+    mIsDirty    = true;
+  }
+
+void
+nsScannerSubstring::Rebind( const nsAString& aString )
+  {
+    release_ownership_of_buffer_list();
+
+    mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
+    mIsDirty    = true;
+
+    init_range_from_buffer_list();
+    acquire_ownership_of_buffer_list();
+  }
+
+const nsSubstring&
+nsScannerSubstring::AsString() const
+  {
+    if (mIsDirty)
+      {
+        nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
+
+        if (mStart.mBuffer == mEnd.mBuffer) {
+          // We only have a single fragment to deal with, so just return it
+          // as a substring.
+          mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
+        } else {
+          // Otherwise, we need to copy the data into a flattened buffer.
+          nsScannerIterator start, end;
+          CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
+        }
+
+        mutable_this->mIsDirty = false;
+      }
+
+    return mFlattenedRep;
+  }
+
+nsScannerIterator&
+nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
+  {
+    iter.mOwner = this;
+
+    iter.mFragment.mBuffer = mStart.mBuffer;
+    iter.mFragment.mFragmentStart = mStart.mPosition;
+    if (mStart.mBuffer == mEnd.mBuffer)
+      iter.mFragment.mFragmentEnd = mEnd.mPosition;
+    else
+      iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
+
+    iter.mPosition = mStart.mPosition;
+    iter.normalize_forward();
+    return iter;
+  }
+
+nsScannerIterator&
+nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
+  {
+    iter.mOwner = this;
+
+    iter.mFragment.mBuffer = mEnd.mBuffer;
+    iter.mFragment.mFragmentEnd = mEnd.mPosition;
+    if (mStart.mBuffer == mEnd.mBuffer)
+      iter.mFragment.mFragmentStart = mStart.mPosition;
+    else
+      iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
+
+    iter.mPosition = mEnd.mPosition;
+    // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
+    return iter;
+  }
+
+bool
+nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
+  {
+    // check to see if we are at the end of the buffer list
+    if (frag.mBuffer == mEnd.mBuffer)
+      return false;
+
+    frag.mBuffer = frag.mBuffer->getNext();
+
+    if (frag.mBuffer == mStart.mBuffer)
+      frag.mFragmentStart = mStart.mPosition;
+    else
+      frag.mFragmentStart = frag.mBuffer->DataStart();
+
+    if (frag.mBuffer == mEnd.mBuffer)
+      frag.mFragmentEnd = mEnd.mPosition;
+    else
+      frag.mFragmentEnd = frag.mBuffer->DataEnd();
+
+    return true;
+  }
+
+bool
+nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
+  {
+    // check to see if we are at the beginning of the buffer list
+    if (frag.mBuffer == mStart.mBuffer)
+      return false;
+
+    frag.mBuffer = frag.mBuffer->getPrevious();
+
+    if (frag.mBuffer == mStart.mBuffer)
+      frag.mFragmentStart = mStart.mPosition;
+    else
+      frag.mFragmentStart = frag.mBuffer->DataStart();
+
+    if (frag.mBuffer == mEnd.mBuffer)
+      frag.mFragmentEnd = mEnd.mPosition;
+    else
+      frag.mFragmentEnd = frag.mBuffer->DataEnd();
+
+    return true;
+  }
+
+
+  /**
+   * nsScannerString
+   */
+
+nsScannerString::nsScannerString( Buffer* aBuf )
+  {
+    mBufferList = new nsScannerBufferList(aBuf);
+
+    init_range_from_buffer_list();
+    acquire_ownership_of_buffer_list();
+  }
+
+void
+nsScannerString::AppendBuffer( Buffer* aBuf )
+  {
+    mBufferList->Append(aBuf);
+    mLength += aBuf->DataLength();
+
+    mEnd.mBuffer = aBuf;
+    mEnd.mPosition = aBuf->DataEnd();
+
+    mIsDirty = true;
+  }
+
+void
+nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
+  {
+    Position old_start(mStart);
+    mStart = aIter;
+    mLength -= Position::Distance(old_start, mStart);
+    
+    mStart.mBuffer->IncrementUsageCount();
+    old_start.mBuffer->DecrementUsageCount();
+
+    mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
+
+    mIsDirty = true;
+  }
+
+void
+nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
+    /*
+     * Warning: this routine manipulates the shared buffer list in an unexpected way.
+     *  The original design did not really allow for insertions, but this call promises
+     *  that if called for a point after the end of all extant token strings, that no token string
+     *  or the work string will be invalidated.
+     *
+     *  This routine is protected because it is the responsibility of the derived class to keep those promises.
+     */
+  {
+    Position insertPos(aInsertPoint);
+
+    mBufferList->SplitBuffer(insertPos);
+      // splitting to the right keeps the work string and any extant token pointing to and
+      //  holding a reference count on the same buffer
+
+    Buffer* new_buffer = AllocBufferFromString(aReadable);
+      // make a new buffer with all the data to insert...
+      //  BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
+      //  of this and decide if we should do the work to fill it
+
+    Buffer* buffer_to_split = insertPos.mBuffer;
+    mBufferList->InsertAfter(new_buffer, buffer_to_split);
+    mLength += aReadable.Length();
+
+    mEnd.mBuffer = mBufferList->Tail();
+    mEnd.mPosition = mEnd.mBuffer->DataEnd();
+
+    mIsDirty = true;
+  }
+
+  /**
+   * nsScannerSharedSubstring
+   */
+
+void
+nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
+                              const nsScannerIterator &aEnd)
+{
+  // If the start and end positions are inside the same buffer, we must
+  // acquire ownership of the buffer.  If not, we can optimize by not holding
+  // onto it.
+
+  Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
+  bool sameBuffer = buffer == aEnd.buffer();
+
+  nsScannerBufferList *bufferList;
+
+  if (sameBuffer) {
+    bufferList = aStart.mOwner->mBufferList;
+    bufferList->AddRef();
+    buffer->IncrementUsageCount();
+  }
+
+  if (mBufferList)
+    ReleaseBuffer();
+
+  if (sameBuffer) {
+    mBuffer = buffer;
+    mBufferList = bufferList;
+    mString.Rebind(aStart.mPosition, aEnd.mPosition);
+  } else {
+    mBuffer = nullptr;
+    mBufferList = nullptr;
+    CopyUnicodeTo(aStart, aEnd, mString);
+  }
+}
+
+void
+nsScannerSharedSubstring::ReleaseBuffer()
+{
+  NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
+  mBuffer->DecrementUsageCount();
+  mBufferList->DiscardUnreferencedPrefix(mBuffer);
+  mBufferList->Release();
+}
+
+void
+nsScannerSharedSubstring::MakeMutable()
+{
+  nsString temp(mString); // this will force a copy of the data
+  mString.Assign(temp);   // mString will now share the just-allocated buffer
+
+  ReleaseBuffer();
+
+  mBuffer = nullptr;
+  mBufferList = nullptr;
+}
+
+  /**
+   * utils -- based on code from nsReadableUtils.cpp
+   */
+
+// private helper function
+static inline
+nsAString::iterator&
+copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
+  {
+    typedef nsCharSourceTraits<nsScannerIterator> source_traits;
+    typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
+
+    while ( first != last )
+      {
+        uint32_t distance = source_traits::readable_distance(first, last);
+        sink_traits::write(result, source_traits::read(first), distance);
+        NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
+        source_traits::advance(first, distance);
+      }
+
+    return result;
+  }
+
+bool
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+               const nsScannerIterator& aSrcEnd,
+               nsAString& aDest )
+  {
+    nsAString::iterator writer;
+
+    mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
+    if (!distance.isValid()) {
+      return false; // overflow detected
+    }
+
+    if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
+      aDest.Truncate();
+      return false; // out of memory
+    }
+    aDest.BeginWriting(writer);
+    nsScannerIterator fromBegin(aSrcStart);
+    
+    copy_multifragment_string(fromBegin, aSrcEnd, writer);
+    return true;
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsScannerSharedSubstring& aDest )
+  {
+    // Check whether we can just create a dependent string.
+    if (aDest.str().IsEmpty()) {
+      // We can just make |aDest| point to the buffer.
+      // This will take care of copying if the buffer spans fragments.
+      aDest.Rebind(aSrcStart, aSrcEnd);
+      return true;
+    }
+    // The dest string is not empty, so it can't be a dependent substring.
+    return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
+  }
+
+bool
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+                 const nsScannerIterator& aSrcEnd,
+                 nsAString& aDest )
+  {
+    nsAString::iterator writer;
+    const nsAString::size_type oldLength = aDest.Length();
+    CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
+    newLen += oldLength;
+    if (!newLen.isValid()) {
+      return false; // overflow detected
+    }
+
+    if (!aDest.SetLength(newLen.value(), mozilla::fallible))
+      return false; // out of memory
+    aDest.BeginWriting(writer).advance(oldLength);
+    nsScannerIterator fromBegin(aSrcStart);
+    
+    copy_multifragment_string(fromBegin, aSrcEnd, writer);
+    return true;
+  }
+
+bool
+FindCharInReadable( char16_t aChar,
+                    nsScannerIterator& aSearchStart,
+                    const nsScannerIterator& aSearchEnd )
+  {
+    while ( aSearchStart != aSearchEnd )
+      {
+        int32_t fragmentLength;
+        if ( SameFragment(aSearchStart, aSearchEnd) ) 
+          fragmentLength = aSearchEnd.get() - aSearchStart.get();
+        else
+          fragmentLength = aSearchStart.size_forward();
+
+        const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
+        if ( charFoundAt ) {
+          aSearchStart.advance( charFoundAt - aSearchStart.get() );
+          return true;
+        }
+
+        aSearchStart.advance(fragmentLength);
+      }
+
+    return false;
+  }
+
+bool
+FindInReadable( const nsAString& aPattern,
+                nsScannerIterator& aSearchStart,
+                nsScannerIterator& aSearchEnd,
+                const nsStringComparator& compare )
+  {
+    bool found_it = false;
+
+      // only bother searching at all if we're given a non-empty range to search
+    if ( aSearchStart != aSearchEnd )
+      {
+        nsAString::const_iterator aPatternStart, aPatternEnd;
+        aPattern.BeginReading(aPatternStart);
+        aPattern.EndReading(aPatternEnd);
+
+          // outer loop keeps searching till we find it or run out of string to search
+        while ( !found_it )
+          {
+              // fast inner loop (that's what it's called, not what it is) looks for a potential match
+            while ( aSearchStart != aSearchEnd &&
+                    compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
+              ++aSearchStart;
+
+              // if we broke out of the `fast' loop because we're out of string ... we're done: no match
+            if ( aSearchStart == aSearchEnd )
+              break;
+
+              // otherwise, we're at a potential match, let's see if we really hit one
+            nsAString::const_iterator testPattern(aPatternStart);
+            nsScannerIterator testSearch(aSearchStart);
+
+              // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
+            for(;;)
+              {
+                  // we already compared the first character in the outer loop,
+                  //  so we'll advance before the next comparison
+                ++testPattern;
+                ++testSearch;
+
+                  // if we verified all the way to the end of the pattern, then we found it!
+                if ( testPattern == aPatternEnd )
+                  {
+                    found_it = true;
+                    aSearchEnd = testSearch; // return the exact found range through the parameters
+                    break;
+                  }
+
+                  // if we got to end of the string we're searching before we hit the end of the
+                  //  pattern, we'll never find what we're looking for
+                if ( testSearch == aSearchEnd )
+                  {
+                    aSearchStart = aSearchEnd;
+                    break;
+                  }
+
+                  // else if we mismatched ... it's time to advance to the next search position
+                  //  and get back into the `fast' loop
+                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
+                  {
+                    ++aSearchStart;
+                    break;
+                  }
+              }
+          }
+      }
+
+    return found_it;
+  }
+
+  /**
+   * This implementation is simple, but does too much work.
+   * It searches the entire string from left to right, and returns the last match found, if any.
+   * This implementation will be replaced when I get |reverse_iterator|s working.
+   */
+bool
+RFindInReadable( const nsAString& aPattern,
+                 nsScannerIterator& aSearchStart,
+                 nsScannerIterator& aSearchEnd,
+                 const nsStringComparator& aComparator )
+  {
+    bool found_it = false;
+
+    nsScannerIterator savedSearchEnd(aSearchEnd);
+    nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
+
+    while ( searchStart != searchEnd )
+      {
+        if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
+          {
+            found_it = true;
+
+              // this is the best match so far, so remember it
+            aSearchStart = searchStart;
+            aSearchEnd = searchEnd;
+
+              // ...and get ready to search some more
+              //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
+            ++searchStart;
+            searchEnd = savedSearchEnd;
+          }
+      }
+
+      // if we never found it, return an empty range
+    if ( !found_it )
+      aSearchStart = aSearchEnd;
+
+    return found_it;
+  }