summary refs log tree commit diff stats
path: root/parser/htmlparser/nsScannerString.h
diff options
context:
space:
mode:
Diffstat (limited to 'parser/htmlparser/nsScannerString.h')
-rw-r--r-- parser/htmlparser/nsScannerString.h 604
1 files changed, 604 insertions, 0 deletions
diff --git a/parser/htmlparser/nsScannerString.h b/parser/htmlparser/nsScannerString.h
new file mode 100644
index 000000000..7b722238f
--- /dev/null
+++ b/parser/htmlparser/nsScannerString.h
@@ -0,0 +1,604 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsScannerString_h___
+#define nsScannerString_h___
+
+#include "nsString.h"
+#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
+#include "mozilla/LinkedList.h"
+#include <algorithm>
+
+
+ /**
+ * NOTE: nsScannerString (and the other classes defined in this file) are
+ * not related to nsAString or any of the other xpcom/string classes.
+ *
+ * nsScannerString is based on the nsSlidingString implementation that used
+ * to live in xpcom/string. Now that nsAString is limited to representing
+ * only single fragment strings, nsSlidingString can no longer be used.
+ *
+ * An advantage to this design is that it does not employ any virtual
+ * functions.
+ *
+ * This file uses SCC-style indenting in deference to the nsSlidingString
+ * code from which this code is derived ;-)
+ */
+
+class nsScannerIterator;
+class nsScannerSubstring;
+class nsScannerString;
+
+
+ /**
+ * nsScannerBufferList
+ *
+ * This class maintains a list of heap-allocated Buffer objects. The buffers
+ * are kept in a mozilla::LinkedList. Each buffer has a usage count that is
+ * incremented and decremented by the nsScannerSubstring objects using it.
+ *
+ * The buffer list itself is reference counted. This allows the buffer list
+ * to be shared by multiple nsScannerSubstring objects. The reference
+ * counting is not threadsafe; thread safety is not a requirement here.
+ *
+ * When a nsScannerSubstring releases its reference to a buffer list, it
+ * decrements the usage count of the first buffer in the buffer list that it
+ * was referencing. It informs the buffer list that it can discard buffers
+ * starting at that prefix. The buffer list will do so if the usage count of
+ * that buffer is 0 and if it is the first buffer in the list. It will
+ * continue to prune buffers starting from the front of the buffer list until
+ * it finds a buffer that has a usage count that is non-zero.
+ */
+class nsScannerBufferList
+ {
+ public:
+
+ /**
+ * Buffer objects are directly followed by a data segment. The start
+ * of the data segment is determined by incrementing the |this| pointer
+ * by 1 unit (see DataStart(), which returns |this+1|).
+ */
+ class Buffer : public mozilla::LinkedListElement<Buffer>
+ {
+ public:
+
+ void IncrementUsageCount() { ++mUsageCount; } // plain increment; no overflow check
+ void DecrementUsageCount() { --mUsageCount; } // plain decrement; does not guard against going below zero
+
+ bool IsInUse() const { return mUsageCount != 0; } // true while the usage count is non-zero
+
+ const char16_t* DataStart() const { return (const char16_t*) (this+1); } // data begins right after this header object
+ char16_t* DataStart() { return ( char16_t*) (this+1); } // non-const overload of the above
+
+ const char16_t* DataEnd() const { return mDataEnd; } // DataStart() + DataLength(), i.e. one past the last char in use
+ char16_t* DataEnd() { return mDataEnd; } // non-const overload of the above
+
+ const Buffer* Next() const { return getNext(); } // forwards to LinkedListElement::getNext()
+ Buffer* Next() { return getNext(); } // non-const overload of the above
+
+ const Buffer* Prev() const { return getPrevious(); } // forwards to LinkedListElement::getPrevious()
+ Buffer* Prev() { return getPrevious(); } // non-const overload of the above
+
+ uint32_t DataLength() const { return mDataEnd - DataStart(); } // length in char16_t units, not bytes
+ void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } // |len| is in char16_t units; not checked against capacity here
+
+ private:
+
+ friend class nsScannerBufferList;
+
+ int32_t mUsageCount; // not initialized in this header -- presumably set by AllocBuffer(); verify in the .cpp
+ char16_t* mDataEnd; // end of the used part of the data segment (see SetDataLength)
+ };
+
+ /**
+ * Position objects serve as lightweight pointers into a buffer list.
+ * The mPosition member must lie within the range bounded by
+ * mBuffer->DataStart() and mBuffer->DataEnd().
+ */
+ class Position
+ {
+ public:
+
+ Position() {} // NOTE: leaves mBuffer and mPosition uninitialized
+
+ Position( Buffer* buffer, char16_t* position )
+ : mBuffer(buffer)
+ , mPosition(position)
+ {}
+
+ inline
+ explicit Position( const nsScannerIterator& aIter ); // defined later in this header
+
+ inline
+ Position& operator=( const nsScannerIterator& aIter ); // defined later in this header
+
+ static size_t Distance( const Position& p1, const Position& p2 ); // defined outside this header
+
+ Buffer* mBuffer; // buffer that mPosition points into
+ char16_t* mPosition; // location inside mBuffer's data segment
+ };
+
+ static Buffer* AllocBufferFromString( const nsAString& ); // defined outside this header
+ static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
+
+ explicit nsScannerBufferList( Buffer* buf )
+ : mRefCnt(0) // starts at 0, not 1: nothing holds a reference until AddRef() is called
+ {
+ mBuffers.insertBack(buf); // |buf| becomes the first entry of the new list
+ }
+
+ void AddRef() { ++mRefCnt; } // non-atomic increment
+ void Release() { if (--mRefCnt == 0) delete this; } // deletes |this| when the count reaches zero
+
+ void Append( Buffer* buf ) { mBuffers.insertBack(buf); } // adds |buf| at the tail of the list
+ void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); } // LinkedListElement::setNext() links |buf| right after |prev|
+ void SplitBuffer( const Position& ); // defined outside this header
+ void DiscardUnreferencedPrefix( Buffer* ); // defined outside this header
+
+ Buffer* Head() { return mBuffers.getFirst(); } // first buffer in the list
+ const Buffer* Head() const { return mBuffers.getFirst(); } // const overload of the above
+
+ Buffer* Tail() { return mBuffers.getLast(); } // last buffer in the list
+ const Buffer* Tail() const { return mBuffers.getLast(); } // const overload of the above
+
+ private:
+
+ friend class nsScannerSubstring;
+
+ ~nsScannerBufferList() { ReleaseAll(); } // private destructor; Release() is the deleting path shown in this header
+ void ReleaseAll(); // defined outside this header
+
+ int32_t mRefCnt; // plain (non-atomic) reference count
+ mozilla::LinkedList<Buffer> mBuffers; // the buffers, in order from Head() to Tail()
+ };
+
+
+ /**
+ * nsScannerFragment represents a "slice" of a Buffer object.
+ */
+struct nsScannerFragment
+ {
+ typedef nsScannerBufferList::Buffer Buffer;
+
+ const Buffer* mBuffer; // buffer this slice refers to
+ const char16_t* mFragmentStart; // first char of the slice
+ const char16_t* mFragmentEnd; // end of the slice; nsScannerIterator treats it as one-past-the-end
+ };
+
+
+ /**
+ * nsScannerSubstring is the base class for nsScannerString. It provides
+ * access to iterators and methods to bind the substring to another
+ * substring or nsAString instance.
+ *
+ * This class owns the buffer list.
+ */
+class nsScannerSubstring
+ {
+ public:
+ typedef nsScannerBufferList::Buffer Buffer;
+ typedef nsScannerBufferList::Position Position;
+ typedef uint32_t size_type;
+
+ nsScannerSubstring(); // defined outside this header
+ explicit nsScannerSubstring( const nsAString& s ); // defined outside this header
+
+ ~nsScannerSubstring(); // defined outside this header
+
+ nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; // fills and returns |iter| (used that way by CopyUnicodeTo in this header)
+ nsScannerIterator& EndReading( nsScannerIterator& iter ) const; // fills and returns |iter| (used that way by CopyUnicodeTo in this header)
+
+ size_type Length() const { return mLength; } // cached length; see init_range_from_buffer_list()
+
+ int32_t CountChar( char16_t ) const; // defined outside this header
+
+ void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); // defined outside this header
+ void Rebind( const nsAString& ); // defined outside this header
+
+ const nsSubstring& AsString() const; // defined outside this header; uses mFlattenedRep / mIsDirty per the note below
+
+ bool GetNextFragment( nsScannerFragment& ) const; // called by nsScannerIterator::normalize_forward()
+ bool GetPrevFragment( nsScannerFragment& ) const; // called by nsScannerIterator::normalize_backward()
+
+ static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } // convenience forwarder
+ static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } // convenience forwarder
+
+ protected:
+
+ void acquire_ownership_of_buffer_list() const // requires non-null mBufferList and mStart.mBuffer (no checks here)
+ {
+ mBufferList->AddRef(); // take a reference on the shared list
+ mStart.mBuffer->IncrementUsageCount(); // mark our first buffer as in use
+ }
+
+ void release_ownership_of_buffer_list()
+ {
+ if (mBufferList) // nothing to do when no buffer list is bound
+ {
+ mStart.mBuffer->DecrementUsageCount(); // drop our use of the first buffer before pruning
+ mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); // let the list discard buffers starting at that prefix
+ mBufferList->Release(); // may delete the list if this was the last reference
+ }
+ }
+
+ void init_range_from_buffer_list() // spans the whole list: Head() start through Tail() end
+ {
+ mStart.mBuffer = mBufferList->Head();
+ mStart.mPosition = mStart.mBuffer->DataStart();
+
+ mEnd.mBuffer = mBufferList->Tail();
+ mEnd.mPosition = mEnd.mBuffer->DataEnd();
+
+ mLength = Position::Distance(mStart, mEnd); // cache the length of the [mStart, mEnd) range
+ }
+
+ Position mStart; // beginning of the range covered by this substring
+ Position mEnd; // end of the range covered by this substring
+ nsScannerBufferList *mBufferList; // shared, reference-counted list; null-checked in release_ownership_of_buffer_list()
+ size_type mLength; // cached Distance(mStart, mEnd)
+
+ // these fields are used to implement AsString
+ nsDependentSubstring mFlattenedRep;
+ bool mIsDirty; // presumably true when mFlattenedRep is stale -- confirm in the .cpp
+
+ friend class nsScannerSharedSubstring;
+ };
+
+
+ /**
+ * nsScannerString provides methods to grow and modify a buffer list.
+ */
+class nsScannerString : public nsScannerSubstring
+ {
+ public:
+
+ explicit nsScannerString( Buffer* ); // defined outside this header
+
+ // Ownership of the buffer passes to the string: it takes and keeps the
+ // buffer, deleting it when done.
+ // Use AllocBuffer or AllocBufferFromString to create a Buffer object
+ // for use with this function.
+ void AppendBuffer( Buffer* );
+
+ void DiscardPrefix( const nsScannerIterator& ); // defined outside this header
+ // any other way you want to do this?
+
+ void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); // defined outside this header
+ };
+
+
+ /**
+ * nsScannerSharedSubstring implements copy-on-write semantics for
+ * nsScannerSubstring. When you call .writable(), it will copy the data
+ * and return a mutable string object. This class also manages releasing
+ * the reference to the scanner buffer when it is no longer needed.
+ */
+
+class nsScannerSharedSubstring
+ {
+ public:
+ nsScannerSharedSubstring()
+ : mBuffer(nullptr), mBufferList(nullptr) { } // starts unbound; mString is default-constructed
+
+ ~nsScannerSharedSubstring()
+ {
+ if (mBufferList) // only release when a buffer list is currently held
+ ReleaseBuffer();
+ }
+
+ // Acquire a copy-on-write reference to the given substring.
+ void Rebind(const nsScannerIterator& aStart,
+ const nsScannerIterator& aEnd);
+
+ // Get a mutable reference to this string
+ nsSubstring& writable()
+ {
+ if (mBufferList) // still bound to a buffer list: call MakeMutable() first
+ MakeMutable();
+
+ return mString;
+ }
+
+ // Get a const reference to this string
+ const nsSubstring& str() const { return mString; }
+
+ private:
+ typedef nsScannerBufferList::Buffer Buffer;
+
+ void ReleaseBuffer(); // defined outside this header
+ void MakeMutable(); // defined outside this header
+
+ nsDependentSubstring mString; // the string handed out by str() and writable()
+ Buffer *mBuffer; // null until bound -- presumably the buffer mString points into; verify in the .cpp
+ nsScannerBufferList *mBufferList; // non-null while a buffer list is held (see destructor and writable())
+ };
+
+ /**
+ * nsScannerIterator works just like nsReadingIterator<CharT> except that
+ * it knows how to iterate over a list of scanner buffers.
+ */
+class nsScannerIterator
+ {
+ public:
+ typedef nsScannerIterator self_type;
+ typedef ptrdiff_t difference_type;
+ typedef char16_t value_type;
+ typedef const char16_t* pointer;
+ typedef const char16_t& reference;
+ typedef nsScannerSubstring::Buffer Buffer;
+
+ protected:
+
+ nsScannerFragment mFragment; // fragment the iterator is currently inside
+ const char16_t* mPosition; // current location within mFragment
+ const nsScannerSubstring* mOwner; // substring asked for the next/previous fragment
+
+ friend class nsScannerSubstring;
+ friend class nsScannerSharedSubstring;
+
+ public:
+ nsScannerIterator() {} // NOTE: leaves mFragment, mPosition and mOwner uninitialized
+ // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
+ // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
+
+ inline void normalize_forward(); // defined later in this header
+ inline void normalize_backward(); // defined later in this header
+
+ pointer get() const // raw pointer to the current character
+ {
+ return mPosition;
+ }
+
+ char16_t operator*() const // dereferences get(); no bounds check
+ {
+ return *get();
+ }
+
+ const nsScannerFragment& fragment() const // fragment currently being iterated
+ {
+ return mFragment;
+ }
+
+ const Buffer* buffer() const // buffer behind the current fragment
+ {
+ return mFragment.mBuffer;
+ }
+
+ self_type& operator++() // pre-increment: step, then normalize forward
+ {
+ ++mPosition;
+ normalize_forward(); // hop to the next fragment if we landed on this fragment's end
+ return *this;
+ }
+
+ self_type operator++( int ) // post-increment: returns a copy of the old state
+ {
+ self_type result(*this);
+ ++mPosition;
+ normalize_forward();
+ return result;
+ }
+
+ self_type& operator--() // pre-decrement: normalize backward, then step
+ {
+ normalize_backward(); // if at a fragment start, move to the previous fragment's end first
+ --mPosition;
+ return *this;
+ }
+
+ self_type operator--( int ) // post-decrement: returns a copy of the old state
+ {
+ self_type result(*this);
+ normalize_backward();
+ --mPosition;
+ return result;
+ }
+
+ difference_type size_forward() const // chars from the current position to the end of this fragment
+ {
+ return mFragment.mFragmentEnd - mPosition;
+ }
+
+ difference_type size_backward() const // chars from the start of this fragment to the current position
+ {
+ return mPosition - mFragment.mFragmentStart;
+ }
+
+ self_type& advance( difference_type n ) // moves by |n| chars; negative |n| moves backward
+ {
+ while ( n > 0 )
+ {
+ difference_type one_hop = std::min(n, size_forward()); // never step past the current fragment
+
+ NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
+ // perhaps I should |break| if |!one_hop|?
+
+ mPosition += one_hop;
+ normalize_forward();
+ n -= one_hop; // makes no progress when one_hop is 0 (the case the assertion above reports)
+ }
+
+ while ( n < 0 )
+ {
+ normalize_backward(); // step into the previous fragment first if at a fragment start
+ difference_type one_hop = std::max(n, -size_backward()); // negative; never step before the fragment start
+
+ NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
+ // perhaps I should |break| if |!one_hop|?
+
+ mPosition += one_hop;
+ n -= one_hop; // makes no progress when one_hop is 0 (the case the assertion above reports)
+ }
+
+ return *this;
+ }
+ };
+
+
+inline
+bool
+SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
+ { // true when both iterators are in a fragment with the same start pointer
+ return a.fragment().mFragmentStart == b.fragment().mFragmentStart; // only the start pointers are compared
+ }
+
+
+ /**
+ * this class is needed in order to make use of the methods in nsAlgorithm.h
+ */
+template <>
+struct nsCharSourceTraits<nsScannerIterator>
+ {
+ typedef nsScannerIterator::difference_type difference_type;
+
+ static
+ uint32_t
+ readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
+ { // same fragment: chars from |first| to |last|; otherwise: chars left in |first|'s fragment
+ return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
+ }
+
+ static
+ const nsScannerIterator::value_type*
+ read( const nsScannerIterator& iter )
+ { // raw pointer to the character at |iter|
+ return iter.get();
+ }
+
+ static
+ void
+ advance( nsScannerIterator& s, difference_type n )
+ { // forwards to nsScannerIterator::advance()
+ s.advance(n);
+ }
+ };
+
+
+ /**
+ * inline methods follow
+ */
+
+inline
+void
+nsScannerIterator::normalize_forward()
+ { // while parked on a fragment end, move to the start of the next fragment, if the owner reports one
+ while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
+ mPosition = mFragment.mFragmentStart; // presumably GetNextFragment() has updated mFragment -- confirm in the .cpp
+ }
+
+inline
+void
+nsScannerIterator::normalize_backward()
+ { // while parked on a fragment start, move to the end of the previous fragment, if the owner reports one
+ while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
+ mPosition = mFragment.mFragmentEnd; // presumably GetPrevFragment() has updated mFragment -- confirm in the .cpp
+ }
+
+inline
+bool
+operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+ { // equality is decided by the raw position pointers alone
+ return lhs.get() == rhs.get(); // fragment and owner are not compared
+ }
+
+inline
+bool
+operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
+ { // inequality is decided by the raw position pointers alone
+ return lhs.get() != rhs.get(); // fragment and owner are not compared
+ }
+
+
+inline
+nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
+ : mBuffer(const_cast<Buffer*>(aIter.buffer())) // casts away the const that the iterator exposes
+ , mPosition(const_cast<char16_t*>(aIter.get())) // same for the character pointer
+ {}
+
+inline
+nsScannerBufferList::Position&
+nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
+ { // copies the iterator's buffer and position, casting away const
+ mBuffer = const_cast<Buffer*>(aIter.buffer());
+ mPosition = const_cast<char16_t*>(aIter.get());
+ return *this; // allows chained assignment
+ }
+
+
+ /**
+ * scanner string utils
+ *
+ * These methods mimic the API provided by nsReadableUtils in xpcom/string.
+ * Here we provide only the methods that the htmlparser module needs.
+ */
+
+inline
+size_t
+Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
+ { // converts both iterators to Positions and forwards to Position::Distance()
+ typedef nsScannerBufferList::Position Position;
+ return Position::Distance(Position(aStart), Position(aEnd)); // Position::Distance() is defined outside this header
+ }
+
+bool // declaration only; defined outside this header (meaning of the bool result is not visible here)
+CopyUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsAString& aDest );
+
+inline
+bool
+CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+ { // whole-substring convenience overload
+ nsScannerIterator begin, end; // filled in by BeginReading()/EndReading() below
+ return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); // forwards to the iterator-range overload
+ }
+
+bool // declaration only; defined outside this header (meaning of the bool result is not visible here)
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsAString& aDest );
+
+inline
+bool
+AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
+ { // whole-substring convenience overload
+ nsScannerIterator begin, end; // filled in by BeginReading()/EndReading() below
+ return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); // forwards to the iterator-range overload
+ }
+
+bool // declaration only; overload whose destination is an nsScannerSharedSubstring
+AppendUnicodeTo( const nsScannerIterator& aSrcStart,
+ const nsScannerIterator& aSrcEnd,
+ nsScannerSharedSubstring& aDest );
+
+bool // declaration only; defined outside this header. |aStart| is non-const, so it is presumably updated -- confirm in the .cpp
+FindCharInReadable( char16_t aChar,
+ nsScannerIterator& aStart,
+ const nsScannerIterator& aEnd );
+
+bool // declaration only; |aStart| and |aEnd| are both non-const, presumably in/out -- confirm in the .cpp
+FindInReadable( const nsAString& aPattern,
+ nsScannerIterator& aStart,
+ nsScannerIterator& aEnd,
+ const nsStringComparator& = nsDefaultStringComparator() ); // comparator defaults to nsDefaultStringComparator
+
+bool // declaration only; same parameters as FindInReadable (presumably searches from the end, per the name -- confirm)
+RFindInReadable( const nsAString& aPattern,
+ nsScannerIterator& aStart,
+ nsScannerIterator& aEnd,
+ const nsStringComparator& = nsDefaultStringComparator() ); // comparator defaults to nsDefaultStringComparator
+
+inline
+bool
+CaseInsensitiveFindInReadable( const nsAString& aPattern,
+ nsScannerIterator& aStart,
+ nsScannerIterator& aEnd )
+ { // FindInReadable() with nsCaseInsensitiveStringComparator instead of the default comparator
+ return FindInReadable(aPattern, aStart, aEnd,
+ nsCaseInsensitiveStringComparator()); // temporary comparator lives for the duration of the call
+ }
+
+#endif // !defined(nsScannerString_h___)