diff options
Diffstat (limited to 'intl/icu/source/i18n/stsearch.cpp')
-rw-r--r-- | intl/icu/source/i18n/stsearch.cpp | 483 |
1 files changed, 483 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/stsearch.cpp b/intl/icu/source/i18n/stsearch.cpp new file mode 100644 index 000000000..6daee6efe --- /dev/null +++ b/intl/icu/source/i18n/stsearch.cpp @@ -0,0 +1,483 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2014 IBM and others. All rights reserved. +********************************************************************** +* Date Name Description +* 03/22/2000 helena Creation. +********************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/stsearch.h" +#include "usrchimp.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) + +// public constructors and destructors ----------------------------------- + +StringSearch::StringSearch(const UnicodeString &pattern, + const UnicodeString &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), + m_text_.getBuffer(), m_text_.length(), + locale.getName(), (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + const UnicodeString &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + if (coll == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), coll->toUCollator(), + (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + CharacterIterator &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), + m_text_.getBuffer(), m_text_.length(), + locale.getName(), (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + CharacterIterator &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + if (coll == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), coll->toUCollator(), + (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const StringSearch &that) : + SearchIterator(that.m_text_, that.m_breakiterator_), + m_pattern_(that.m_pattern_) +{ + UErrorCode status = U_ZERO_ERROR; + + // Free m_search_ from the superclass + uprv_free(m_search_); + m_search_ = NULL; + + if (that.m_strsrch_ == NULL) { + // This was not a good copy + m_strsrch_ = NULL; + } + else { + // Make a deep copy + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), + that.m_strsrch_->collator, + (UBreakIterator *)that.m_breakiterator_, + &status); + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } + } +} + +StringSearch::~StringSearch() +{ + if (m_strsrch_ != NULL) { + usearch_close(m_strsrch_); + m_search_ = NULL; + } +} + +StringSearch * +StringSearch::clone() const { + return new StringSearch(*this); +} + +// operator overloading --------------------------------------------- +StringSearch & StringSearch::operator=(const StringSearch &that) +{ + if ((*this) != that) { + UErrorCode status = U_ZERO_ERROR; + m_text_ = that.m_text_; + m_breakiterator_ = that.m_breakiterator_; + m_pattern_ = that.m_pattern_; + // all m_search_ in the parent class is linked up with m_strsrch_ + usearch_close(m_strsrch_); + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), + that.m_strsrch_->collator, + NULL, &status); + // Check null pointer + if (m_strsrch_ != NULL) { + m_search_ = m_strsrch_->search; + } + } + return *this; +} + +UBool StringSearch::operator==(const SearchIterator &that) const +{ + if (this == &that) { + return TRUE; + } + if (SearchIterator::operator ==(that)) { + StringSearch &thatsrch = (StringSearch &)that; + return (this->m_pattern_ == thatsrch.m_pattern_ && + this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); + } + return FALSE; +} + +// public get and set methods ---------------------------------------- + +void StringSearch::setOffset(int32_t position, UErrorCode &status) +{ + // status checked in usearch_setOffset + usearch_setOffset(m_strsrch_, position, &status); +} + +int32_t StringSearch::getOffset(void) const +{ + return usearch_getOffset(m_strsrch_); +} + +void StringSearch::setText(const UnicodeString &text, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + m_text_ = text; + usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); + } +} + +void StringSearch::setText(CharacterIterator &text, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + text.getText(m_text_); + usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); + } +} + +RuleBasedCollator * StringSearch::getCollator() const +{ + // Note the const_cast. It would be cleaner if this const method returned a const collator. + return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator)); +} + +void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); + } +} + +void StringSearch::setPattern(const UnicodeString &pattern, + UErrorCode &status) +{ + if (U_SUCCESS(status)) { + m_pattern_ = pattern; + usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), + &status); + } +} + +const UnicodeString & StringSearch::getPattern() const +{ + return m_pattern_; +} + +// public methods ---------------------------------------------------- + +void StringSearch::reset() +{ + usearch_reset(m_strsrch_); +} + +SearchIterator * StringSearch::safeClone(void) const +{ + UErrorCode status = U_ZERO_ERROR; + StringSearch *result = new StringSearch(m_pattern_, m_text_, + getCollator(), + m_breakiterator_, + status); + /* test for NULL */ + if (result == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + result->setOffset(getOffset(), status); + result->setMatchStart(m_strsrch_->search->matchedIndex); + result->setMatchLength(m_strsrch_->search->matchedLength); + if (U_FAILURE(status)) { + return NULL; + } + return result; +} + +// protected method ------------------------------------------------- + +int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) +{ + // values passed here are already in the pre-shift position + if (U_SUCCESS(status)) { + if (m_strsrch_->pattern.cesLength == 0) { + m_search_->matchedIndex = + m_search_->matchedIndex == USEARCH_DONE ? + getOffset() : m_search_->matchedIndex + 1; + m_search_->matchedLength = 0; + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, + &status); + if (m_search_->matchedIndex == m_search_->textLength) { + m_search_->matchedIndex = USEARCH_DONE; + } + } + else { + // looking at usearch.cpp, this part is shifted out to + // StringSearch instead of SearchIterator because m_strsrch_ is + // not accessible in SearchIterator +#if 0 + if (position + m_strsrch_->pattern.defaultShiftSize + > m_search_->textLength) { + setMatchNotFound(); + return USEARCH_DONE; + } +#endif + if (m_search_->matchedLength <= 0) { + // the flipping direction issue has already been handled + // in next() + // for boundary check purposes. this will ensure that the + // next match will not preceed the current offset + // note search->matchedIndex will always be set to something + // in the code + m_search_->matchedIndex = position - 1; + } + + ucol_setOffset(m_strsrch_->textIter, position, &status); + +#if 0 + for (;;) { + if (m_search_->isCanonicalMatch) { + // can't use exact here since extra accents are allowed. + usearch_handleNextCanonical(m_strsrch_, &status); + } + else { + usearch_handleNextExact(m_strsrch_, &status); + } + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + if (m_breakiterator_ == NULL +#if !UCONFIG_NO_BREAK_ITERATION + || + m_search_->matchedIndex == USEARCH_DONE || + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && + m_breakiterator_->isBoundary(m_search_->matchedIndex + + m_search_->matchedLength)) +#endif + ) { + if (m_search_->matchedIndex == USEARCH_DONE) { + ucol_setOffset(m_strsrch_->textIter, + m_search_->textLength, &status); + } + else { + ucol_setOffset(m_strsrch_->textIter, + m_search_->matchedIndex, &status); + } + return m_search_->matchedIndex; + } + } +#else + // if m_strsrch_->breakIter is always the same as m_breakiterator_ + // then we don't need to check the match boundaries here because + // usearch_handleNextXXX will already have done it. + if (m_search_->isCanonicalMatch) { + // *could* actually use exact here 'cause no extra accents allowed... + usearch_handleNextCanonical(m_strsrch_, &status); + } else { + usearch_handleNextExact(m_strsrch_, &status); + } + + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + + if (m_search_->matchedIndex == USEARCH_DONE) { + ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); + } else { + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); + } + + return m_search_->matchedIndex; +#endif + } + } + return USEARCH_DONE; +} + +int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) +{ + // values passed here are already in the pre-shift position + if (U_SUCCESS(status)) { + if (m_strsrch_->pattern.cesLength == 0) { + m_search_->matchedIndex = + (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : + m_search_->matchedIndex); + if (m_search_->matchedIndex == 0) { + setMatchNotFound(); + } + else { + m_search_->matchedIndex --; + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, + &status); + m_search_->matchedLength = 0; + } + } + else { + // looking at usearch.cpp, this part is shifted out to + // StringSearch instead of SearchIterator because m_strsrch_ is + // not accessible in SearchIterator +#if 0 + if (!m_search_->isOverlap && + position - m_strsrch_->pattern.defaultShiftSize < 0) { + setMatchNotFound(); + return USEARCH_DONE; + } + + for (;;) { + if (m_search_->isCanonicalMatch) { + // can't use exact here since extra accents are allowed. + usearch_handlePreviousCanonical(m_strsrch_, &status); + } + else { + usearch_handlePreviousExact(m_strsrch_, &status); + } + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + if (m_breakiterator_ == NULL +#if !UCONFIG_NO_BREAK_ITERATION + || + m_search_->matchedIndex == USEARCH_DONE || + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && + m_breakiterator_->isBoundary(m_search_->matchedIndex + + m_search_->matchedLength)) +#endif + ) { + return m_search_->matchedIndex; + } + } +#else + ucol_setOffset(m_strsrch_->textIter, position, &status); + + if (m_search_->isCanonicalMatch) { + // *could* use exact match here since extra accents *not* allowed! + usearch_handlePreviousCanonical(m_strsrch_, &status); + } else { + usearch_handlePreviousExact(m_strsrch_, &status); + } + + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + + return m_search_->matchedIndex; +#endif + } + + return m_search_->matchedIndex; + } + return USEARCH_DONE; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ |