summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/stsearch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/stsearch.cpp')
-rw-r--r--intl/icu/source/i18n/stsearch.cpp483
1 files changed, 483 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/stsearch.cpp b/intl/icu/source/i18n/stsearch.cpp
new file mode 100644
index 000000000..6daee6efe
--- /dev/null
+++ b/intl/icu/source/i18n/stsearch.cpp
@@ -0,0 +1,483 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2001-2014 IBM and others. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 03/22/2000 helena Creation.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/stsearch.h"
+#include "usrchimp.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
+
+// public constructors and destructors -----------------------------------
+
+// Builds a searcher that looks for `pattern` inside `text`, with the collator
+// selected through `locale`.
+//   breakiter - optional break iterator, handed both to the base class and
+//               to the C searcher.
+//   status    - in/out error code. If it already signals a failure nothing is
+//               opened and m_strsrch_ stays NULL.
+// When usearch_open() fails, both m_strsrch_-dependent state and m_search_
+// are left NULL.
+StringSearch::StringSearch(const UnicodeString &pattern,
+                           const UnicodeString &text,
+                           const Locale &locale,
+                           BreakIterator *breakiter,
+                           UErrorCode &status) :
+    SearchIterator(text, breakiter),
+    m_pattern_(pattern)
+{
+    // Start out without a C searcher; only a successful open replaces this.
+    m_strsrch_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const char *localeName = locale.getName();
+    UBreakIterator *cBreakIter = (UBreakIterator *)breakiter;
+    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+                              m_text_.getBuffer(), m_text_.length(),
+                              localeName, cBreakIter, &status);
+
+    // m_search_ has been created by the base SearchIterator class: release
+    // that instance and, on success, share the one inside the C searcher.
+    uprv_free(m_search_);
+    if (U_FAILURE(status)) {
+        m_search_ = NULL;
+    } else {
+        m_search_ = m_strsrch_->search;
+    }
+}
+
+// Builds a searcher that looks for `pattern` inside `text`, using the caller's
+// collator `coll`.
+//   coll      - must not be NULL; a NULL collator sets status to
+//               U_ILLEGAL_ARGUMENT_ERROR and leaves m_strsrch_ NULL.
+//   breakiter - optional break iterator, handed both to the base class and
+//               to the C searcher.
+//   status    - in/out error code. If it already signals a failure nothing is
+//               opened and m_strsrch_ stays NULL.
+StringSearch::StringSearch(const UnicodeString &pattern,
+                           const UnicodeString &text,
+                           RuleBasedCollator *coll,
+                           BreakIterator *breakiter,
+                           UErrorCode &status) :
+    SearchIterator(text, breakiter),
+    m_pattern_(pattern)
+{
+    // Start out without a C searcher; only a successful open replaces this.
+    m_strsrch_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (coll == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    UBreakIterator *cBreakIter = (UBreakIterator *)breakiter;
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                          m_pattern_.length(),
+                                          m_text_.getBuffer(),
+                                          m_text_.length(),
+                                          coll->toUCollator(),
+                                          cBreakIter,
+                                          &status);
+
+    // m_search_ has been created by the base SearchIterator class: release
+    // that instance and, on success, share the one inside the C searcher.
+    uprv_free(m_search_);
+    if (U_FAILURE(status)) {
+        m_search_ = NULL;
+    } else {
+        m_search_ = m_strsrch_->search;
+    }
+}
+
+// Builds a searcher that looks for `pattern` in the text supplied through a
+// CharacterIterator, with the collator selected through `locale`.
+// The C searcher is opened on m_text_, which is populated by the base-class
+// constructor (presumably from the iterator's contents - see SearchIterator).
+//   breakiter - optional break iterator, handed both to the base class and
+//               to the C searcher.
+//   status    - in/out error code. If it already signals a failure nothing is
+//               opened and m_strsrch_ stays NULL.
+StringSearch::StringSearch(const UnicodeString &pattern,
+                           CharacterIterator &text,
+                           const Locale &locale,
+                           BreakIterator *breakiter,
+                           UErrorCode &status) :
+    SearchIterator(text, breakiter),
+    m_pattern_(pattern)
+{
+    // Start out without a C searcher; only a successful open replaces this.
+    m_strsrch_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    const char *localeName = locale.getName();
+    UBreakIterator *cBreakIter = (UBreakIterator *)breakiter;
+    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
+                              m_text_.getBuffer(), m_text_.length(),
+                              localeName, cBreakIter, &status);
+
+    // m_search_ has been created by the base SearchIterator class: release
+    // that instance and, on success, share the one inside the C searcher.
+    uprv_free(m_search_);
+    if (U_FAILURE(status)) {
+        m_search_ = NULL;
+    } else {
+        m_search_ = m_strsrch_->search;
+    }
+}
+
+// Builds a searcher that looks for `pattern` in the text supplied through a
+// CharacterIterator, using the caller's collator `coll`.
+// The C searcher is opened on m_text_, which is populated by the base-class
+// constructor (presumably from the iterator's contents - see SearchIterator).
+//   coll      - must not be NULL; a NULL collator sets status to
+//               U_ILLEGAL_ARGUMENT_ERROR and leaves m_strsrch_ NULL.
+//   breakiter - optional break iterator, handed both to the base class and
+//               to the C searcher.
+//   status    - in/out error code. If it already signals a failure nothing is
+//               opened and m_strsrch_ stays NULL.
+StringSearch::StringSearch(const UnicodeString &pattern,
+                           CharacterIterator &text,
+                           RuleBasedCollator *coll,
+                           BreakIterator *breakiter,
+                           UErrorCode &status) :
+    SearchIterator(text, breakiter),
+    m_pattern_(pattern)
+{
+    // Start out without a C searcher; only a successful open replaces this.
+    m_strsrch_ = NULL;
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (coll == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    UBreakIterator *cBreakIter = (UBreakIterator *)breakiter;
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                          m_pattern_.length(),
+                                          m_text_.getBuffer(),
+                                          m_text_.length(),
+                                          coll->toUCollator(),
+                                          cBreakIter,
+                                          &status);
+
+    // m_search_ has been created by the base SearchIterator class: release
+    // that instance and, on success, share the one inside the C searcher.
+    uprv_free(m_search_);
+    if (U_FAILURE(status)) {
+        m_search_ = NULL;
+    } else {
+        m_search_ = m_strsrch_->search;
+    }
+}
+
+// Copy constructor. Copies the text, break iterator pointer and pattern of
+// `that`, then opens a fresh C searcher on the copies with the collator that
+// `that` is using. If `that` has no C searcher, or opening a new one fails,
+// m_search_ is left NULL (and m_strsrch_ is NULL as well).
+StringSearch::StringSearch(const StringSearch &that) :
+    SearchIterator(that.m_text_, that.m_breakiterator_),
+    m_pattern_(that.m_pattern_)
+{
+    // Free m_search_ from the superclass; it is replaced below on success.
+    uprv_free(m_search_);
+    m_search_ = NULL;
+    m_strsrch_ = NULL;
+
+    if (that.m_strsrch_ == NULL) {
+        // The source was not a good object, so neither is this copy.
+        return;
+    }
+
+    // Make a deep copy: a separate searcher over our own pattern and text.
+    UErrorCode status = U_ZERO_ERROR;
+    UBreakIterator *cBreakIter = (UBreakIterator *)that.m_breakiterator_;
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                          m_pattern_.length(),
+                                          m_text_.getBuffer(),
+                                          m_text_.length(),
+                                          that.m_strsrch_->collator,
+                                          cBreakIter,
+                                          &status);
+    if (U_SUCCESS(status)) {
+        // Share the search state owned by the new C searcher.
+        m_search_ = m_strsrch_->search;
+    }
+}
+
+// Destructor. Closes the C searcher, if there is one.
+StringSearch::~StringSearch()
+{
+    if (m_strsrch_ == NULL) {
+        // Nothing was opened; m_search_ is left as it is.
+        return;
+    }
+    usearch_close(m_strsrch_);
+    // m_search_ pointed at m_strsrch_->search (set by the constructors), so
+    // it is cleared here - presumably so that the base-class destructor does
+    // not release it a second time.
+    m_search_ = NULL;
+}
+
+// Returns a heap-allocated copy of this object made with the copy
+// constructor. The result comes from operator new.
+StringSearch *
+StringSearch::clone() const {
+    StringSearch *copy = new StringSearch(*this);
+    return copy;
+}
+
+// operator overloading ---------------------------------------------
+// Assignment. Copies the text, break iterator pointer and pattern of `that`
+// and replaces the C searcher with a new one opened on those copies with
+// the collator used by `that`.
+//
+// Self-assignment is detected by address. (A value comparison would go
+// through operator==, which dereferences m_strsrch_ on both sides and so
+// cannot be used safely when either object failed to open its searcher.)
+//
+// If `that` has no C searcher, or a new one cannot be opened, this object
+// ends up with m_strsrch_ == NULL and m_search_ == NULL - the same state the
+// constructors leave behind when opening fails.
+StringSearch & StringSearch::operator=(const StringSearch &that)
+{
+    if (this == &that) {
+        return *this;
+    }
+
+    m_text_ = that.m_text_;
+    m_breakiterator_ = that.m_breakiterator_;
+    m_pattern_ = that.m_pattern_;
+
+    // Drop the current search state before building the replacement, and
+    // never leave m_search_ pointing at released memory.
+    if (m_strsrch_ != NULL) {
+        // all m_search_ in the parent class is linked up with m_strsrch_,
+        // so closing the searcher takes care of it as well
+        usearch_close(m_strsrch_);
+        m_strsrch_ = NULL;
+    } else {
+        // No searcher was ever opened: m_search_ is either NULL or still
+        // the instance created by the base class.
+        uprv_free(m_search_);
+    }
+    m_search_ = NULL;
+
+    if (that.m_strsrch_ != NULL) {
+        UErrorCode status = U_ZERO_ERROR;
+        // NOTE(review): no break iterator is passed to the C searcher here,
+        // unlike in the copy constructor - kept as in the original code.
+        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
+                                              m_pattern_.length(),
+                                              m_text_.getBuffer(),
+                                              m_text_.length(),
+                                              that.m_strsrch_->collator,
+                                              NULL, &status);
+        // Check null pointer
+        if (m_strsrch_ != NULL) {
+            m_search_ = m_strsrch_->search;
+        }
+    }
+    return *this;
+}
+
+// Equality. Two objects are equal when the base-class comparison succeeds,
+// `that` really is a StringSearch, the patterns are equal and both use the
+// same collator object.
+//
+// `that` may be any SearchIterator, so the downcast is checked with
+// dynamic_cast instead of a C-style cast. Objects without a C searcher
+// (m_strsrch_ == NULL) are only equal to other objects without one.
+UBool StringSearch::operator==(const SearchIterator &that) const
+{
+    if (this == &that) {
+        return TRUE;
+    }
+    if (!SearchIterator::operator ==(that)) {
+        return FALSE;
+    }
+
+    const StringSearch *other = dynamic_cast<const StringSearch *>(&that);
+    if (other == NULL) {
+        // Some other kind of SearchIterator.
+        return FALSE;
+    }
+    if (!(m_pattern_ == other->m_pattern_)) {
+        return FALSE;
+    }
+    if (m_strsrch_ == NULL || other->m_strsrch_ == NULL) {
+        // No collator to compare on at least one side.
+        return m_strsrch_ == other->m_strsrch_;
+    }
+    return m_strsrch_->collator == other->m_strsrch_->collator;
+}
+
+// public get and set methods ----------------------------------------
+
+// Moves the search position to `position` by forwarding to the C searcher.
+//   status - in/out error code; it is not tested here because
+//            usearch_setOffset() checks it.
+void StringSearch::setOffset(int32_t position, UErrorCode &status)
+{
+    UErrorCode *errorCode = &status;
+    usearch_setOffset(m_strsrch_, position, errorCode);
+}
+
+// Returns the current search position as reported by the C searcher.
+int32_t StringSearch::getOffset(void) const
+{
+    const int32_t offset = usearch_getOffset(m_strsrch_);
+    return offset;
+}
+
+// Replaces the text being searched.
+//   text   - the new text; it is copied into m_text_.
+//   status - in/out error code; nothing happens if it already signals a
+//            failure.
+void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        m_text_ = text;
+        // Point the C searcher at the buffer of our own copy, exactly as the
+        // constructors and the CharacterIterator overload do, so that the
+        // search does not depend on how long the caller keeps `text` alive.
+        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(),
+                        &status);
+    }
+}
+
+// Replaces the text being searched with the contents of a CharacterIterator.
+//   text   - iterator whose text is extracted into m_text_.
+//   status - in/out error code; nothing happens if it already signals a
+//            failure.
+void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Pull the text into our own string first, then hand that to the
+    // C searcher.
+    text.getText(m_text_);
+    const int32_t textLength = m_text_.length();
+    usearch_setText(m_strsrch_, m_text_.getBuffer(), textLength, &status);
+}
+
+// Returns the collator used by the C searcher, viewed as a
+// RuleBasedCollator, or NULL when this object has no C searcher (the
+// constructors leave m_strsrch_ NULL when they fail).
+RuleBasedCollator * StringSearch::getCollator() const
+{
+    if (m_strsrch_ == NULL) {
+        return NULL;
+    }
+    // Note the const_cast. It would be cleaner if this const method returned a const collator.
+    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
+}
+
+// Makes the C searcher use `coll` for subsequent searches.
+//   coll   - the new collator; must not be NULL. As in the constructors, a
+//            NULL collator is reported as U_ILLEGAL_ARGUMENT_ERROR.
+//   status - in/out error code; nothing happens if it already signals a
+//            failure.
+void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (coll == NULL) {
+        // Do not call a member function through a null pointer.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
+}
+
+// Replaces the pattern to search for.
+//   pattern - the new pattern; it is copied into m_pattern_ and the C
+//             searcher is pointed at that copy.
+//   status  - in/out error code; nothing happens if it already signals a
+//             failure.
+void StringSearch::setPattern(const UnicodeString &pattern,
+                              UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    m_pattern_ = pattern;
+    const int32_t patternLength = m_pattern_.length();
+    usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), patternLength,
+                       &status);
+}
+
+// Returns a reference to the pattern stored in this object (m_pattern_).
+const UnicodeString & StringSearch::getPattern() const
+{
+    return this->m_pattern_;
+}
+
+// public methods ----------------------------------------------------
+
+// Resets the search by forwarding to usearch_reset() on the C searcher.
+void StringSearch::reset()
+{
+    usearch_reset(this->m_strsrch_);
+}
+
+// Creates a new StringSearch with the same pattern, text, collator and break
+// iterator as this one, positioned at the same offset and carrying the same
+// match start and match length.
+//
+// Returns NULL when this object has no C searcher, when allocation fails, or
+// when building or positioning the clone reports an error. A clone that
+// could not be completed is deleted rather than leaked, and is never touched
+// after its construction has failed.
+SearchIterator * StringSearch::safeClone(void) const
+{
+    if (m_strsrch_ == NULL) {
+        // Nothing to copy the collator and match state from.
+        return NULL;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    StringSearch *result = new StringSearch(m_pattern_, m_text_,
+                                            getCollator(),
+                                            m_breakiterator_,
+                                            status);
+    /* test for NULL */
+    if (result == NULL) {
+        return NULL;
+    }
+    if (U_FAILURE(status)) {
+        // The constructor leaves its search state NULL on failure, so the
+        // setters below must not be called on this object.
+        delete result;
+        return NULL;
+    }
+
+    result->setOffset(getOffset(), status);
+    result->setMatchStart(m_strsrch_->search->matchedIndex);
+    result->setMatchLength(m_strsrch_->search->matchedLength);
+    if (U_FAILURE(status)) {
+        delete result;
+        return NULL;
+    }
+    return result;
+}
+
+// protected method -------------------------------------------------
+
+int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
+{
+ // Forward search step from `position`; values passed here are already in the pre-shift position.
+ if (U_SUCCESS(status)) {
+ if (m_strsrch_->pattern.cesLength == 0) { // cesLength == 0: presumably a pattern without collation elements
+ m_search_->matchedIndex = // first call starts at the current offset, later calls advance by one
+ m_search_->matchedIndex == USEARCH_DONE ?
+ getOffset() : m_search_->matchedIndex + 1;
+ m_search_->matchedLength = 0; // the match is reported with length zero
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
+ &status);
+ if (m_search_->matchedIndex == m_search_->textLength) { // reaching the end of the text ends the iteration
+ m_search_->matchedIndex = USEARCH_DONE;
+ }
+ }
+ else {
+ // looking at usearch.cpp, this part is shifted out to
+ // StringSearch instead of SearchIterator because m_strsrch_ is
+ // not accessible in SearchIterator
+#if 0
+ if (position + m_strsrch_->pattern.defaultShiftSize
+ > m_search_->textLength) {
+ setMatchNotFound();
+ return USEARCH_DONE;
+ }
+#endif
+ if (m_search_->matchedLength <= 0) {
+ // the flipping direction issue has already been handled
+ // in next()
+ // for boundary check purposes. this will ensure that the
+ // next match will not precede the current offset
+ // note search->matchedIndex will always be set to something
+ // in the code
+ m_search_->matchedIndex = position - 1;
+ }
+
+ ucol_setOffset(m_strsrch_->textIter, position, &status); // position the text iterator before searching
+
+#if 0
+ for (;;) {
+ if (m_search_->isCanonicalMatch) {
+ // can't use exact here since extra accents are allowed.
+ usearch_handleNextCanonical(m_strsrch_, &status);
+ }
+ else {
+ usearch_handleNextExact(m_strsrch_, &status);
+ }
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+ if (m_breakiterator_ == NULL
+#if !UCONFIG_NO_BREAK_ITERATION
+ ||
+ m_search_->matchedIndex == USEARCH_DONE ||
+ (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
+ m_breakiterator_->isBoundary(m_search_->matchedIndex +
+ m_search_->matchedLength))
+#endif
+ ) {
+ if (m_search_->matchedIndex == USEARCH_DONE) {
+ ucol_setOffset(m_strsrch_->textIter,
+ m_search_->textLength, &status);
+ }
+ else {
+ ucol_setOffset(m_strsrch_->textIter,
+ m_search_->matchedIndex, &status);
+ }
+ return m_search_->matchedIndex;
+ }
+ }
+#else
+ // This #else branch is the code that is compiled (the #if 0 loop above is disabled).
+ // if m_strsrch_->breakIter is always the same as m_breakiterator_
+ // then we don't need to check the match boundaries here because
+ // usearch_handleNextXXX will already have done it.
+ if (m_search_->isCanonicalMatch) {
+ // *could* actually use exact here 'cause no extra accents allowed...
+ usearch_handleNextCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handleNextExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ if (m_search_->matchedIndex == USEARCH_DONE) { // no match: leave the text iterator at the end of the text
+ ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
+ } else { // match: leave the text iterator at the start of the match
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
+ }
+
+ return m_search_->matchedIndex; // may itself be USEARCH_DONE
+#endif
+ }
+ }
+ return USEARCH_DONE; // failure status, or the cesLength == 0 branch; NOTE(review): handlePrev returns matchedIndex for that branch - confirm the difference is intended
+}
+
+int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
+{
+ // Backward search step from `position`; values passed here are already in the pre-shift position.
+ if (U_SUCCESS(status)) {
+ if (m_strsrch_->pattern.cesLength == 0) { // cesLength == 0: presumably a pattern without collation elements
+ m_search_->matchedIndex = // first call starts at the current offset, later calls continue from the previous index
+ (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
+ m_search_->matchedIndex);
+ if (m_search_->matchedIndex == 0) { // already at the start of the text: nothing before it
+ setMatchNotFound();
+ }
+ else {
+ m_search_->matchedIndex --; // step back one index
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
+ &status);
+ m_search_->matchedLength = 0; // the match is reported with length zero
+ }
+ }
+ else {
+ // looking at usearch.cpp, this part is shifted out to
+ // StringSearch instead of SearchIterator because m_strsrch_ is
+ // not accessible in SearchIterator
+#if 0
+ if (!m_search_->isOverlap &&
+ position - m_strsrch_->pattern.defaultShiftSize < 0) {
+ setMatchNotFound();
+ return USEARCH_DONE;
+ }
+
+ for (;;) {
+ if (m_search_->isCanonicalMatch) {
+ // can't use exact here since extra accents are allowed.
+ usearch_handlePreviousCanonical(m_strsrch_, &status);
+ }
+ else {
+ usearch_handlePreviousExact(m_strsrch_, &status);
+ }
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+ if (m_breakiterator_ == NULL
+#if !UCONFIG_NO_BREAK_ITERATION
+ ||
+ m_search_->matchedIndex == USEARCH_DONE ||
+ (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
+ m_breakiterator_->isBoundary(m_search_->matchedIndex +
+ m_search_->matchedLength))
+#endif
+ ) {
+ return m_search_->matchedIndex;
+ }
+ }
+#else
+ ucol_setOffset(m_strsrch_->textIter, position, &status); // compiled branch: position the text iterator before searching
+
+ if (m_search_->isCanonicalMatch) {
+ // *could* use exact match here since extra accents *not* allowed!
+ usearch_handlePreviousCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handlePreviousExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ return m_search_->matchedIndex; // may itself be USEARCH_DONE
+#endif
+ }
+
+ return m_search_->matchedIndex; // with the #else code above compiled in, only the cesLength == 0 branch reaches this line
+ }
+ return USEARCH_DONE; // status was already a failure on entry
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */