diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /intl/hyphenation/glue | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'intl/hyphenation/glue')
-rw-r--r-- | intl/hyphenation/glue/hnjalloc.h | 51 | ||||
-rw-r--r-- | intl/hyphenation/glue/hnjstdio.cpp | 119 | ||||
-rw-r--r-- | intl/hyphenation/glue/moz.build | 29 | ||||
-rw-r--r-- | intl/hyphenation/glue/nsHyphenationManager.cpp | 321 | ||||
-rw-r--r-- | intl/hyphenation/glue/nsHyphenationManager.h | 55 | ||||
-rw-r--r-- | intl/hyphenation/glue/nsHyphenator.cpp | 159 | ||||
-rw-r--r-- | intl/hyphenation/glue/nsHyphenator.h | 33 |
7 files changed, 767 insertions, 0 deletions
diff --git a/intl/hyphenation/glue/hnjalloc.h b/intl/hyphenation/glue/hnjalloc.h new file mode 100644 index 000000000..fec3a4bc9 --- /dev/null +++ b/intl/hyphenation/glue/hnjalloc.h @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Simple replacement for hnjalloc.h from libhyphen-2.x, to use moz_x* memory + * allocation functions. Note that the hyphen.c code does *NOT* check for + * NULL from memory (re)allocation, so it is essential that we use the + * "infallible" moz_x* variants here. + */ + +#include "mozilla/mozalloc.h" + +#define hnj_malloc(size) moz_xmalloc(size) +#define hnj_realloc(p, size) moz_xrealloc(p, size) +#define hnj_free(p) free(p) + +/* + * To enable us to load hyphenation dictionaries from arbitrary resource URIs, + * not just through file paths using stdio, we override the (few) stdio APIs + * that hyphen.c uses and provide our own reimplementation that calls Gecko + * i/o methods. + */ + +#include <stdio.h> /* ensure stdio.h is loaded before our macros */ + +#undef FILE +#define FILE hnjFile + +#define fopen(path,mode) hnjFopen(path,mode) +#define fclose(file) hnjFclose(file) +#define fgets(buf,count,file) hnjFgets(buf,count,file) + +typedef struct hnjFile_ hnjFile; + +#ifdef __cplusplus +extern "C" { +#endif + +hnjFile* hnjFopen(const char* aURISpec, const char* aMode); + +int hnjFclose(hnjFile* f); + +char* hnjFgets(char* s, int n, hnjFile* f); + +#ifdef __cplusplus +} +#endif + + diff --git a/intl/hyphenation/glue/hnjstdio.cpp b/intl/hyphenation/glue/hnjstdio.cpp new file mode 100644 index 000000000..660ebaf13 --- /dev/null +++ b/intl/hyphenation/glue/hnjstdio.cpp @@ -0,0 +1,119 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file provides substitutes for the basic stdio routines used by hyphen.c +// to read its dictionary files. We #define the stdio names to these versions +// in hnjalloc.h, so that we can use nsIURI and nsIInputStream to specify and +// access the dictionary resources. + +#include "hnjalloc.h" +#undef FILE // Undo the damage done in hnjalloc.h +#include "nsNetUtil.h" +#include "nsIInputStream.h" +#include "nsIURI.h" +#include "nsContentUtils.h" + +#define BUFSIZE 1024 + +struct hnjFile_ { + nsCOMPtr<nsIInputStream> mStream; + char mBuffer[BUFSIZE]; + uint32_t mCurPos; + uint32_t mLimit; +}; + +// replacement for fopen() +// (not a full substitute: only supports read access) +hnjFile* +hnjFopen(const char* aURISpec, const char* aMode) +{ + // this override only needs to support "r" + NS_ASSERTION(!strcmp(aMode, "r"), "unsupported fopen() mode in hnjFopen"); + + nsCOMPtr<nsIURI> uri; + nsresult rv = NS_NewURI(getter_AddRefs(uri), aURISpec); + if (NS_FAILED(rv)) { + return nullptr; + } + + nsCOMPtr<nsIChannel> channel; + rv = NS_NewChannel(getter_AddRefs(channel), + uri, + nsContentUtils::GetSystemPrincipal(), + nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL, + nsIContentPolicy::TYPE_OTHER); + if (NS_FAILED(rv)) { + return nullptr; + } + + nsCOMPtr<nsIInputStream> instream; + rv = channel->Open2(getter_AddRefs(instream)); + if (NS_FAILED(rv)) { + return nullptr; + } + + hnjFile *f = new hnjFile; + f->mStream = instream; + f->mCurPos = 0; + f->mLimit = 0; + + return f; +} + +// replacement for fclose() +int +hnjFclose(hnjFile* f) +{ + NS_ASSERTION(f && f->mStream, "bad argument to hnjFclose"); + + int result = 0; + nsresult rv = f->mStream->Close(); + if (NS_FAILED(rv)) { + result = EOF; + } + f->mStream = nullptr; + + delete f; + return result; +} + +// replacement for fgets() +// (not a full reimplementation, but sufficient for libhyphen's needs) +char* +hnjFgets(char* s, int n, hnjFile* f) +{ + NS_ASSERTION(s && f, "bad argument to hnjFgets"); + + int i = 0; + while (i < n - 1) { + if (f->mCurPos < f->mLimit) { + char c = f->mBuffer[f->mCurPos++]; + s[i++] = c; + if (c == '\n' || c == '\r') { + break; + } + continue; + } + + f->mCurPos = 0; + + nsresult rv = f->mStream->Read(f->mBuffer, BUFSIZE, &f->mLimit); + if (NS_FAILED(rv)) { + f->mLimit = 0; + return nullptr; + } + + if (f->mLimit == 0) { + break; + } + } + + if (i == 0) { + return nullptr; // end of file + } + + s[i] = '\0'; // null-terminate the returned string + return s; +} diff --git a/intl/hyphenation/glue/moz.build b/intl/hyphenation/glue/moz.build new file mode 100644 index 000000000..a2d71f4b5 --- /dev/null +++ b/intl/hyphenation/glue/moz.build @@ -0,0 +1,29 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + 'nsHyphenationManager.h', + 'nsHyphenator.h', +] + +UNIFIED_SOURCES += [ + 'nsHyphenationManager.cpp', + 'nsHyphenator.cpp', +] + +# These files cannot be built in unified mode because they include hnjalloc.h. +SOURCES += [ + 'hnjstdio.cpp', +] + +LOCAL_INCLUDES += [ + '../hyphen', +] + +FINAL_LIBRARY = 'xul' + +if CONFIG['GNU_CXX']: + CXXFLAGS += ['-Wno-error=shadow'] diff --git a/intl/hyphenation/glue/nsHyphenationManager.cpp b/intl/hyphenation/glue/nsHyphenationManager.cpp new file mode 100644 index 000000000..998550e5d --- /dev/null +++ b/intl/hyphenation/glue/nsHyphenationManager.cpp @@ -0,0 +1,321 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHyphenationManager.h" +#include "nsHyphenator.h" +#include "nsIAtom.h" +#include "nsIFile.h" +#include "nsIURI.h" +#include "nsIProperties.h" +#include "nsISimpleEnumerator.h" +#include "nsIDirectoryEnumerator.h" +#include "nsDirectoryServiceDefs.h" +#include "nsNetUtil.h" +#include "nsUnicharUtils.h" +#include "mozilla/Preferences.h" +#include "nsZipArchive.h" +#include "mozilla/Services.h" +#include "nsIObserverService.h" +#include "nsCRT.h" +#include "nsAppDirectoryServiceDefs.h" +#include "nsDirectoryServiceUtils.h" + +using namespace mozilla; + +static const char kIntlHyphenationAliasPrefix[] = "intl.hyphenation-alias."; +static const char kMemoryPressureNotification[] = "memory-pressure"; + +nsHyphenationManager *nsHyphenationManager::sInstance = nullptr; + +NS_IMPL_ISUPPORTS(nsHyphenationManager::MemoryPressureObserver, + nsIObserver) + +NS_IMETHODIMP +nsHyphenationManager::MemoryPressureObserver::Observe(nsISupports *aSubject, + const char *aTopic, + const char16_t *aData) +{ + if (!nsCRT::strcmp(aTopic, kMemoryPressureNotification)) { + // We don't call Instance() here, as we don't want to create a hyphenation + // manager if there isn't already one in existence. + // (This observer class is local to the hyphenation manager, so it can use + // the protected members directly.) + if (nsHyphenationManager::sInstance) { + nsHyphenationManager::sInstance->mHyphenators.Clear(); + } + } + return NS_OK; +} + +nsHyphenationManager* +nsHyphenationManager::Instance() +{ + if (sInstance == nullptr) { + sInstance = new nsHyphenationManager(); + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(new MemoryPressureObserver, + kMemoryPressureNotification, false); + } + } + return sInstance; +} + +void +nsHyphenationManager::Shutdown() +{ + delete sInstance; + sInstance = nullptr; +} + +nsHyphenationManager::nsHyphenationManager() +{ + LoadPatternList(); + LoadAliases(); +} + +nsHyphenationManager::~nsHyphenationManager() +{ + sInstance = nullptr; +} + +already_AddRefed<nsHyphenator> +nsHyphenationManager::GetHyphenator(nsIAtom *aLocale) +{ + RefPtr<nsHyphenator> hyph; + mHyphenators.Get(aLocale, getter_AddRefs(hyph)); + if (hyph) { + return hyph.forget(); + } + nsCOMPtr<nsIURI> uri = mPatternFiles.Get(aLocale); + if (!uri) { + nsCOMPtr<nsIAtom> alias = mHyphAliases.Get(aLocale); + if (alias) { + mHyphenators.Get(alias, getter_AddRefs(hyph)); + if (hyph) { + return hyph.forget(); + } + uri = mPatternFiles.Get(alias); + if (uri) { + aLocale = alias; + } + } + if (!uri) { + // In the case of a locale such as "de-DE-1996", we try replacing + // successive trailing subtags with "-*" to find fallback patterns, + // so "de-DE-1996" -> "de-DE-*" (and then recursively -> "de-*") + nsAtomCString localeStr(aLocale); + if (StringEndsWith(localeStr, NS_LITERAL_CSTRING("-*"))) { + localeStr.Truncate(localeStr.Length() - 2); + } + int32_t i = localeStr.RFindChar('-'); + if (i > 1) { + localeStr.Replace(i, localeStr.Length() - i, "-*"); + nsCOMPtr<nsIAtom> fuzzyLocale = NS_Atomize(localeStr); + return GetHyphenator(fuzzyLocale); + } else { + return nullptr; + } + } + } + hyph = new nsHyphenator(uri); + if (hyph->IsValid()) { + mHyphenators.Put(aLocale, hyph); + return hyph.forget(); + } +#ifdef DEBUG + nsCString msg("failed to load patterns from "); + msg += uri->GetSpecOrDefault(); + NS_WARNING(msg.get()); +#endif + mPatternFiles.Remove(aLocale); + return nullptr; +} + +void +nsHyphenationManager::LoadPatternList() +{ + mPatternFiles.Clear(); + mHyphenators.Clear(); + + LoadPatternListFromOmnijar(Omnijar::GRE); + LoadPatternListFromOmnijar(Omnijar::APP); + + nsCOMPtr<nsIProperties> dirSvc = + do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID); + if (!dirSvc) { + return; + } + + nsresult rv; + nsCOMPtr<nsIFile> greDir; + rv = dirSvc->Get(NS_GRE_DIR, + NS_GET_IID(nsIFile), getter_AddRefs(greDir)); + if (NS_SUCCEEDED(rv)) { + greDir->AppendNative(NS_LITERAL_CSTRING("hyphenation")); + LoadPatternListFromDir(greDir); + } + + nsCOMPtr<nsIFile> appDir; + rv = dirSvc->Get(NS_XPCOM_CURRENT_PROCESS_DIR, + NS_GET_IID(nsIFile), getter_AddRefs(appDir)); + if (NS_SUCCEEDED(rv)) { + appDir->AppendNative(NS_LITERAL_CSTRING("hyphenation")); + bool equals; + if (NS_SUCCEEDED(appDir->Equals(greDir, &equals)) && !equals) { + LoadPatternListFromDir(appDir); + } + } + + nsCOMPtr<nsIFile> profileDir; + rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR, + getter_AddRefs(profileDir)); + if (NS_SUCCEEDED(rv)) { + profileDir->AppendNative(NS_LITERAL_CSTRING("hyphenation")); + LoadPatternListFromDir(profileDir); + } +} + +void +nsHyphenationManager::LoadPatternListFromOmnijar(Omnijar::Type aType) +{ + nsCString base; + nsresult rv = Omnijar::GetURIString(aType, base); + if (NS_FAILED(rv)) { + return; + } + + RefPtr<nsZipArchive> zip = Omnijar::GetReader(aType); + if (!zip) { + return; + } + + nsZipFind *find; + zip->FindInit("hyphenation/hyph_*.dic", &find); + if (!find) { + return; + } + + const char *result; + uint16_t len; + while (NS_SUCCEEDED(find->FindNext(&result, &len))) { + nsCString uriString(base); + uriString.Append(result, len); + nsCOMPtr<nsIURI> uri; + rv = NS_NewURI(getter_AddRefs(uri), uriString); + if (NS_FAILED(rv)) { + continue; + } + nsCString locale; + rv = uri->GetPath(locale); + if (NS_FAILED(rv)) { + continue; + } + ToLowerCase(locale); + locale.SetLength(locale.Length() - 4); // strip ".dic" + locale.Cut(0, locale.RFindChar('/') + 1); // strip directory + if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) { + locale.Cut(0, 5); + } + for (uint32_t i = 0; i < locale.Length(); ++i) { + if (locale[i] == '_') { + locale.Replace(i, 1, '-'); + } + } + nsCOMPtr<nsIAtom> localeAtom = NS_Atomize(locale); + if (NS_SUCCEEDED(rv)) { + mPatternFiles.Put(localeAtom, uri); + } + } + + delete find; +} + +void +nsHyphenationManager::LoadPatternListFromDir(nsIFile *aDir) +{ + nsresult rv; + + bool check = false; + rv = aDir->Exists(&check); + if (NS_FAILED(rv) || !check) { + return; + } + + rv = aDir->IsDirectory(&check); + if (NS_FAILED(rv) || !check) { + return; + } + + nsCOMPtr<nsISimpleEnumerator> e; + rv = aDir->GetDirectoryEntries(getter_AddRefs(e)); + if (NS_FAILED(rv)) { + return; + } + + nsCOMPtr<nsIDirectoryEnumerator> files(do_QueryInterface(e)); + if (!files) { + return; + } + + nsCOMPtr<nsIFile> file; + while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file){ + nsAutoString dictName; + file->GetLeafName(dictName); + NS_ConvertUTF16toUTF8 locale(dictName); + ToLowerCase(locale); + if (!StringEndsWith(locale, NS_LITERAL_CSTRING(".dic"))) { + continue; + } + if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) { + locale.Cut(0, 5); + } + locale.SetLength(locale.Length() - 4); // strip ".dic" + for (uint32_t i = 0; i < locale.Length(); ++i) { + if (locale[i] == '_') { + locale.Replace(i, 1, '-'); + } + } +#ifdef DEBUG_hyph + printf("adding hyphenation patterns for %s: %s\n", locale.get(), + NS_ConvertUTF16toUTF8(dictName).get()); +#endif + nsCOMPtr<nsIAtom> localeAtom = NS_Atomize(locale); + nsCOMPtr<nsIURI> uri; + nsresult rv = NS_NewFileURI(getter_AddRefs(uri), file); + if (NS_SUCCEEDED(rv)) { + mPatternFiles.Put(localeAtom, uri); + } + } +} + +void +nsHyphenationManager::LoadAliases() +{ + nsIPrefBranch* prefRootBranch = Preferences::GetRootBranch(); + if (!prefRootBranch) { + return; + } + uint32_t prefCount; + char **prefNames; + nsresult rv = prefRootBranch->GetChildList(kIntlHyphenationAliasPrefix, + &prefCount, &prefNames); + if (NS_SUCCEEDED(rv) && prefCount > 0) { + for (uint32_t i = 0; i < prefCount; ++i) { + nsAdoptingCString value = Preferences::GetCString(prefNames[i]); + if (value) { + nsAutoCString alias(prefNames[i]); + alias.Cut(0, sizeof(kIntlHyphenationAliasPrefix) - 1); + ToLowerCase(alias); + ToLowerCase(value); + nsCOMPtr<nsIAtom> aliasAtom = NS_Atomize(alias); + nsCOMPtr<nsIAtom> valueAtom = NS_Atomize(value); + mHyphAliases.Put(aliasAtom, valueAtom); + } + } + NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(prefCount, prefNames); + } +} diff --git a/intl/hyphenation/glue/nsHyphenationManager.h b/intl/hyphenation/glue/nsHyphenationManager.h new file mode 100644 index 000000000..fa7d73f18 --- /dev/null +++ b/intl/hyphenation/glue/nsHyphenationManager.h @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsHyphenationManager_h__ +#define nsHyphenationManager_h__ + +#include "nsInterfaceHashtable.h" +#include "nsRefPtrHashtable.h" +#include "nsHashKeys.h" +#include "nsIObserver.h" +#include "mozilla/Omnijar.h" + +class nsHyphenator; +class nsIAtom; +class nsIURI; + +class nsHyphenationManager +{ +public: + nsHyphenationManager(); + + already_AddRefed<nsHyphenator> GetHyphenator(nsIAtom *aLocale); + + static nsHyphenationManager *Instance(); + + static void Shutdown(); + +private: + ~nsHyphenationManager(); + +protected: + class MemoryPressureObserver final : public nsIObserver + { + ~MemoryPressureObserver() {} + + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIOBSERVER + }; + + void LoadPatternList(); + void LoadPatternListFromOmnijar(mozilla::Omnijar::Type aType); + void LoadPatternListFromDir(nsIFile *aDir); + void LoadAliases(); + + nsInterfaceHashtable<nsISupportsHashKey,nsIAtom> mHyphAliases; + nsInterfaceHashtable<nsISupportsHashKey,nsIURI> mPatternFiles; + nsRefPtrHashtable<nsISupportsHashKey,nsHyphenator> mHyphenators; + + static nsHyphenationManager *sInstance; +}; + +#endif // nsHyphenationManager_h__ diff --git a/intl/hyphenation/glue/nsHyphenator.cpp b/intl/hyphenation/glue/nsHyphenator.cpp new file mode 100644 index 000000000..bcb87baf6 --- /dev/null +++ b/intl/hyphenation/glue/nsHyphenator.cpp @@ -0,0 +1,159 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHyphenator.h" +#include "nsIFile.h" +#include "nsUTF8Utils.h" +#include "nsUnicodeProperties.h" +#include "nsUnicharUtilCIID.h" +#include "nsIURI.h" + +#include "hyphen.h" + +nsHyphenator::nsHyphenator(nsIURI *aURI) + : mDict(nullptr) +{ + nsCString uriSpec; + nsresult rv = aURI->GetSpec(uriSpec); + if (NS_FAILED(rv)) { + return; + } + mDict = hnj_hyphen_load(uriSpec.get()); +#ifdef DEBUG + if (mDict) { + printf("loaded hyphenation patterns from %s\n", uriSpec.get()); + } +#endif +} + +nsHyphenator::~nsHyphenator() +{ + if (mDict != nullptr) { + hnj_hyphen_free((HyphenDict*)mDict); + mDict = nullptr; + } +} + +bool +nsHyphenator::IsValid() +{ + return (mDict != nullptr); +} + +nsresult +nsHyphenator::Hyphenate(const nsAString& aString, nsTArray<bool>& aHyphens) +{ + if (!aHyphens.SetLength(aString.Length(), mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool)); + + bool inWord = false; + uint32_t wordStart = 0, wordLimit = 0; + uint32_t chLen; + for (uint32_t i = 0; i < aString.Length(); i += chLen) { + uint32_t ch = aString[i]; + chLen = 1; + + if (NS_IS_HIGH_SURROGATE(ch)) { + if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) { + ch = SURROGATE_TO_UCS4(ch, aString[i+1]); + chLen = 2; + } else { + NS_WARNING("unpaired surrogate found during hyphenation"); + } + } + + nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch); + if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) { + if (!inWord) { + inWord = true; + wordStart = i; + } + wordLimit = i + chLen; + if (i + chLen < aString.Length()) { + continue; + } + } + + if (inWord) { + // Convert the word to utf-8 for libhyphen, lowercasing it as we go + // so that it will match the (lowercased) patterns (bug 1105644). + nsAutoCString utf8; + const char16_t* const begin = aString.BeginReading(); + const char16_t *cur = begin + wordStart; + const char16_t *end = begin + wordLimit; + while (cur < end) { + uint32_t ch = *cur++; + + if (NS_IS_HIGH_SURROGATE(ch)) { + if (cur < end && NS_IS_LOW_SURROGATE(*cur)) { + ch = SURROGATE_TO_UCS4(ch, *cur++); + } else { + ch = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR + } + } else if (NS_IS_LOW_SURROGATE(ch)) { + ch = 0xfffd; // unpaired surrogate + } + + // XXX What about language-specific casing? Consider Turkish I/i... + // In practice, it looks like the current patterns will not be + // affected by this, as they treat dotted and undotted i similarly. + ch = ToLowerCase(ch); + + if (ch < 0x80) { // U+0000 - U+007F + utf8.Append(ch); + } else if (ch < 0x0800) { // U+0100 - U+07FF + utf8.Append(0xC0 | (ch >> 6)); + utf8.Append(0x80 | (0x003F & ch)); + } else if (ch < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF + utf8.Append(0xE0 | (ch >> 12)); + utf8.Append(0x80 | (0x003F & (ch >> 6))); + utf8.Append(0x80 | (0x003F & ch)); + } else { + utf8.Append(0xF0 | (ch >> 18)); + utf8.Append(0x80 | (0x003F & (ch >> 12))); + utf8.Append(0x80 | (0x003F & (ch >> 6))); + utf8.Append(0x80 | (0x003F & ch)); + } + } + + AutoTArray<char,200> utf8hyphens; + utf8hyphens.SetLength(utf8.Length() + 5); + char **rep = nullptr; + int *pos = nullptr; + int *cut = nullptr; + int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, + utf8.BeginReading(), utf8.Length(), + utf8hyphens.Elements(), nullptr, + &rep, &pos, &cut); + if (!err) { + // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer + // from utf8 code unit indexing (which would match the utf8 input + // string directly) to Unicode character indexing. + // We then need to convert this to utf16 code unit offsets for Gecko. + const char *hyphPtr = utf8hyphens.Elements(); + const char16_t *cur = begin + wordStart; + const char16_t *end = begin + wordLimit; + while (cur < end) { + if (*hyphPtr & 0x01) { + aHyphens[cur - begin] = true; + } + cur++; + if (cur < end && NS_IS_LOW_SURROGATE(*cur) && + NS_IS_HIGH_SURROGATE(*(cur-1))) + { + cur++; + } + hyphPtr++; + } + } + } + + inWord = false; + } + + return NS_OK; +} diff --git a/intl/hyphenation/glue/nsHyphenator.h b/intl/hyphenation/glue/nsHyphenator.h new file mode 100644 index 000000000..96975d253 --- /dev/null +++ b/intl/hyphenation/glue/nsHyphenator.h @@ -0,0 +1,33 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsHyphenator_h__ +#define nsHyphenator_h__ + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsTArray.h" + +class nsIURI; + +class nsHyphenator +{ +public: + explicit nsHyphenator(nsIURI *aURI); + + NS_INLINE_DECL_REFCOUNTING(nsHyphenator) + + bool IsValid(); + + nsresult Hyphenate(const nsAString& aText, nsTArray<bool>& aHyphens); + +private: + ~nsHyphenator(); + +protected: + void *mDict; +}; + +#endif // nsHyphenator_h__ |