summaryrefslogtreecommitdiffstats
path: root/intl/hyphenation/glue
diff options
context:
space:
mode:
Diffstat (limited to 'intl/hyphenation/glue')
-rw-r--r--intl/hyphenation/glue/hnjalloc.h51
-rw-r--r--intl/hyphenation/glue/hnjstdio.cpp119
-rw-r--r--intl/hyphenation/glue/moz.build29
-rw-r--r--intl/hyphenation/glue/nsHyphenationManager.cpp321
-rw-r--r--intl/hyphenation/glue/nsHyphenationManager.h55
-rw-r--r--intl/hyphenation/glue/nsHyphenator.cpp159
-rw-r--r--intl/hyphenation/glue/nsHyphenator.h33
7 files changed, 767 insertions, 0 deletions
diff --git a/intl/hyphenation/glue/hnjalloc.h b/intl/hyphenation/glue/hnjalloc.h
new file mode 100644
index 000000000..fec3a4bc9
--- /dev/null
+++ b/intl/hyphenation/glue/hnjalloc.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Simple replacement for hnjalloc.h from libhyphen-2.x, to use moz_x* memory
+ * allocation functions. Note that the hyphen.c code does *NOT* check for
+ * NULL from memory (re)allocation, so it is essential that we use the
+ * "infallible" moz_x* variants here.
+ */
+
+#include "mozilla/mozalloc.h"
+
+/* Allocate `size` bytes with the infallible moz_xmalloc. */
+#define hnj_malloc(size) moz_xmalloc(size)
+/* Resize the allocation `p` to `size` bytes with the infallible moz_xrealloc. */
+#define hnj_realloc(p, size) moz_xrealloc(p, size)
+/* Release memory obtained through hnj_malloc/hnj_realloc. */
+#define hnj_free(p) free(p)
+
+/*
+ * To enable us to load hyphenation dictionaries from arbitrary resource URIs,
+ * not just through file paths using stdio, we override the (few) stdio APIs
+ * that hyphen.c uses and provide our own reimplementation that calls Gecko
+ * i/o methods.
+ */
+
+#include <stdio.h> /* ensure stdio.h is loaded before our macros */
+
+/* Make every use of FILE in the including source refer to hnjFile instead. */
+#undef FILE
+#define FILE hnjFile
+
+/* Route the stdio calls listed here to the hnj* replacements declared below. */
+#define fopen(path,mode) hnjFopen(path,mode)
+#define fclose(file) hnjFclose(file)
+#define fgets(buf,count,file) hnjFgets(buf,count,file)
+
+/* Opaque to includers; only a forward declaration of hnjFile_ is visible here. */
+typedef struct hnjFile_ hnjFile;
+
+/* C linkage so the replacements can be declared from both C and C++ sources. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* fopen() replacement: aURISpec names the resource, aMode is the stdio mode. */
+hnjFile* hnjFopen(const char* aURISpec, const char* aMode);
+
+/* fclose() replacement for a handle returned by hnjFopen. */
+int hnjFclose(hnjFile* f);
+
+/* fgets() replacement: reads into s (capacity n) from f. */
+char* hnjFgets(char* s, int n, hnjFile* f);
+
+#ifdef __cplusplus
+}
+#endif
+
+
diff --git a/intl/hyphenation/glue/hnjstdio.cpp b/intl/hyphenation/glue/hnjstdio.cpp
new file mode 100644
index 000000000..660ebaf13
--- /dev/null
+++ b/intl/hyphenation/glue/hnjstdio.cpp
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file provides substitutes for the basic stdio routines used by hyphen.c
+// to read its dictionary files. We #define the stdio names to these versions
+// in hnjalloc.h, so that we can use nsIURI and nsIInputStream to specify and
+// access the dictionary resources.
+
+#include "hnjalloc.h"
+#undef FILE // Undo the damage done in hnjalloc.h
+#include "nsNetUtil.h"
+#include "nsIInputStream.h"
+#include "nsIURI.h"
+#include "nsContentUtils.h"
+
+// Size in bytes of the read buffer embedded in every hnjFile.
+#define BUFSIZE 1024
+
+// State behind the FILE replacement: an input stream plus a small read buffer
+// from which hnjFgets hands out bytes.
+struct hnjFile_ {
+ nsCOMPtr<nsIInputStream> mStream; // stream the dictionary data is read from
+ char mBuffer[BUFSIZE]; // bytes most recently read from mStream
+ uint32_t mCurPos; // index in mBuffer of the next byte to hand out
+ uint32_t mLimit; // number of valid bytes currently in mBuffer
+};
+
+// Stand-in for fopen(): opens the resource named by aURISpec for reading.
+// Only mode "r" is supported (asserted). Returns a newly allocated hnjFile,
+// to be released with hnjFclose, or nullptr if the URI cannot be created or
+// the channel cannot be created or opened.
+hnjFile*
+hnjFopen(const char* aURISpec, const char* aMode)
+{
+  // Read access is the only thing this override has to provide.
+  NS_ASSERTION(!strcmp(aMode, "r"), "unsupported fopen() mode in hnjFopen");
+
+  nsCOMPtr<nsIURI> dictURI;
+  if (NS_FAILED(NS_NewURI(getter_AddRefs(dictURI), aURISpec))) {
+    return nullptr;
+  }
+
+  nsCOMPtr<nsIChannel> dictChannel;
+  if (NS_FAILED(NS_NewChannel(getter_AddRefs(dictChannel),
+                              dictURI,
+                              nsContentUtils::GetSystemPrincipal(),
+                              nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL,
+                              nsIContentPolicy::TYPE_OTHER))) {
+    return nullptr;
+  }
+
+  nsCOMPtr<nsIInputStream> dictStream;
+  if (NS_FAILED(dictChannel->Open2(getter_AddRefs(dictStream)))) {
+    return nullptr;
+  }
+
+  // The buffer starts out empty (mCurPos == mLimit == 0); the first read
+  // request will therefore pull data from the stream.
+  hnjFile* handle = new hnjFile;
+  handle->mCurPos = 0;
+  handle->mLimit = 0;
+  handle->mStream = dictStream;
+  return handle;
+}
+
+// Stand-in for fclose(): closes the underlying stream and destroys f.
+// Returns 0 on success or EOF if closing the stream reported a failure;
+// f is deleted in either case.
+int
+hnjFclose(hnjFile* f)
+{
+  NS_ASSERTION(f && f->mStream, "bad argument to hnjFclose");
+
+  const bool closeFailed = NS_FAILED(f->mStream->Close());
+  f->mStream = nullptr;
+  delete f;
+
+  return closeFailed ? EOF : 0;
+}
+
+// Stand-in for fgets() (not a full reimplementation, but sufficient for
+// libhyphen's needs). Copies bytes from f into s until n - 1 bytes have been
+// stored, a '\n' or '\r' has been stored, or the stream is exhausted, then
+// null-terminates s. Returns s, or nullptr if nothing was copied (end of
+// file) or if reading from the stream failed.
+char*
+hnjFgets(char* s, int n, hnjFile* f)
+{
+  NS_ASSERTION(s && f, "bad argument to hnjFgets");
+
+  int copied = 0;
+  while (copied < n - 1) {
+    if (f->mCurPos >= f->mLimit) {
+      // Buffer exhausted: refill it from the stream before going on.
+      f->mCurPos = 0;
+      if (NS_FAILED(f->mStream->Read(f->mBuffer, BUFSIZE, &f->mLimit))) {
+        f->mLimit = 0;
+        return nullptr;
+      }
+      if (f->mLimit == 0) {
+        break; // nothing more to read
+      }
+      continue;
+    }
+
+    const char next = f->mBuffer[f->mCurPos++];
+    s[copied++] = next;
+    if (next == '\n' || next == '\r') {
+      break; // the line terminator is kept in the output
+    }
+  }
+
+  if (copied == 0) {
+    return nullptr; // end of file
+  }
+
+  s[copied] = '\0'; // null-terminate the returned string
+  return s;
+}
diff --git a/intl/hyphenation/glue/moz.build b/intl/hyphenation/glue/moz.build
new file mode 100644
index 000000000..a2d71f4b5
--- /dev/null
+++ b/intl/hyphenation/glue/moz.build
@@ -0,0 +1,29 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Headers exported from this directory.
+EXPORTS += [
+ 'nsHyphenationManager.h',
+ 'nsHyphenator.h',
+]
+
+# Sources that may be compiled in unified mode.
+UNIFIED_SOURCES += [
+ 'nsHyphenationManager.cpp',
+ 'nsHyphenator.cpp',
+]
+
+# These files cannot be built in unified mode because they include hnjalloc.h.
+SOURCES += [
+ 'hnjstdio.cpp',
+]
+
+# Extra include path; presumably where hyphen.h lives -- confirm.
+LOCAL_INCLUDES += [
+ '../hyphen',
+]
+
+FINAL_LIBRARY = 'xul'
+
+# With GNU C++, keep variable-shadowing warnings from being treated as errors.
+if CONFIG['GNU_CXX']:
+ CXXFLAGS += ['-Wno-error=shadow']
diff --git a/intl/hyphenation/glue/nsHyphenationManager.cpp b/intl/hyphenation/glue/nsHyphenationManager.cpp
new file mode 100644
index 000000000..998550e5d
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenationManager.cpp
@@ -0,0 +1,321 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHyphenationManager.h"
+#include "nsHyphenator.h"
+#include "nsIAtom.h"
+#include "nsIFile.h"
+#include "nsIURI.h"
+#include "nsIProperties.h"
+#include "nsISimpleEnumerator.h"
+#include "nsIDirectoryEnumerator.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsNetUtil.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/Preferences.h"
+#include "nsZipArchive.h"
+#include "mozilla/Services.h"
+#include "nsIObserverService.h"
+#include "nsCRT.h"
+#include "nsAppDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+
+using namespace mozilla;
+
+// Prefix of the preferences from which locale aliases are read.
+static const char kIntlHyphenationAliasPrefix[] = "intl.hyphenation-alias.";
+// Observer topic on which the cached hyphenators are discarded.
+static const char kMemoryPressureNotification[] = "memory-pressure";
+
+// The single manager instance; null until Instance() creates it.
+nsHyphenationManager *nsHyphenationManager::sInstance = nullptr;
+
+NS_IMPL_ISUPPORTS(nsHyphenationManager::MemoryPressureObserver,
+ nsIObserver)
+
+// nsIObserver callback. On a "memory-pressure" notification, empties the
+// hyphenator cache of the existing manager, if there is one; any other topic
+// is ignored. Always returns NS_OK.
+NS_IMETHODIMP
+nsHyphenationManager::MemoryPressureObserver::Observe(nsISupports *aSubject,
+                                                      const char *aTopic,
+                                                      const char16_t *aData)
+{
+  const bool isMemoryPressure =
+    nsCRT::strcmp(aTopic, kMemoryPressureNotification) == 0;
+
+  // sInstance is read directly rather than through Instance(), because a
+  // manager must not be created just to have its cache cleared. As a class
+  // nested in the manager, this observer may touch its protected members.
+  if (isMemoryPressure && nsHyphenationManager::sInstance) {
+    nsHyphenationManager::sInstance->mHyphenators.Clear();
+  }
+  return NS_OK;
+}
+
+// Returns the singleton manager, creating it on first use. Creation also
+// registers a MemoryPressureObserver for the "memory-pressure" topic when
+// the observer service is available.
+nsHyphenationManager*
+nsHyphenationManager::Instance()
+{
+  if (sInstance) {
+    return sInstance;
+  }
+
+  sInstance = new nsHyphenationManager();
+
+  nsCOMPtr<nsIObserverService> observerService =
+    mozilla::services::GetObserverService();
+  if (observerService) {
+    observerService->AddObserver(new MemoryPressureObserver,
+                                 kMemoryPressureNotification, false);
+  }
+  return sInstance;
+}
+
+// Destroys the singleton manager, if one exists, and resets sInstance.
+void
+nsHyphenationManager::Shutdown()
+{
+ delete sInstance;
+ sInstance = nullptr;
+}
+
+// Builds the table of available pattern files, then the locale alias table.
+nsHyphenationManager::nsHyphenationManager()
+{
+ LoadPatternList();
+ LoadAliases();
+}
+
+// Clears the singleton pointer so it cannot refer to a destroyed manager.
+nsHyphenationManager::~nsHyphenationManager()
+{
+ sInstance = nullptr;
+}
+
+// Returns the hyphenator to use for aLocale, creating and caching it on
+// demand. Lookup order: an already-cached hyphenator, a pattern file
+// registered for the locale, the locale's alias (cached hyphenator, then
+// pattern file), and finally progressively less specific "-*" wildcard
+// locales via recursion. Returns nullptr if no patterns are found or if the
+// patterns fail to load.
+already_AddRefed<nsHyphenator>
+nsHyphenationManager::GetHyphenator(nsIAtom *aLocale)
+{
+ RefPtr<nsHyphenator> hyph;
+ // Fast path: a hyphenator for this exact locale has already been created.
+ mHyphenators.Get(aLocale, getter_AddRefs(hyph));
+ if (hyph) {
+ return hyph.forget();
+ }
+ nsCOMPtr<nsIURI> uri = mPatternFiles.Get(aLocale);
+ if (!uri) {
+ // No pattern file under this name; see whether the locale is an alias.
+ nsCOMPtr<nsIAtom> alias = mHyphAliases.Get(aLocale);
+ if (alias) {
+ mHyphenators.Get(alias, getter_AddRefs(hyph));
+ if (hyph) {
+ return hyph.forget();
+ }
+ uri = mPatternFiles.Get(alias);
+ if (uri) {
+ // From here on, the hyphenator is cached (or its pattern entry removed)
+ // under the alias target rather than the requested locale.
+ // NOTE(review): aLocale now points at the atom owned by the local
+ // nsCOMPtr `alias`, which goes out of scope before aLocale is used
+ // below; presumably mHyphAliases keeps that atom alive -- confirm.
+ aLocale = alias;
+ }
+ }
+ if (!uri) {
+ // In the case of a locale such as "de-DE-1996", we try replacing
+ // successive trailing subtags with "-*" to find fallback patterns,
+ // so "de-DE-1996" -> "de-DE-*" (and then recursively -> "de-*")
+ nsAtomCString localeStr(aLocale);
+ if (StringEndsWith(localeStr, NS_LITERAL_CSTRING("-*"))) {
+ localeStr.Truncate(localeStr.Length() - 2);
+ }
+ int32_t i = localeStr.RFindChar('-');
+ // Only fall back while a '-' is found at index 2 or later; otherwise
+ // give up.
+ if (i > 1) {
+ localeStr.Replace(i, localeStr.Length() - i, "-*");
+ nsCOMPtr<nsIAtom> fuzzyLocale = NS_Atomize(localeStr);
+ return GetHyphenator(fuzzyLocale);
+ } else {
+ return nullptr;
+ }
+ }
+ }
+ // A pattern file is known: load it, and cache the hyphenator if it is valid.
+ hyph = new nsHyphenator(uri);
+ if (hyph->IsValid()) {
+ mHyphenators.Put(aLocale, hyph);
+ return hyph.forget();
+ }
+#ifdef DEBUG
+ nsCString msg("failed to load patterns from ");
+ msg += uri->GetSpecOrDefault();
+ NS_WARNING(msg.get());
+#endif
+ // Loading failed: remove the pattern-file entry for this locale.
+ mPatternFiles.Remove(aLocale);
+ return nullptr;
+}
+
+// Rebuilds the table of available pattern files from scratch, discarding any
+// cached hyphenators. Sources are scanned in this order: the GRE and APP
+// omnijars, then the "hyphenation" subdirectory of the GRE directory, of the
+// current-process directory (skipped when it is the same directory as the
+// GRE one), and of the local profile directory.
+void
+nsHyphenationManager::LoadPatternList()
+{
+  mPatternFiles.Clear();
+  mHyphenators.Clear();
+
+  LoadPatternListFromOmnijar(Omnijar::GRE);
+  LoadPatternListFromOmnijar(Omnijar::APP);
+
+  nsCOMPtr<nsIProperties> dirSvc =
+    do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
+  if (!dirSvc) {
+    return;
+  }
+
+  nsresult rv;
+  nsCOMPtr<nsIFile> greDir;
+  rv = dirSvc->Get(NS_GRE_DIR,
+                   NS_GET_IID(nsIFile), getter_AddRefs(greDir));
+  // Remember whether the GRE directory was actually scanned, so that the
+  // duplicate check below never compares against a missing directory.
+  const bool haveGreDir = NS_SUCCEEDED(rv) && greDir;
+  if (haveGreDir) {
+    greDir->AppendNative(NS_LITERAL_CSTRING("hyphenation"));
+    LoadPatternListFromDir(greDir);
+  }
+
+  nsCOMPtr<nsIFile> appDir;
+  rv = dirSvc->Get(NS_XPCOM_CURRENT_PROCESS_DIR,
+                   NS_GET_IID(nsIFile), getter_AddRefs(appDir));
+  if (NS_SUCCEEDED(rv) && appDir) {
+    appDir->AppendNative(NS_LITERAL_CSTRING("hyphenation"));
+    // Skip the application directory only when it is known to be the GRE
+    // directory that was already scanned. If the GRE lookup failed there is
+    // nothing to compare against, and the directory must still be scanned.
+    bool sameAsGre = false;
+    if (!haveGreDir ||
+        (NS_SUCCEEDED(appDir->Equals(greDir, &sameAsGre)) && !sameAsGre)) {
+      LoadPatternListFromDir(appDir);
+    }
+  }
+
+  nsCOMPtr<nsIFile> profileDir;
+  rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+                              getter_AddRefs(profileDir));
+  if (NS_SUCCEEDED(rv) && profileDir) {
+    profileDir->AppendNative(NS_LITERAL_CSTRING("hyphenation"));
+    LoadPatternListFromDir(profileDir);
+  }
+}
+
+// Registers every "hyphenation/hyph_*.dic" entry found in the omnijar of the
+// given type. The locale for an entry is derived from its URI path:
+// lowercased, with the ".dic" suffix, the directory part and the "hyph_"
+// prefix removed, and '_' replaced by '-'. Does nothing if the omnijar's URI
+// string or reader is unavailable.
+void
+nsHyphenationManager::LoadPatternListFromOmnijar(Omnijar::Type aType)
+{
+  nsCString base;
+  nsresult rv = Omnijar::GetURIString(aType, base);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  RefPtr<nsZipArchive> zip = Omnijar::GetReader(aType);
+  if (!zip) {
+    return;
+  }
+
+  // Initialized so that the null check below is well-defined even if
+  // FindInit returns without writing to its out-parameter.
+  nsZipFind *find = nullptr;
+  zip->FindInit("hyphenation/hyph_*.dic", &find);
+  if (!find) {
+    return;
+  }
+
+  const char *result;
+  uint16_t len;
+  while (NS_SUCCEEDED(find->FindNext(&result, &len))) {
+    nsCString uriString(base);
+    uriString.Append(result, len);
+    nsCOMPtr<nsIURI> uri;
+    rv = NS_NewURI(getter_AddRefs(uri), uriString);
+    if (NS_FAILED(rv)) {
+      continue;
+    }
+    nsCString locale;
+    rv = uri->GetPath(locale);
+    if (NS_FAILED(rv)) {
+      continue;
+    }
+    ToLowerCase(locale);
+    locale.SetLength(locale.Length() - 4); // strip ".dic"
+    locale.Cut(0, locale.RFindChar('/') + 1); // strip directory
+    if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) {
+      locale.Cut(0, 5);
+    }
+    for (uint32_t i = 0; i < locale.Length(); ++i) {
+      if (locale[i] == '_') {
+        locale.Replace(i, 1, '-');
+      }
+    }
+    // Both failure paths above `continue`, so the entry can be recorded
+    // without re-checking rv.
+    nsCOMPtr<nsIAtom> localeAtom = NS_Atomize(locale);
+    mPatternFiles.Put(localeAtom, uri);
+  }
+
+  delete find;
+}
+
+// Registers every "*.dic" file found directly in aDir. The locale for a file
+// is derived from its leaf name: lowercased, with an optional "hyph_" prefix
+// and the ".dic" suffix removed, and '_' replaced by '-'. Does nothing if
+// aDir does not exist, is not a directory, or cannot be enumerated.
+void
+nsHyphenationManager::LoadPatternListFromDir(nsIFile *aDir)
+{
+  nsresult rv;
+
+  bool check = false;
+  rv = aDir->Exists(&check);
+  if (NS_FAILED(rv) || !check) {
+    return;
+  }
+
+  rv = aDir->IsDirectory(&check);
+  if (NS_FAILED(rv) || !check) {
+    return;
+  }
+
+  nsCOMPtr<nsISimpleEnumerator> e;
+  rv = aDir->GetDirectoryEntries(getter_AddRefs(e));
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  nsCOMPtr<nsIDirectoryEnumerator> files(do_QueryInterface(e));
+  if (!files) {
+    return;
+  }
+
+  nsCOMPtr<nsIFile> file;
+  while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) {
+    nsAutoString dictName;
+    file->GetLeafName(dictName);
+    NS_ConvertUTF16toUTF8 locale(dictName);
+    ToLowerCase(locale);
+    if (!StringEndsWith(locale, NS_LITERAL_CSTRING(".dic"))) {
+      continue;
+    }
+    if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) {
+      locale.Cut(0, 5);
+    }
+    locale.SetLength(locale.Length() - 4); // strip ".dic"
+    for (uint32_t i = 0; i < locale.Length(); ++i) {
+      if (locale[i] == '_') {
+        locale.Replace(i, 1, '-');
+      }
+    }
+#ifdef DEBUG_hyph
+    printf("adding hyphenation patterns for %s: %s\n", locale.get(),
+           NS_ConvertUTF16toUTF8(dictName).get());
+#endif
+    nsCOMPtr<nsIAtom> localeAtom = NS_Atomize(locale);
+    nsCOMPtr<nsIURI> uri;
+    // Reuse the function-level rv instead of declaring a second variable
+    // that would shadow it.
+    rv = NS_NewFileURI(getter_AddRefs(uri), file);
+    if (NS_SUCCEEDED(rv)) {
+      mPatternFiles.Put(localeAtom, uri);
+    }
+  }
+}
+
+// Fills mHyphAliases from the preferences whose names start with
+// "intl.hyphenation-alias.". For each such preference with a string value,
+// the remainder of the preference name (lowercased) is mapped to the
+// lowercased value. Does nothing if the root preference branch is
+// unavailable or no matching preferences are listed.
+void
+nsHyphenationManager::LoadAliases()
+{
+  nsIPrefBranch* rootBranch = Preferences::GetRootBranch();
+  if (!rootBranch) {
+    return;
+  }
+
+  uint32_t count;
+  char **names;
+  nsresult rv = rootBranch->GetChildList(kIntlHyphenationAliasPrefix,
+                                         &count, &names);
+  if (NS_FAILED(rv) || count == 0) {
+    return;
+  }
+
+  for (uint32_t idx = 0; idx < count; ++idx) {
+    nsAdoptingCString target = Preferences::GetCString(names[idx]);
+    if (!target) {
+      continue;
+    }
+    // Drop the common prefix to get the aliased locale name.
+    nsAutoCString source(names[idx]);
+    source.Cut(0, sizeof(kIntlHyphenationAliasPrefix) - 1);
+    ToLowerCase(source);
+    ToLowerCase(target);
+    nsCOMPtr<nsIAtom> sourceAtom = NS_Atomize(source);
+    nsCOMPtr<nsIAtom> targetAtom = NS_Atomize(target);
+    mHyphAliases.Put(sourceAtom, targetAtom);
+  }
+  NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(count, names);
+}
diff --git a/intl/hyphenation/glue/nsHyphenationManager.h b/intl/hyphenation/glue/nsHyphenationManager.h
new file mode 100644
index 000000000..fa7d73f18
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenationManager.h
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHyphenationManager_h__
+#define nsHyphenationManager_h__
+
+#include "nsInterfaceHashtable.h"
+#include "nsRefPtrHashtable.h"
+#include "nsHashKeys.h"
+#include "nsIObserver.h"
+#include "mozilla/Omnijar.h"
+
+class nsHyphenator;
+class nsIAtom;
+class nsIURI;
+
+// Singleton that maps locales to hyphenators: it records which pattern file
+// is available for which locale, resolves locale aliases, and caches the
+// hyphenators it has created.
+class nsHyphenationManager
+{
+public:
+ nsHyphenationManager();
+
+ // Returns the hyphenator for aLocale, or null if none can be provided.
+ already_AddRefed<nsHyphenator> GetHyphenator(nsIAtom *aLocale);
+
+ // Returns the singleton, creating it on first use.
+ static nsHyphenationManager *Instance();
+
+ // Destroys the singleton, if it exists.
+ static void Shutdown();
+
+private:
+ ~nsHyphenationManager();
+
+protected:
+ // Observer that clears mHyphenators on the "memory-pressure" notification.
+ class MemoryPressureObserver final : public nsIObserver
+ {
+ ~MemoryPressureObserver() {}
+
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIOBSERVER
+ };
+
+ // Helpers that populate mPatternFiles and mHyphAliases.
+ void LoadPatternList();
+ void LoadPatternListFromOmnijar(mozilla::Omnijar::Type aType);
+ void LoadPatternListFromDir(nsIFile *aDir);
+ void LoadAliases();
+
+ // Locale atom -> locale atom it is an alias for.
+ nsInterfaceHashtable<nsISupportsHashKey,nsIAtom> mHyphAliases;
+ // Locale atom -> URI of the pattern file for that locale.
+ nsInterfaceHashtable<nsISupportsHashKey,nsIURI> mPatternFiles;
+ // Locale atom -> hyphenator already created for that locale.
+ nsRefPtrHashtable<nsISupportsHashKey,nsHyphenator> mHyphenators;
+
+ static nsHyphenationManager *sInstance;
+};
+
+#endif // nsHyphenationManager_h__
diff --git a/intl/hyphenation/glue/nsHyphenator.cpp b/intl/hyphenation/glue/nsHyphenator.cpp
new file mode 100644
index 000000000..bcb87baf6
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenator.cpp
@@ -0,0 +1,159 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHyphenator.h"
+#include "nsIFile.h"
+#include "nsUTF8Utils.h"
+#include "nsUnicodeProperties.h"
+#include "nsUnicharUtilCIID.h"
+#include "nsIURI.h"
+
+#include <stdlib.h>
+
+#include "hyphen.h"
+
+// Loads the hyphenation dictionary identified by aURI. If the URI's spec
+// cannot be obtained or the dictionary fails to load, mDict stays null and
+// IsValid() reports false.
+nsHyphenator::nsHyphenator(nsIURI *aURI)
+  : mDict(nullptr)
+{
+  nsCString spec;
+  if (NS_FAILED(aURI->GetSpec(spec))) {
+    return;
+  }
+
+  mDict = hnj_hyphen_load(spec.get());
+#ifdef DEBUG
+  if (mDict != nullptr) {
+    printf("loaded hyphenation patterns from %s\n", spec.get());
+  }
+#endif
+}
+
+// Releases the loaded dictionary, if there is one.
+nsHyphenator::~nsHyphenator()
+{
+  if (!mDict) {
+    return;
+  }
+  hnj_hyphen_free(static_cast<HyphenDict*>(mDict));
+  mDict = nullptr;
+}
+
+// Reports whether a dictionary was loaded, i.e. whether mDict is non-null.
+bool
+nsHyphenator::IsValid()
+{
+ return (mDict != nullptr);
+}
+
+// Finds the hyphenation points in aString.
+//
+// aHyphens is resized to aString.Length() and cleared. The text is split
+// into words (maximal runs of characters whose general category is Letter or
+// Mark); each word is lowercased, converted to UTF-8 and passed to libhyphen.
+// For every character of a word whose libhyphen result has its low bit set,
+// aHyphens[i] is set to true, where i is the UTF-16 offset at which that
+// character starts.
+//
+// Returns NS_ERROR_OUT_OF_MEMORY if aHyphens cannot be resized, and NS_OK
+// otherwise (a libhyphen error for a word just leaves that word unmarked).
+nsresult
+nsHyphenator::Hyphenate(const nsAString& aString, nsTArray<bool>& aHyphens)
+{
+  if (!aHyphens.SetLength(aString.Length(), mozilla::fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool));
+
+  bool inWord = false;
+  uint32_t wordStart = 0, wordLimit = 0;
+  uint32_t chLen;
+  for (uint32_t i = 0; i < aString.Length(); i += chLen) {
+    uint32_t ch = aString[i];
+    chLen = 1;
+
+    if (NS_IS_HIGH_SURROGATE(ch)) {
+      if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
+        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
+        chLen = 2;
+      } else {
+        NS_WARNING("unpaired surrogate found during hyphenation");
+      }
+    }
+
+    nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
+    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
+      if (!inWord) {
+        inWord = true;
+        wordStart = i;
+      }
+      wordLimit = i + chLen;
+      // Keep accumulating unless this was the last character of the text,
+      // in which case the pending word must be processed right now.
+      if (i + chLen < aString.Length()) {
+        continue;
+      }
+    }
+
+    if (inWord) {
+      // Convert the word to utf-8 for libhyphen, lowercasing it as we go
+      // so that it will match the (lowercased) patterns (bug 1105644).
+      nsAutoCString utf8;
+      const char16_t* const begin = aString.BeginReading();
+      const char16_t *cur = begin + wordStart;
+      const char16_t *end = begin + wordLimit;
+      while (cur < end) {
+        uint32_t wordCh = *cur++;
+
+        if (NS_IS_HIGH_SURROGATE(wordCh)) {
+          if (cur < end && NS_IS_LOW_SURROGATE(*cur)) {
+            wordCh = SURROGATE_TO_UCS4(wordCh, *cur++);
+          } else {
+            wordCh = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR
+          }
+        } else if (NS_IS_LOW_SURROGATE(wordCh)) {
+          wordCh = 0xfffd; // unpaired surrogate
+        }
+
+        // XXX What about language-specific casing? Consider Turkish I/i...
+        // In practice, it looks like the current patterns will not be
+        // affected by this, as they treat dotted and undotted i similarly.
+        wordCh = ToLowerCase(wordCh);
+
+        if (wordCh < 0x80) { // U+0000 - U+007F
+          utf8.Append(wordCh);
+        } else if (wordCh < 0x0800) { // U+0080 - U+07FF
+          utf8.Append(0xC0 | (wordCh >> 6));
+          utf8.Append(0x80 | (0x003F & wordCh));
+        } else if (wordCh < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF
+          utf8.Append(0xE0 | (wordCh >> 12));
+          utf8.Append(0x80 | (0x003F & (wordCh >> 6)));
+          utf8.Append(0x80 | (0x003F & wordCh));
+        } else {
+          utf8.Append(0xF0 | (wordCh >> 18));
+          utf8.Append(0x80 | (0x003F & (wordCh >> 12)));
+          utf8.Append(0x80 | (0x003F & (wordCh >> 6)));
+          utf8.Append(0x80 | (0x003F & wordCh));
+        }
+      }
+
+      AutoTArray<char,200> utf8hyphens;
+      utf8hyphens.SetLength(utf8.Length() + 5);
+      char **rep = nullptr;
+      int *pos = nullptr;
+      int *cut = nullptr;
+      int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
+                                      utf8.BeginReading(), utf8.Length(),
+                                      utf8hyphens.Elements(), nullptr,
+                                      &rep, &pos, &cut);
+      if (!err) {
+        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
+        // from utf8 code unit indexing (which would match the utf8 input
+        // string directly) to Unicode character indexing.
+        // We then need to convert this to utf16 code unit offsets for Gecko.
+        const char *hyphPtr = utf8hyphens.Elements();
+        cur = begin + wordStart; // rewind; 'end' still marks the word limit
+        while (cur < end) {
+          if (*hyphPtr & 0x01) {
+            aHyphens[cur - begin] = true;
+          }
+          cur++;
+          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
+              NS_IS_HIGH_SURROGATE(*(cur-1)))
+          {
+            cur++;
+          }
+          hyphPtr++;
+        }
+      }
+
+      // For dictionaries with non-standard hyphenation, libhyphen allocates
+      // the rep/pos/cut arrays (sized by the utf8 word length) and leaves
+      // them to the caller. We make no use of them, but they must be
+      // released here or every such word leaks memory.
+      if (rep) {
+        for (uint32_t k = 0; k < utf8.Length(); ++k) {
+          free(rep[k]);
+        }
+        free(rep);
+      }
+      free(pos);
+      free(cut);
+    }
+
+    inWord = false;
+  }
+
+  return NS_OK;
+}
diff --git a/intl/hyphenation/glue/nsHyphenator.h b/intl/hyphenation/glue/nsHyphenator.h
new file mode 100644
index 000000000..96975d253
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenator.h
@@ -0,0 +1,33 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHyphenator_h__
+#define nsHyphenator_h__
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+class nsIURI;
+
+// Reference-counted wrapper around one loaded hyphenation dictionary.
+class nsHyphenator
+{
+public:
+ // Attempts to load the dictionary identified by aURI; check IsValid()
+ // afterwards to find out whether loading succeeded.
+ explicit nsHyphenator(nsIURI *aURI);
+
+ NS_INLINE_DECL_REFCOUNTING(nsHyphenator)
+
+ // True if a dictionary was loaded.
+ bool IsValid();
+
+ // Resizes aHyphens to the length of aText and marks the hyphenation
+ // points found in it.
+ nsresult Hyphenate(const nsAString& aText, nsTArray<bool>& aHyphens);
+
+private:
+ ~nsHyphenator();
+
+protected:
+ // The loaded dictionary (a HyphenDict, stored untyped), or null.
+ void *mDict;
+};
+
+#endif // nsHyphenator_h__