summary | refs | log | tree | commit | diff | stats
path: root/intl/chardet/nsCyrillicDetector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/chardet/nsCyrillicDetector.cpp')
-rw-r--r-- intl/chardet/nsCyrillicDetector.cpp 160
1 files changed, 160 insertions, 0 deletions
diff --git a/intl/chardet/nsCyrillicDetector.cpp b/intl/chardet/nsCyrillicDetector.cpp
new file mode 100644
index 000000000..feebeed65
--- /dev/null
+++ b/intl/chardet/nsCyrillicDetector.cpp
@@ -0,0 +1,160 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nscore.h"
+#include "nsCyrillicProb.h"
+#include <stdio.h>
+
+#include "nsCOMPtr.h"
+#include "nsISupports.h"
+#include "nsICharsetDetector.h"
+#include "nsICharsetDetectionObserver.h"
+#include "nsIStringCharsetDetector.h"
+#include "nsCyrillicDetector.h"
+
+//----------------------------------------------------------------------
+// Interface nsISupports [implementation]
+// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
+// supplies the nsISupports boilerplate for the named class and advertises the
+// listed interface -- confirm against the macro's definition.
+// One invocation per concrete detector class implemented in this file.
+NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
+NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
+
+// Feeds one block of bytes into the detector.
+//
+// For every byte and every candidate charset, the byte is mapped to a
+// character class (bytes without the high bit set are always class 0), and
+// the table entry gCyrillicProb[previous class][current class] is added to
+// that candidate's running score. After the block has been consumed,
+// DataEnd() is invoked. Does nothing once mDone is set.
+//
+// aBuf - bytes to examine
+// aLen - number of bytes available at aBuf
+void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
+{
+  if (mDone) {
+    return;
+  }
+
+  const char* const end = aBuf + aLen;
+  for (const char* cur = aBuf; cur != end; ++cur) {
+    // Only bytes with the high bit set are looked up in the class tables;
+    // the low seven bits select the table entry.
+    const bool highBit = (0x80 & *cur) != 0;
+    for (unsigned item = 0; item < mItems; ++item) {
+      const uint8_t cls = highBit ? mCyrillicClass[item][(*cur) & 0x7F] : 0;
+      NS_ASSERTION( cls <= 32 , "illegal character class");
+      mProb[item] += gCyrillicProb[mLastCls[item]][cls];
+      mLastCls[item] = cls;
+    }
+  }
+
+  // The decision is currently made from the first block received.
+  DataEnd();
+}
+
+//---------------------------------------------------------------------
+// NOTE(review): THRESHOLD_RATIO is not referenced by any code visible in this
+// file; kept unchanged in case something else relies on it -- confirm.
+#define THRESHOLD_RATIO 1.5f
+
+// Picks the candidate charset with the highest accumulated score and reports
+// it through Report(), then marks the detector as done.
+//
+// Returns without reporting when the detector is already done, or when no
+// candidate has a non-zero score (e.g. no 8-bit data was seen). On a tie the
+// lowest-indexed candidate wins, because only a strictly greater score
+// replaces the current best.
+void nsCyrillicDetector::DataEnd()
+{
+  if (mDone) {
+    return;
+  }
+
+  uint32_t max = 0;
+  uint8_t maxIdx = 0;
+  uint8_t j;
+  for (j = 0; j < mItems; j++) {
+    if (mProb[j] > max) {
+      max = mProb[j];
+      maxIdx = j;
+    }
+  }
+
+  if (0 == max) { // nothing scored: we didn't get any 8-bit data
+    return;
+  }
+
+#ifdef DEBUG
+  // Scores are handled as unsigned values above, so print them with %u and an
+  // explicit cast rather than %d.
+  for (j = 0; j < mItems; j++) {
+    printf("Charset %s->\t%u\n", mCharsets[j],
+           static_cast<unsigned>(mProb[j]));
+  }
+#endif
+  this->Report(mCharsets[maxIdx]);
+  mDone = true;
+}
+
+//---------------------------------------------------------------------
+// Builds a detector over aItems candidate charsets. All three arguments are
+// forwarded unchanged to the nsCyrillicDetector base; the observer starts
+// out unset until Init() supplies one.
+nsCyrXPCOMDetector::nsCyrXPCOMDetector(uint8_t aItems,
+                                       const uint8_t** aCyrillicClass,
+                                       const char** aCharsets)
+  : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
+{
+  // No observer has been registered yet.
+  mObserver = nullptr;
+}
+
+//---------------------------------------------------------------------
+// The destructor performs no explicit work.
+nsCyrXPCOMDetector::~nsCyrXPCOMDetector() = default;
+
+//---------------------------------------------------------------------
+// Registers the observer that will be notified when a charset is reported.
+//
+// aObserver - observer to store; must not be null
+// Returns NS_ERROR_ILLEGAL_VALUE when aObserver is null, NS_OK otherwise.
+// Calling this while an observer is already set trips a debug assertion.
+NS_IMETHODIMP
+nsCyrXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver)
+{
+  NS_ASSERTION(mObserver == nullptr , "Init twice");
+
+  if (!aObserver) {
+    return NS_ERROR_ILLEGAL_VALUE;
+  }
+
+  mObserver = aObserver;
+  return NS_OK;
+}
+
+//----------------------------------------------------------
+// Feeds one block of bytes to the detector.
+//
+// aBuf        - bytes to examine; must not be null
+// aLen        - number of bytes available at aBuf
+// oDontFeedMe - out: set to true once detection has finished, so the caller
+//               can stop supplying further blocks; false otherwise. Must not
+//               be null.
+// Returns NS_ERROR_ILLEGAL_VALUE when aBuf or oDontFeedMe is null, NS_OK
+// otherwise.
+NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
+  const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+
+  if((nullptr == aBuf) || (nullptr == oDontFeedMe))
+    return NS_ERROR_ILLEGAL_VALUE;
+
+  this->HandleData(aBuf, aLen);
+  // HandleData() ignores all input once mDone is set, so tell the caller
+  // when additional blocks would simply be discarded.
+  *oDontFeedMe = mDone;
+  return NS_OK;
+}
+
+//----------------------------------------------------------
+// Signals that no more data will be supplied: runs the final evaluation via
+// DataEnd() and always returns NS_OK. Asserts (debug only) that an observer
+// has been registered.
+NS_IMETHODIMP
+nsCyrXPCOMDetector::Done()
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+
+  DataEnd();
+  return NS_OK;
+}
+
+//----------------------------------------------------------
+// Forwards the detected charset name to the registered observer, tagged
+// with eBestAnswer. Asserts (debug only) that an observer has been
+// registered.
+//
+// aCharset - name of the charset being reported
+void
+nsCyrXPCOMDetector::Report(const char* aCharset)
+{
+  NS_ASSERTION(mObserver != nullptr , "have not init yet");
+
+  mObserver->Notify(aCharset, eBestAnswer);
+}
+
+//---------------------------------------------------------------------
+// Builds a string detector over aItems candidate charsets. All three
+// arguments are forwarded unchanged to the nsCyrillicDetector base; no
+// further setup is done here.
+nsCyrXPCOMStringDetector::nsCyrXPCOMStringDetector(
+    uint8_t aItems,
+    const uint8_t** aCyrillicClass,
+    const char** aCharsets)
+  : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
+{
+}
+
+//---------------------------------------------------------------------
+// The destructor performs no explicit work.
+nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() = default;
+
+//---------------------------------------------------------------------
+// Records the reported charset name in mResult so that DoIt() can hand it
+// back to its caller.
+//
+// aCharset - name of the charset being reported
+void
+nsCyrXPCOMStringDetector::Report(const char* aCharset)
+{
+  mResult = aCharset;
+}
+
+//---------------------------------------------------------------------
+// Runs detection over a single, complete buffer.
+//
+// aBuf     - bytes to examine; may be null only when aLen is 0
+// aLen     - number of bytes available at aBuf
+// oCharset - out: the charset name passed to Report() during this call, or
+//            null when nothing was reported. Must not be null.
+// oConf    - out: always set to eBestAnswer on success
+// Returns NS_ERROR_ILLEGAL_VALUE for the invalid argument combinations
+// described above, NS_OK otherwise.
+NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen,
+                                             const char** oCharset,
+                                             nsDetectionConfident &oConf)
+{
+  // Reject only inputs that would otherwise be dereferenced as null: a
+  // missing out-parameter, or a null buffer that claims to contain data.
+  if ((nullptr == oCharset) || ((nullptr == aBuf) && (0 != aLen)))
+    return NS_ERROR_ILLEGAL_VALUE;
+
+  // Clear the outcome of any earlier call so this one starts undecided.
+  // NOTE(review): only mResult and mDone are reset here; the per-candidate
+  // scores that HandleData() accumulates are not -- confirm whether reusing
+  // one instance for several strings is intended.
+  mResult = nullptr;
+  mDone = false;
+  this->HandleData(aBuf, aLen);
+  this->DataEnd();
+  *oCharset=mResult;
+  oConf = eBestAnswer;
+  return NS_OK;
+}
+
+