diff options
Diffstat (limited to 'intl/chardet/nsCyrillicDetector.cpp')
-rw-r--r-- | intl/chardet/nsCyrillicDetector.cpp | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/intl/chardet/nsCyrillicDetector.cpp b/intl/chardet/nsCyrillicDetector.cpp new file mode 100644 index 000000000..feebeed65 --- /dev/null +++ b/intl/chardet/nsCyrillicDetector.cpp @@ -0,0 +1,160 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "nscore.h" +#include "nsCyrillicProb.h" +#include <stdio.h> + +#include "nsCOMPtr.h" +#include "nsISupports.h" +#include "nsICharsetDetector.h" +#include "nsICharsetDetectionObserver.h" +#include "nsIStringCharsetDetector.h" +#include "nsCyrillicDetector.h" + +//---------------------------------------------------------------------- +// Interface nsISupports [implementation] +NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector) +NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector) + +void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen) +{ + uint8_t cls; + const char* b; + uint32_t i; + if(mDone) + return; + for(i=0, b=aBuf;i<aLen;i++,b++) + { + for(unsigned j=0;j<mItems;j++) + { + if( 0x80 & *b) + cls = mCyrillicClass[j][(*b) & 0x7F]; + else + cls = 0; + NS_ASSERTION( cls <= 32 , "illegal character class"); + mProb[j] += gCyrillicProb[mLastCls[j]][cls]; + mLastCls[j] = cls; + } + } + // We now only based on the first block we receive + DataEnd(); +} + +//--------------------------------------------------------------------- +#define THRESHOLD_RATIO 1.5f +void nsCyrillicDetector::DataEnd() +{ + uint32_t max=0; + uint8_t maxIdx=0; + uint8_t j; + if(mDone) + return; + for(j=0;j<mItems;j++) { + if(mProb[j] > max) + { + max = mProb[j]; + maxIdx= j; + } + } + + if( 0 == max ) // if we didn't get any 8 bits data + return; + +#ifdef DEBUG + for(j=0;j<mItems;j++) + printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]); +#endif + this->Report(mCharsets[maxIdx]); + mDone = true; +} + +//--------------------------------------------------------------------- +nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems, + const uint8_t ** aCyrillicClass, + const char **aCharsets) + : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) +{ + mObserver = nullptr; +} + +//--------------------------------------------------------------------- +nsCyrXPCOMDetector::~nsCyrXPCOMDetector() +{ +} + +//--------------------------------------------------------------------- +NS_IMETHODIMP nsCyrXPCOMDetector::Init( + nsICharsetDetectionObserver* aObserver) +{ + NS_ASSERTION(mObserver == nullptr , "Init twice"); + if(nullptr == aObserver) + return NS_ERROR_ILLEGAL_VALUE; + + mObserver = aObserver; + return NS_OK; +} + +//---------------------------------------------------------- +NS_IMETHODIMP nsCyrXPCOMDetector::DoIt( + const char* aBuf, uint32_t aLen, bool* oDontFeedMe) +{ + NS_ASSERTION(mObserver != nullptr , "have not init yet"); + + if((nullptr == aBuf) || (nullptr == oDontFeedMe)) + return NS_ERROR_ILLEGAL_VALUE; + + this->HandleData(aBuf, aLen); + *oDontFeedMe = false; + return NS_OK; +} + +//---------------------------------------------------------- +NS_IMETHODIMP nsCyrXPCOMDetector::Done() +{ + NS_ASSERTION(mObserver != nullptr , "have not init yet"); + this->DataEnd(); + return NS_OK; +} + +//---------------------------------------------------------- +void nsCyrXPCOMDetector::Report(const char* aCharset) +{ + NS_ASSERTION(mObserver != nullptr , "have not init yet"); + mObserver->Notify(aCharset, eBestAnswer); +} + +//--------------------------------------------------------------------- +nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems, + const uint8_t ** aCyrillicClass, + const char **aCharsets) + : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) +{ +} + +//--------------------------------------------------------------------- +nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() +{ +} + +//--------------------------------------------------------------------- +void nsCyrXPCOMStringDetector::Report(const char *aCharset) +{ + mResult = aCharset; +} + +//--------------------------------------------------------------------- +NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen, + const char** oCharset, nsDetectionConfident &oConf) +{ + mResult = nullptr; + mDone = false; + this->HandleData(aBuf, aLen); + this->DataEnd(); + *oCharset=mResult; + oConf = eBestAnswer; + return NS_OK; +} + + |