diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /extensions/universalchardet/src/base/nsEUCJPProber.cpp | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'extensions/universalchardet/src/base/nsEUCJPProber.cpp')
-rw-r--r-- | extensions/universalchardet/src/base/nsEUCJPProber.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/extensions/universalchardet/src/base/nsEUCJPProber.cpp b/extensions/universalchardet/src/base/nsEUCJPProber.cpp new file mode 100644 index 000000000..663421f03 --- /dev/null +++ b/extensions/universalchardet/src/base/nsEUCJPProber.cpp @@ -0,0 +1,69 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// for japanese encoding, obeserve characteristic: +// 1, kana character (or hankaku?) often have hight frequency of appereance +// 2, kana character often exist in group +// 3, certain combination of kana is never used in japanese language + +#include "nsEUCJPProber.h" +#include "nsDebug.h" + +void nsEUCJPProber::Reset(void) +{ + mCodingSM->Reset(); + mState = eDetecting; + mContextAnalyser.Reset(); + mDistributionAnalyser.Reset(); +} + +nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) +{ + NS_ASSERTION(aLen, "HandleData called with empty buffer"); + nsSMState codingState; + + for (uint32_t i = 0; i < aLen; i++) + { + codingState = mCodingSM->NextState(aBuf[i]); + if (codingState == eItsMe) + { + mState = eFoundIt; + break; + } + if (codingState == eStart) + { + uint32_t charLen = mCodingSM->GetCurrentCharLen(); + + if (i == 0) + { + mLastChar[1] = aBuf[0]; + mContextAnalyser.HandleOneChar(mLastChar, charLen); + mDistributionAnalyser.HandleOneChar(mLastChar, charLen); + } + else + { + mContextAnalyser.HandleOneChar(aBuf+i-1, charLen); + mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); + } + } + } + + mLastChar[0] = aBuf[aLen-1]; + + if (mState == eDetecting) + if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + mState = eFoundIt; + + return mState; +} + +float nsEUCJPProber::GetConfidence(void) +{ + float contxtCf = mContextAnalyser.GetConfidence(); + float distribCf = mDistributionAnalyser.GetConfidence(); + + return (contxtCf > distribCf ? contxtCf : distribCf); +} + |