diff options
Diffstat (limited to 'browser/components/translation/cld2/internal/offsetmap.cc')
-rw-r--r-- | browser/components/translation/cld2/internal/offsetmap.cc | 569 |
1 files changed, 0 insertions, 569 deletions
diff --git a/browser/components/translation/cld2/internal/offsetmap.cc b/browser/components/translation/cld2/internal/offsetmap.cc deleted file mode 100644 index 84609a71f..000000000 --- a/browser/components/translation/cld2/internal/offsetmap.cc +++ /dev/null @@ -1,569 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// -// Author: dsites@google.com (Dick Sites) -// -// - -#include "offsetmap.h" - -#include <string.h> // for strcmp -#include <stdio.h> // for fprintf, stderr, fclose, etc -#include <algorithm> // for min - -using namespace std; - -namespace CLD2 { - -// Constructor, destructor -OffsetMap::OffsetMap() { - Clear(); -} - -OffsetMap::~OffsetMap() { -} - -// Clear the map -// After: -// next_diff_sub_ is 0 -// Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1] -// which is a fake range of width 0 mapping 0=>0 -void OffsetMap::Clear() { - diffs_.clear(); - pending_op_ = COPY_OP; - pending_length_ = 0; - next_diff_sub_ = 0; - current_lo_aoffset_ = 0; - current_hi_aoffset_ = 0; - current_lo_aprimeoffset_ = 0; - current_hi_aprimeoffset_ = 0; - current_diff_ = 0; - max_aoffset_ = 0; // Largest seen so far - max_aprimeoffset_ = 0; // Largest seen so far -} - -static inline char OpPart(const char c) { - return (c >> 6) & 3; -} -static inline char LenPart(const char c) { - return c & 0x3f; -} - -// Print map to file, for debugging -void OffsetMap::Printmap(const char* filename) { - FILE* fout; - bool needs_close = false; - if (strcmp(filename, "stdout") == 0) { - fout = stdout; - } else if (strcmp(filename, "stderr") == 0) { - fout = stderr; - } else { - fout = fopen(filename, "w"); - needs_close = true; - } - if (fout == NULL) { - fprintf(stderr, "%s did not open\n", filename); - return; - } - - Flush(); // Make sure any pending entry gets printed - fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size()); - for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) { - fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i])); - if ((i % 20) == 19) {fprintf(fout, "\n");} - } - fprintf(fout, "\n"); - if (needs_close) { - fclose(fout); - } -} - -// Reset to offset 0 -void OffsetMap::Reset() { - MaybeFlushAll(); - - next_diff_sub_ = 0; - current_lo_aoffset_ = 0; - current_hi_aoffset_ = 0; - current_lo_aprimeoffset_ = 0; - current_hi_aprimeoffset_ = 0; - current_diff_ = 0; -} - -// Add to mapping from A to A', specifying how many next bytes are -// identical in A and A' -void OffsetMap::Copy(int bytes) { - if (bytes == 0) {return;} - max_aoffset_ += bytes; // Largest seen so far - max_aprimeoffset_ += bytes; // Largest seen so far - if (pending_op_ == COPY_OP) { - pending_length_ += bytes; - } else { - Flush(); - pending_op_ = COPY_OP; - pending_length_ = bytes; - } -} - -// Add to mapping from A to A', specifying how many next bytes are -// inserted in A' while not advancing in A at all -void OffsetMap::Insert(int bytes){ - if (bytes == 0) {return;} - max_aprimeoffset_ += bytes; // Largest seen so far - if (pending_op_ == INSERT_OP) { - pending_length_ += bytes; - } else if ((bytes == 1) && - (pending_op_ == DELETE_OP) && (pending_length_ == 1)) { - // Special-case exactly delete(1) insert(1) +> copy(1); - // all others backmap inserts to after deletes - pending_op_ = COPY_OP; - } else { - Flush(); - pending_op_ = INSERT_OP; - pending_length_ = bytes; - } -} - -// Add to mapping from A to A', specifying how many next bytes are -// deleted from A while not advancing in A' at all -void OffsetMap::Delete(int bytes){ - if (bytes == 0) {return;} - max_aoffset_ += bytes; // Largest seen so far - if (pending_op_ == DELETE_OP) { - pending_length_ += bytes; - } else if ((bytes == 1) && - (pending_op_ == INSERT_OP) && (pending_length_ == 1)) { - // Special-case exactly insert(1) delete(1) => copy(1); - // all others backmap deletes to after insertss - pending_op_ = COPY_OP; - } else { - Flush(); - pending_op_ = DELETE_OP; - pending_length_ = bytes; - } -} - -void OffsetMap::Flush() { - if (pending_length_ == 0) { - return; - } - // We may be emitting a copy op just after a copy op because +1 -1 cancelled - // inbetween. If the lengths don't need a prefix byte, combine them - if ((pending_op_ == COPY_OP) && !diffs_.empty()) { - char c = diffs_[diffs_.size() - 1]; - MapOp prior_op = static_cast<MapOp>(OpPart(c)); - int prior_len = LenPart(c); - if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) { - diffs_[diffs_.size() - 1] += pending_length_; - pending_length_ = 0; - return; - } - } - if (pending_length_ > 0x3f) { - bool non_zero_emitted = false; - for (int shift = 30; shift > 0; shift -= 6) { - int prefix = (pending_length_ >> shift) & 0x3f; - if ((prefix > 0) || non_zero_emitted) { - Emit(PREFIX_OP, prefix); - non_zero_emitted = true; - } - } - } - Emit(pending_op_, pending_length_ & 0x3f); - pending_length_ = 0; -} - - -// Add one more entry to copy one byte off the end, then flush -void OffsetMap::FlushAll() { - Copy(1); - Flush(); -} - -// Flush all if necessary -void OffsetMap::MaybeFlushAll() { - if ((0 < pending_length_) || diffs_.empty()) { - FlushAll(); - } -} - -// Len may be 0, for example as the low piece of length=64 -void OffsetMap::Emit(MapOp op, int len) { - char c = (static_cast<char>(op) << 6) | (len & 0x3f); - diffs_.push_back(c); -} - -void OffsetMap::DumpString() { - for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) { - fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i])); - } - fprintf(stderr, "\n"); - - // Print running table of correspondences - fprintf(stderr, " op A => A' (A forward-maps to A')\n"); - int aoffset = 0; - int aprimeoffset = 0; - int length = 0; - for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) { - char c = diffs_[i]; - MapOp op = static_cast<MapOp>(OpPart(c)); - int len = LenPart(c); - length = (length << 6) + len; - if (op == COPY_OP) { - aoffset += length; - aprimeoffset += length; - length = 0; - } else if (op == INSERT_OP) { - aoffset += 0; - aprimeoffset += length; - length = 0; - } else if (op == DELETE_OP) { - aoffset += length; - aprimeoffset += 0; - length = 0; - } else { // (op == PREFIX_OP) - // Do nothing else - } - fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n", - i, "&=+-"[op], len, - aoffset, aprimeoffset, - (next_diff_sub_ == i) ? " <==next_diff_sub_" : ""); - - } - fprintf(stderr, "\n"); -} - -void OffsetMap::DumpWindow() { - fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, " - "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n", - max_aoffset_, max_aprimeoffset_, next_diff_sub_); - fprintf(stderr, "A [%u..%u)\n", - current_lo_aoffset_, current_hi_aoffset_); - fprintf(stderr, "A' [%u..%u)\n", - current_lo_aprimeoffset_, current_hi_aprimeoffset_); - fprintf(stderr, " diff = %d\n", current_diff_); - DumpString(); -} - -//----------------------------------------------------------------------------// -// The guts of the 2013 design // -// If there are three ranges a b c in diffs_, we can be in one of five // -// states: LEFT of a, in ranges a b c, or RIGHT of c // -// In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ // -// position next_diff_sub_ // -// There also are mapping constants max_aoffset_ and max_aprimeoffset_ // -// If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 // -// If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and // -// next_diff_sub_=diffs_.size() // -// Otherwise, at least one of A[) and A'[) is non-empty and the first bytes // -// correspond to each other. If range i is active, next_diff_sub_ is at // -// the first byte of range i+1. Because of the length-prefix operator, // -// an individual range item in diffs_ may be multiple bytes // -// In all cases aprimeoffset = aoffset + current_diff_ // -// i.e. current_diff_ = aprimeoffset - aoffset // -// // -// In the degenerate case of diffs_.empty(), there are only two states // -// LEFT and RIGHT and the mapping is the identity mapping. // -// The initial state is LEFT. // -// It is an error to move left into LEFT or right into RIGHT, but the code // -// below is robust in these cases. // -//----------------------------------------------------------------------------// - -void OffsetMap::SetLeft() { - current_lo_aoffset_ = 0; - current_hi_aoffset_ = 0; - current_lo_aprimeoffset_ = 0; - current_hi_aprimeoffset_ = 0; - current_diff_ = 0; - next_diff_sub_ = 0; -} - -void OffsetMap::SetRight() { - current_lo_aoffset_ = max_aoffset_; - current_hi_aoffset_ = max_aoffset_; - current_lo_aprimeoffset_ = max_aprimeoffset_; - current_hi_aprimeoffset_ = max_aprimeoffset_; - current_diff_ = max_aprimeoffset_ - max_aoffset_; - next_diff_sub_ = 0; -} - -// Back up over previous range, 1..5 bytes -// Return subscript at the beginning of that. Pins at 0 -int OffsetMap::Backup(int sub) { - if (sub <= 0) {return 0;} - --sub; - while ((0 < sub) && - (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) { - --sub; - } - return sub; -} - -// Parse next range, 1..5 bytes -// Return subscript just off the end of that -int OffsetMap::ParseNext(int sub, MapOp* op, int* length) { - *op = PREFIX_OP; - *length = 0; - char c; - while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) { - c = diffs_[sub++]; - *op = static_cast<MapOp>(OpPart(c)); - int len = LenPart(c); - *length = (*length << 6) + len; - } - // If mal-formed or in RIGHT, this will return with op = PREFIX_OP - // Mal-formed can include a trailing prefix byte with no following op - return sub; -} - -// Parse previous range, 1..5 bytes -// Return current subscript -int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) { - sub = Backup(sub); - return ParseNext(sub, op, length); -} - -// Quick debugging dump; does not parse multi-byte items, so just length & 0x3f -void OffsetMap::PrintPosition(const char* str) { - MapOp op = PREFIX_OP; - int length = 0; - if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) { - op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1])); - length = LenPart(diffs_[next_diff_sub_ - 1]); - } - fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n", - str, - next_diff_sub_, "&=+-"[op], length, - current_lo_aoffset_, current_hi_aoffset_, - current_lo_aprimeoffset_, current_hi_aprimeoffset_); -} - -// Move active window one range to the right -// Return true if move was OK -bool OffsetMap::MoveRight() { - // If at last range or RIGHT, set to RIGHT, return error - if (next_diff_sub_ >= static_cast<int>(diffs_.size())) { - SetRight(); - return false; - } - // Actually OK to move right - MapOp op; - int length; - bool retval = true; - // If mal-formed or in RIGHT, this will return with op = PREFIX_OP - next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length); - - current_lo_aoffset_ = current_hi_aoffset_; - current_lo_aprimeoffset_ = current_hi_aprimeoffset_; - if (op == COPY_OP) { - current_hi_aoffset_ = current_lo_aoffset_ + length; - current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length; - } else if (op == INSERT_OP) { - current_hi_aoffset_ = current_lo_aoffset_ + 0; - current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length; - } else if (op == DELETE_OP) { - current_hi_aoffset_ = current_lo_aoffset_ + length; - current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0; - } else { - SetRight(); - retval = false; - } - current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_; - return retval; -} - -// Move active window one range to the left -// Return true if move was OK -bool OffsetMap::MoveLeft() { - // If at first range or LEFT, set to LEFT, return error - if (next_diff_sub_ <= 0) { - SetLeft(); - return false; - } - // Back up over current active window - next_diff_sub_ = Backup(next_diff_sub_); - if (next_diff_sub_ <= 0) { - SetLeft(); - return false; - } - // Actually OK to move left - MapOp op; - int length; - bool retval = true; - // If mal-formed or in LEFT, this will return with op = PREFIX_OP - next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length); - - current_hi_aoffset_ = current_lo_aoffset_; - current_hi_aprimeoffset_ = current_lo_aprimeoffset_; - if (op == COPY_OP) { - current_lo_aoffset_ = current_hi_aoffset_ - length; - current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length; - } else if (op == INSERT_OP) { - current_lo_aoffset_ = current_hi_aoffset_ - 0; - current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length; - } else if (op == DELETE_OP) { - current_lo_aoffset_ = current_hi_aoffset_ - length; - current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0; - } else { - SetLeft(); - retval = false; - } - current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_; - return true; -} - -// Map an offset in A' to the corresponding offset in A -int OffsetMap::MapBack(int aprimeoffset){ - MaybeFlushAll(); - if (aprimeoffset < 0) {return 0;} - if (max_aprimeoffset_ <= aprimeoffset) { - return (aprimeoffset - max_aprimeoffset_) + max_aoffset_; - } - - // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_, - // use current mapping, else move window left/right - bool ok = true; - while (ok && (aprimeoffset < current_lo_aprimeoffset_)) { - ok = MoveLeft(); - } - while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) { - ok = MoveRight(); - } - // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_ - - int aoffset = aprimeoffset - current_diff_; - if (aoffset >= current_hi_aoffset_) { - // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_ - aoffset = current_hi_aoffset_; - } - return aoffset; -} - -// Map an offset in A to the corresponding offset in A' -int OffsetMap::MapForward(int aoffset){ - MaybeFlushAll(); - if (aoffset < 0) {return 0;} - if (max_aoffset_ <= aoffset) { - return (aoffset - max_aoffset_) + max_aprimeoffset_; - } - - // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_, - // use current mapping, else move window left/right - bool ok = true; - while (ok && (aoffset < current_lo_aoffset_)) { - ok = MoveLeft(); - } - while (ok && (current_hi_aoffset_ <= aoffset)) { - ok = MoveRight(); - } - - int aprimeoffset = aoffset + current_diff_; - if (aprimeoffset >= current_hi_aprimeoffset_) { - // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_ - aprimeoffset = current_hi_aprimeoffset_; - } - return aprimeoffset; -} - - -// static -bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) { - bool ok = true; - while (ok && (source->next_diff_sub_ != source->diffs_.size())) { - ok = source->MoveRight(); - if (source->current_lo_aoffset_ != source->current_hi_aoffset_) { - return false; - } - dest->Insert( - source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_); - } - return true; -} - -// static -bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) { - bool ok = true; - while (ok && (source->next_diff_sub_ != source->diffs_.size())) { - ok = source->MoveRight(); - if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) { - return false; - } - dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_); - } - return true; -} - -// static -void OffsetMap::ComposeOffsetMap( - OffsetMap* g, OffsetMap* f, OffsetMap* h) { - h->Clear(); - f->Reset(); - g->Reset(); - - int lo = 0; - for (;;) { - // Consume delete operations in f. This moves A without moving - // A' and A''. - if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) { - if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) { - // fprintf(stderr, - // "ComposeOffsetMap ERROR, f is longer than g.<br>\n"); - } - - // FlushAll(), called by Reset(), MapForward() or MapBack(), has - // added an extra COPY_OP to f and g, so this function has - // composed an extra COPY_OP in h from those. To avoid - // FlushAll() adds one more extra COPY_OP to h later, dispatch - // Flush() right now. - h->Flush(); - return; - } - - // Consume insert operations in g. This moves A'' without moving A - // and A'. - if (lo >= f->current_hi_aprimeoffset_) { - if (!CopyDeletes(f, h)) { - // fprintf(stderr, - // "ComposeOffsetMap ERROR, g is longer than f.<br>\n"); - } - } - - // Compose one operation which moves A' from lo to hi. - int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_); - if (f->current_lo_aoffset_ != f->current_hi_aoffset_ && - g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) { - h->Copy(hi - lo); - } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) { - h->Delete(hi - lo); - } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) { - h->Insert(hi - lo); - } - - lo = hi; - } -} - -// For testing only -- force a mapping -void OffsetMap::StuffIt(const string& diffs, - int max_aoffset, int max_aprimeoffset) { - Clear(); - diffs_ = diffs; - max_aoffset_ = max_aoffset; - max_aprimeoffset_ = max_aprimeoffset; -} - - -} // namespace CLD2 - |