summaryrefslogtreecommitdiffstats
path: root/browser/components/translation/cld2/internal/offsetmap.cc
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/translation/cld2/internal/offsetmap.cc')
-rw-r--r--browser/components/translation/cld2/internal/offsetmap.cc569
1 files changed, 0 insertions, 569 deletions
diff --git a/browser/components/translation/cld2/internal/offsetmap.cc b/browser/components/translation/cld2/internal/offsetmap.cc
deleted file mode 100644
index 84609a71f..000000000
--- a/browser/components/translation/cld2/internal/offsetmap.cc
+++ /dev/null
@@ -1,569 +0,0 @@
-// Copyright 2013 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//
-// Author: dsites@google.com (Dick Sites)
-//
-//
-
-#include "offsetmap.h"
-
-#include <string.h> // for strcmp
-#include <stdio.h> // for fprintf, stderr, fclose, etc
-#include <algorithm> // for min
-
-using namespace std;
-
-namespace CLD2 {
-
-// Constructor, destructor
-OffsetMap::OffsetMap() {
- Clear();
-}
-
-OffsetMap::~OffsetMap() {
-}
-
-// Clear the map
-// After:
-// next_diff_sub_ is 0
-// Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
-// which is a fake range of width 0 mapping 0=>0
-void OffsetMap::Clear() {
- diffs_.clear();
- pending_op_ = COPY_OP;
- pending_length_ = 0;
- next_diff_sub_ = 0;
- current_lo_aoffset_ = 0;
- current_hi_aoffset_ = 0;
- current_lo_aprimeoffset_ = 0;
- current_hi_aprimeoffset_ = 0;
- current_diff_ = 0;
- max_aoffset_ = 0; // Largest seen so far
- max_aprimeoffset_ = 0; // Largest seen so far
-}
-
-static inline char OpPart(const char c) {
- return (c >> 6) & 3;
-}
-static inline char LenPart(const char c) {
- return c & 0x3f;
-}
-
-// Print map to file, for debugging
-void OffsetMap::Printmap(const char* filename) {
- FILE* fout;
- bool needs_close = false;
- if (strcmp(filename, "stdout") == 0) {
- fout = stdout;
- } else if (strcmp(filename, "stderr") == 0) {
- fout = stderr;
- } else {
- fout = fopen(filename, "w");
- needs_close = true;
- }
- if (fout == NULL) {
- fprintf(stderr, "%s did not open\n", filename);
- return;
- }
-
- Flush(); // Make sure any pending entry gets printed
- fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size());
- for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
- fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
- if ((i % 20) == 19) {fprintf(fout, "\n");}
- }
- fprintf(fout, "\n");
- if (needs_close) {
- fclose(fout);
- }
-}
-
-// Reset to offset 0
-void OffsetMap::Reset() {
- MaybeFlushAll();
-
- next_diff_sub_ = 0;
- current_lo_aoffset_ = 0;
- current_hi_aoffset_ = 0;
- current_lo_aprimeoffset_ = 0;
- current_hi_aprimeoffset_ = 0;
- current_diff_ = 0;
-}
-
-// Add to mapping from A to A', specifying how many next bytes are
-// identical in A and A'
-void OffsetMap::Copy(int bytes) {
- if (bytes == 0) {return;}
- max_aoffset_ += bytes; // Largest seen so far
- max_aprimeoffset_ += bytes; // Largest seen so far
- if (pending_op_ == COPY_OP) {
- pending_length_ += bytes;
- } else {
- Flush();
- pending_op_ = COPY_OP;
- pending_length_ = bytes;
- }
-}
-
-// Add to mapping from A to A', specifying how many next bytes are
-// inserted in A' while not advancing in A at all
-void OffsetMap::Insert(int bytes){
- if (bytes == 0) {return;}
- max_aprimeoffset_ += bytes; // Largest seen so far
- if (pending_op_ == INSERT_OP) {
- pending_length_ += bytes;
- } else if ((bytes == 1) &&
- (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
- // Special-case exactly delete(1) insert(1) +> copy(1);
- // all others backmap inserts to after deletes
- pending_op_ = COPY_OP;
- } else {
- Flush();
- pending_op_ = INSERT_OP;
- pending_length_ = bytes;
- }
-}
-
-// Add to mapping from A to A', specifying how many next bytes are
-// deleted from A while not advancing in A' at all
-void OffsetMap::Delete(int bytes){
- if (bytes == 0) {return;}
- max_aoffset_ += bytes; // Largest seen so far
- if (pending_op_ == DELETE_OP) {
- pending_length_ += bytes;
- } else if ((bytes == 1) &&
- (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
- // Special-case exactly insert(1) delete(1) => copy(1);
- // all others backmap deletes to after insertss
- pending_op_ = COPY_OP;
- } else {
- Flush();
- pending_op_ = DELETE_OP;
- pending_length_ = bytes;
- }
-}
-
-void OffsetMap::Flush() {
- if (pending_length_ == 0) {
- return;
- }
- // We may be emitting a copy op just after a copy op because +1 -1 cancelled
- // inbetween. If the lengths don't need a prefix byte, combine them
- if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
- char c = diffs_[diffs_.size() - 1];
- MapOp prior_op = static_cast<MapOp>(OpPart(c));
- int prior_len = LenPart(c);
- if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
- diffs_[diffs_.size() - 1] += pending_length_;
- pending_length_ = 0;
- return;
- }
- }
- if (pending_length_ > 0x3f) {
- bool non_zero_emitted = false;
- for (int shift = 30; shift > 0; shift -= 6) {
- int prefix = (pending_length_ >> shift) & 0x3f;
- if ((prefix > 0) || non_zero_emitted) {
- Emit(PREFIX_OP, prefix);
- non_zero_emitted = true;
- }
- }
- }
- Emit(pending_op_, pending_length_ & 0x3f);
- pending_length_ = 0;
-}
-
-
-// Add one more entry to copy one byte off the end, then flush
-void OffsetMap::FlushAll() {
- Copy(1);
- Flush();
-}
-
-// Flush all if necessary
-void OffsetMap::MaybeFlushAll() {
- if ((0 < pending_length_) || diffs_.empty()) {
- FlushAll();
- }
-}
-
-// Len may be 0, for example as the low piece of length=64
-void OffsetMap::Emit(MapOp op, int len) {
- char c = (static_cast<char>(op) << 6) | (len & 0x3f);
- diffs_.push_back(c);
-}
-
-void OffsetMap::DumpString() {
- for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
- fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
- }
- fprintf(stderr, "\n");
-
- // Print running table of correspondences
- fprintf(stderr, " op A => A' (A forward-maps to A')\n");
- int aoffset = 0;
- int aprimeoffset = 0;
- int length = 0;
- for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
- char c = diffs_[i];
- MapOp op = static_cast<MapOp>(OpPart(c));
- int len = LenPart(c);
- length = (length << 6) + len;
- if (op == COPY_OP) {
- aoffset += length;
- aprimeoffset += length;
- length = 0;
- } else if (op == INSERT_OP) {
- aoffset += 0;
- aprimeoffset += length;
- length = 0;
- } else if (op == DELETE_OP) {
- aoffset += length;
- aprimeoffset += 0;
- length = 0;
- } else { // (op == PREFIX_OP)
- // Do nothing else
- }
- fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
- i, "&=+-"[op], len,
- aoffset, aprimeoffset,
- (next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
-
- }
- fprintf(stderr, "\n");
-}
-
-void OffsetMap::DumpWindow() {
- fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
- "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
- max_aoffset_, max_aprimeoffset_, next_diff_sub_);
- fprintf(stderr, "A [%u..%u)\n",
- current_lo_aoffset_, current_hi_aoffset_);
- fprintf(stderr, "A' [%u..%u)\n",
- current_lo_aprimeoffset_, current_hi_aprimeoffset_);
- fprintf(stderr, " diff = %d\n", current_diff_);
- DumpString();
-}
-
-//----------------------------------------------------------------------------//
-// The guts of the 2013 design //
-// If there are three ranges a b c in diffs_, we can be in one of five //
-// states: LEFT of a, in ranges a b c, or RIGHT of c //
-// In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
-// position next_diff_sub_ //
-// There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
-// If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
-// If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
-// next_diff_sub_=diffs_.size() //
-// Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
-// correspond to each other. If range i is active, next_diff_sub_ is at //
-// the first byte of range i+1. Because of the length-prefix operator, //
-// an individual range item in diffs_ may be multiple bytes //
-// In all cases aprimeoffset = aoffset + current_diff_ //
-// i.e. current_diff_ = aprimeoffset - aoffset //
-// //
-// In the degenerate case of diffs_.empty(), there are only two states //
-// LEFT and RIGHT and the mapping is the identity mapping. //
-// The initial state is LEFT. //
-// It is an error to move left into LEFT or right into RIGHT, but the code //
-// below is robust in these cases. //
-//----------------------------------------------------------------------------//
-
-void OffsetMap::SetLeft() {
- current_lo_aoffset_ = 0;
- current_hi_aoffset_ = 0;
- current_lo_aprimeoffset_ = 0;
- current_hi_aprimeoffset_ = 0;
- current_diff_ = 0;
- next_diff_sub_ = 0;
-}
-
-void OffsetMap::SetRight() {
- current_lo_aoffset_ = max_aoffset_;
- current_hi_aoffset_ = max_aoffset_;
- current_lo_aprimeoffset_ = max_aprimeoffset_;
- current_hi_aprimeoffset_ = max_aprimeoffset_;
- current_diff_ = max_aprimeoffset_ - max_aoffset_;
- next_diff_sub_ = 0;
-}
-
-// Back up over previous range, 1..5 bytes
-// Return subscript at the beginning of that. Pins at 0
-int OffsetMap::Backup(int sub) {
- if (sub <= 0) {return 0;}
- --sub;
- while ((0 < sub) &&
- (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
- --sub;
- }
- return sub;
-}
-
-// Parse next range, 1..5 bytes
-// Return subscript just off the end of that
-int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
- *op = PREFIX_OP;
- *length = 0;
- char c;
- while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
- c = diffs_[sub++];
- *op = static_cast<MapOp>(OpPart(c));
- int len = LenPart(c);
- *length = (*length << 6) + len;
- }
- // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
- // Mal-formed can include a trailing prefix byte with no following op
- return sub;
-}
-
-// Parse previous range, 1..5 bytes
-// Return current subscript
-int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
- sub = Backup(sub);
- return ParseNext(sub, op, length);
-}
-
-// Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
-void OffsetMap::PrintPosition(const char* str) {
- MapOp op = PREFIX_OP;
- int length = 0;
- if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
- op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
- length = LenPart(diffs_[next_diff_sub_ - 1]);
- }
- fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
- str,
- next_diff_sub_, "&=+-"[op], length,
- current_lo_aoffset_, current_hi_aoffset_,
- current_lo_aprimeoffset_, current_hi_aprimeoffset_);
-}
-
-// Move active window one range to the right
-// Return true if move was OK
-bool OffsetMap::MoveRight() {
- // If at last range or RIGHT, set to RIGHT, return error
- if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
- SetRight();
- return false;
- }
- // Actually OK to move right
- MapOp op;
- int length;
- bool retval = true;
- // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
- next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
-
- current_lo_aoffset_ = current_hi_aoffset_;
- current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
- if (op == COPY_OP) {
- current_hi_aoffset_ = current_lo_aoffset_ + length;
- current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
- } else if (op == INSERT_OP) {
- current_hi_aoffset_ = current_lo_aoffset_ + 0;
- current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
- } else if (op == DELETE_OP) {
- current_hi_aoffset_ = current_lo_aoffset_ + length;
- current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
- } else {
- SetRight();
- retval = false;
- }
- current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
- return retval;
-}
-
-// Move active window one range to the left
-// Return true if move was OK
-bool OffsetMap::MoveLeft() {
- // If at first range or LEFT, set to LEFT, return error
- if (next_diff_sub_ <= 0) {
- SetLeft();
- return false;
- }
- // Back up over current active window
- next_diff_sub_ = Backup(next_diff_sub_);
- if (next_diff_sub_ <= 0) {
- SetLeft();
- return false;
- }
- // Actually OK to move left
- MapOp op;
- int length;
- bool retval = true;
- // If mal-formed or in LEFT, this will return with op = PREFIX_OP
- next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
-
- current_hi_aoffset_ = current_lo_aoffset_;
- current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
- if (op == COPY_OP) {
- current_lo_aoffset_ = current_hi_aoffset_ - length;
- current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
- } else if (op == INSERT_OP) {
- current_lo_aoffset_ = current_hi_aoffset_ - 0;
- current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
- } else if (op == DELETE_OP) {
- current_lo_aoffset_ = current_hi_aoffset_ - length;
- current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
- } else {
- SetLeft();
- retval = false;
- }
- current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
- return true;
-}
-
-// Map an offset in A' to the corresponding offset in A
-int OffsetMap::MapBack(int aprimeoffset){
- MaybeFlushAll();
- if (aprimeoffset < 0) {return 0;}
- if (max_aprimeoffset_ <= aprimeoffset) {
- return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
- }
-
- // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
- // use current mapping, else move window left/right
- bool ok = true;
- while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
- ok = MoveLeft();
- }
- while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
- ok = MoveRight();
- }
- // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
-
- int aoffset = aprimeoffset - current_diff_;
- if (aoffset >= current_hi_aoffset_) {
- // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
- aoffset = current_hi_aoffset_;
- }
- return aoffset;
-}
-
-// Map an offset in A to the corresponding offset in A'
-int OffsetMap::MapForward(int aoffset){
- MaybeFlushAll();
- if (aoffset < 0) {return 0;}
- if (max_aoffset_ <= aoffset) {
- return (aoffset - max_aoffset_) + max_aprimeoffset_;
- }
-
- // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
- // use current mapping, else move window left/right
- bool ok = true;
- while (ok && (aoffset < current_lo_aoffset_)) {
- ok = MoveLeft();
- }
- while (ok && (current_hi_aoffset_ <= aoffset)) {
- ok = MoveRight();
- }
-
- int aprimeoffset = aoffset + current_diff_;
- if (aprimeoffset >= current_hi_aprimeoffset_) {
- // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
- aprimeoffset = current_hi_aprimeoffset_;
- }
- return aprimeoffset;
-}
-
-
-// static
-bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
- bool ok = true;
- while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
- ok = source->MoveRight();
- if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
- return false;
- }
- dest->Insert(
- source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
- }
- return true;
-}
-
-// static
-bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
- bool ok = true;
- while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
- ok = source->MoveRight();
- if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
- return false;
- }
- dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
- }
- return true;
-}
-
-// static
-void OffsetMap::ComposeOffsetMap(
- OffsetMap* g, OffsetMap* f, OffsetMap* h) {
- h->Clear();
- f->Reset();
- g->Reset();
-
- int lo = 0;
- for (;;) {
- // Consume delete operations in f. This moves A without moving
- // A' and A''.
- if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
- if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
- // fprintf(stderr,
- // "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
- }
-
- // FlushAll(), called by Reset(), MapForward() or MapBack(), has
- // added an extra COPY_OP to f and g, so this function has
- // composed an extra COPY_OP in h from those. To avoid
- // FlushAll() adds one more extra COPY_OP to h later, dispatch
- // Flush() right now.
- h->Flush();
- return;
- }
-
- // Consume insert operations in g. This moves A'' without moving A
- // and A'.
- if (lo >= f->current_hi_aprimeoffset_) {
- if (!CopyDeletes(f, h)) {
- // fprintf(stderr,
- // "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
- }
- }
-
- // Compose one operation which moves A' from lo to hi.
- int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
- if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
- g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
- h->Copy(hi - lo);
- } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
- h->Delete(hi - lo);
- } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
- h->Insert(hi - lo);
- }
-
- lo = hi;
- }
-}
-
-// For testing only -- force a mapping
-void OffsetMap::StuffIt(const string& diffs,
- int max_aoffset, int max_aprimeoffset) {
- Clear();
- diffs_ = diffs;
- max_aoffset_ = max_aoffset;
- max_aprimeoffset_ = max_aprimeoffset;
-}
-
-
-} // namespace CLD2
-