summaryrefslogtreecommitdiffstats
path: root/browser/components/translation/cld2/internal/offsetmap.h
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/translation/cld2/internal/offsetmap.h')
-rw-r--r--browser/components/translation/cld2/internal/offsetmap.h175
1 files changed, 175 insertions, 0 deletions
diff --git a/browser/components/translation/cld2/internal/offsetmap.h b/browser/components/translation/cld2/internal/offsetmap.h
new file mode 100644
index 000000000..632555400
--- /dev/null
+++ b/browser/components/translation/cld2/internal/offsetmap.h
@@ -0,0 +1,175 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//
+// Author: dsites@google.com (Dick Sites)
+//
+
+#ifndef UTIL_UTF8_OFFSETMAP_H_
+#define UTIL_UTF8_OFFSETMAP_H_
+
+#include <string> // for string
+#include "integral_types.h" // for uint32
+
+// ***************************** OffsetMap **************************
+//
+// An OffsetMap object is a container for a mapping from offsets in one text
+// buffer A' to offsets in another text buffer A. It is most useful when A' is
+// built from A via substitutions that occasionally do not preserve byte length.
+//
+// A series of operators are used to build the correspondence map, then
+// calls can be made to map an offset in A' to an offset in A, or vice versa.
+// The map starts with offset 0 in A corresponding to offset 0 in A'.
+// The mapping is then built sequentially, adding on byte ranges that are
+// identical in A and A', byte ranges that are inserted in A', and byte ranges
+// that are deleted from A. All bytes beyond those specified when building the
+// map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
+// end of the map.
+//
+// The internal data structure records positions at which bytes are added or
+// deleted. Using the map is O(1) when increasing the A' or A offset
+// monotonically, and O(n) when accessing random offsets, where n is the
+// number of differences.
+//
+
+namespace CLD2 {
+
+class OffsetMap {
+ public:
+ // Constructor, destructor
+ OffsetMap();
+ ~OffsetMap();
+
+ // Clear the map
+ void Clear();
+
+ // Add to mapping from A to A', specifying how many next bytes correspond
+ // in A and A'
+ void Copy(int bytes);
+
+ // Add to mapping from A to A', specifying how many next bytes are
+ // inserted in A' while not advancing in A at all
+ void Insert(int bytes);
+
+ // Add to mapping from A to A', specifying how many next bytes are
+ // deleted from A while not advancing in A' at all
+ void Delete(int bytes);
+
+ // Print map to file, for debugging
+ void Printmap(const char* filename);
+
+ // [Finish building map,] Re-position to offset 0
+ // This call is optional; MapForward and MapBack finish building the map
+ // if necessary
+ void Reset();
+
+ // Map an offset in A' to the corresponding offset in A
+ int MapBack(int aprimeoffset);
+
+ // Map an offset in A to the corresponding offset in A'
+ int MapForward(int aoffset);
+
+ // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
+ // from A' to A'' and h is from A to A''.
+ //
+ // Note that g->MoveForward(f->MoveForward(aoffset)) always equals
+ // to h->MoveForward(aoffset), while
+ // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
+ // to h->MoveBack(aprimeprimeoffset). This happens when deletion in
+ // f and insertion in g are at the same place. For example,
+ //
+ // A 1 2 3 4
+ // ^ | ^ ^
+ // | | / | f
+ // v vv v
+ // A' 1' 2' 3'
+ // ^ ^^ ^
+ // | | \ | g
+ // v | v v
+ // A'' 1'' 2'' 3'' 4''
+ //
+ // results in:
+ //
+ // A 1 2 3 4
+ // ^ ^\ ^ ^
+ // | | \ | | h
+ // v | vv v
+ // A'' 1'' 2'' 3'' 4''
+ //
+ // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
+ // the latter figure.
+ static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
+
+ // For debugging only; writes to stderr
+ void DumpWindow();
+
+ // For testing only -- force a mapping
+ void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
+
+ private:
+ enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
+
+ void Flush();
+ void FlushAll();
+ void MaybeFlushAll();
+ void Emit(MapOp op, int len);
+
+ void SetLeft();
+ void SetRight();
+
+ // Back up over previous range, 1..5 bytes
+ // Return subscript at the beginning of that. Pins at 0
+ int Backup(int sub);
+
+ // Parse next range, 1..5 bytes
+ // Return subscript just off the end of that
+ int ParseNext(int sub, MapOp* op, int* length);
+
+ // Parse previous range, 1..5 bytes
+ // Return current subscript
+ int ParsePrevious(int sub, MapOp* op, int* length);
+
+ void PrintPosition(const char* str);
+
+ bool MoveRight(); // Returns true if OK
+ bool MoveLeft(); // Returns true if OK
+ void DumpString();
+
+ // Copies insert operations from source to dest. Returns true if no
+ // other operations are found.
+ static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
+
+ // Copies delete operations from source to dest. Returns true if no other
+ // operations are found.
+ static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
+
+ std::string diffs_;
+ MapOp pending_op_;
+ uint32 pending_length_;
+
+ // Offsets in the ranges below correspond to each other, with A' = A + diff
+ int next_diff_sub_;
+ int current_lo_aoffset_;
+ int current_hi_aoffset_;
+ int current_lo_aprimeoffset_;
+ int current_hi_aprimeoffset_;
+ int current_diff_;
+ int max_aoffset_;
+ int max_aprimeoffset_;
+};
+
+} // namespace CLD2
+
+#endif // UTIL_UTF8_OFFSETMAP_H_
+