#!/usr/bin/python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Adapted from
# https://hg.mozilla.org/projects/htmlparser/file/0d906fb1ab90/generate-encoding-data.py

# indexes.json comes from 
# https://encoding.spec.whatwg.org/indexes.json
# i.e.
# https://github.com/whatwg/encoding/blob/ce4e83d0df5b5efec0697fc76e66699737e033a3/indexes.json

import json

indexes = json.load(open("indexes.json", "r"))

def nullToZero(codePoint):
  if not codePoint:
    codePoint = 0
  return codePoint

index = []

for codePoint in indexes["big5"]:
  index.append(nullToZero(codePoint))  

# There are four major gaps consisting of more than 4 consecutive invalid pointers
gaps = []
consecutive = 0
consecutiveStart = 0
offset = 0
for codePoint in index:
  if codePoint == 0:
    if consecutive == 0:
      consecutiveStart = offset
    consecutive +=1
  else:
    if consecutive > 4:
      gaps.append((consecutiveStart, consecutiveStart + consecutive))
    consecutive = 0
  offset += 1

def invertRanges(ranges, cap):
  inverted = []
  invertStart = 0
  for (start, end) in ranges:
    if start != 0:
      inverted.append((invertStart, start))
    invertStart = end
  inverted.append((invertStart, cap))
  return inverted

cap = len(index)
ranges = invertRanges(gaps, cap)

# Now compute a compressed lookup table for astralness

gaps = []
consecutive = 0
consecutiveStart = 0
offset = 0
for codePoint in index:
  if codePoint <= 0xFFFF:
    if consecutive == 0:
      consecutiveStart = offset
    consecutive +=1
  else:
    if consecutive > 40:
      gaps.append((consecutiveStart, consecutiveStart + consecutive))
    consecutive = 0
  offset += 1

astralRanges = invertRanges(gaps, cap)


classFile = open("../ucvtw/nsBIG5Data.cpp", "w")
classFile.write('''/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Instead, please regenerate using intl/uconv/tools/gen-big5-data.py
 */

#include "nsBIG5Data.h"

static const char16_t kBig5LowBitsTable[] = {
''')

for (low, high) in ranges:
  for i in xrange(low, high):
    classFile.write('  0x%04X,\n' % (index[i] & 0xFFFF))

classFile.write('''};

static const uint32_t kBig5AstralnessTable[] = {
''')

# An array of bool is inefficient per
# http://stackoverflow.com/questions/4049156/1-bit-per-bool-in-array-c

bits = []
for (low, high) in astralRanges:
  for i in xrange(low, high):
    bits.append(1 if index[i] > 0xFFFF else 0)
# pad length to multiple of 32
for i in xrange(32 - (len(bits) % 32)):
  bits.append(0)
i = 0
while i < len(bits):
  accu = 0
  for j in xrange(32):
    accu |= bits[i + j] << j
  classFile.write('  0x%08X,\n' % accu)
  i += 32

classFile.write('''};

// static
char16_t
nsBIG5Data::LowBits(size_t aPointer)
{
''')

base = 0
for (low, high) in ranges:
  classFile.write('''  if (aPointer < %d) {
    return 0;
  }
  if (aPointer < %d) {
    return kBig5LowBitsTable[%d + (aPointer - %d)];
  }
''' % (low, high, base, low))
  base += (high - low)

classFile.write('''  return 0;
}

// static
bool
nsBIG5Data::IsAstral(size_t aPointer)
{
''')

base = 0
for (low, high) in astralRanges:
  if high - low == 1:
    classFile.write('''  if (aPointer < %d) {
    return false;
  }
  if (aPointer == %d) {
    return true;
  }
''' % (low, low))
  else:
    classFile.write('''  if (aPointer < %d) {
    return false;
  }
  if (aPointer < %d) {
    size_t index = %d + (aPointer - %d);
    return kBig5AstralnessTable[index >> 5] & (1 << (index & 0x1F));
  }
''' % (low, high, base, low))
  base += (high - low)

classFile.write('''  return false;
}

//static
size_t
nsBIG5Data::FindPointer(char16_t aLowBits, bool aIsAstral)
{
  if (!aIsAstral) {
    switch (aLowBits) {
''')

hkscsBound = (0xA1 - 0x81) * 157

preferLast = [
  0x2550,
  0x255E,
  0x2561,
  0x256A,
  0x5341,
  0x5345,
]

for codePoint in preferLast:
  # Python lists don't have .rindex() :-(
  for i in xrange(len(index) - 1, -1, -1):
    candidate = index[i]
    if candidate == codePoint:
       classFile.write('''      case 0x%04X:
        return %d;
''' % (codePoint, i))
       break

classFile.write('''      default:
        break;
    }
  }''')

base = 0
start = 0
for (low, high) in ranges:
  if low <= hkscsBound and hkscsBound < high:
    # This is the first range we don't ignore and the
    # range that contains the first non-HKSCS pointer.
    # Avoid searching HKSCS.
    start = base + hkscsBound - low
    break
  base += (high - low)

classFile.write('''
  for (size_t i = %d; i < MOZ_ARRAY_LENGTH(kBig5LowBitsTable); ++i) {
    if (kBig5LowBitsTable[i] == aLowBits) {
      size_t pointer;
      ''' % start)

base = 0
prevLow = 0
prevHigh = 0
prevBase = 0
writing = False
for (low, high) in ranges:
  if writing:
    classFile.write('''if (i < %d) {
        pointer = i + %d;
      } else ''' % ((prevBase + prevHigh - prevLow), (prevLow - prevBase)))
  prevLow = low
  prevHigh = high
  prevBase = base
  if high > hkscsBound:
    writing = True
  base += (high - low)

classFile.write('''{
        pointer = i + %d;
      }''' % (prevLow - prevBase))

classFile.write('''
      if (aIsAstral == IsAstral(pointer)) {
        return pointer;
      }
    }
  }
  return 0;
}
''')
classFile.close()