summaryrefslogtreecommitdiffstats
path: root/intl/uconv/tools/umaptable.c
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/tools/umaptable.c')
-rw-r--r--intl/uconv/tools/umaptable.c460
1 files changed, 460 insertions, 0 deletions
diff --git a/intl/uconv/tools/umaptable.c b/intl/uconv/tools/umaptable.c
new file mode 100644
index 000000000..8bc9a6d47
--- /dev/null
+++ b/intl/uconv/tools/umaptable.c
@@ -0,0 +1,460 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define NOMAPPING 0xfffd
+
+typedef struct {
+ uint16_t srcBegin; /* 2 byte */
+ uint16_t srcEnd; /* 2 byte */
+ uint16_t destBegin; /* 2 byte */
+} uFormat0;
+
+typedef struct {
+ uint16_t srcBegin; /* 2 byte */
+ uint16_t srcEnd; /* 2 byte */
+ uint16_t mappingOffset; /* 2 byte */
+} uFormat1;
+
+typedef struct {
+ uint16_t srcBegin; /* 2 byte */
+ uint16_t srcEnd; /* 2 byte -waste */
+ uint16_t destBegin; /* 2 byte */
+} uFormat2;
+
+typedef struct {
+ union {
+ uFormat0 format0;
+ uFormat1 format1;
+ uFormat2 format2;
+ } fmt;
+} uMapCell;
+
+/* =================================================
+ uTable
+================================================= */
+typedef struct {
+ uint16_t itemOfList;
+ uint16_t offsetToFormatArray;
+ uint16_t offsetToMapCellArray;
+ uint16_t offsetToMappingTable;
+ uint16_t data[1];
+} uTable;
+
+uint16_t umap[256][256];
+int bInitFromOrTo = 0;
+int bGenerateFromUnicodeTable = 0;
+
+#define MAXCELLNUM 1000
+
+static int numOfItem = 0;
+uMapCell cell[MAXCELLNUM];
+uint16_t format[MAXCELLNUM / 4];
+uint16_t mapping[256*256];
+static int mappinglen = 0;
+static int formatcount[4] = {0,0,0,0};
+
+#define SetFormat(n,f) { format[(n >> 2)] |= ((f) << ((n & 0x0003) << 2)); formatcount[f]++; }
+#define GetFormat(n) ( format[(n >> 2)] >> ((n & 0x0003) << 2)) &0x00FF)
+#define MAPVALUE(i) (umap[(i >> 8) & 0xFF][(i) & 0xFF])
+
+int FORMAT1CNST = 10 ;
+int FORMAT0CNST = 5 ;
+void initmaps()
+{
+ int i,j;
+ for(i=0;i<256;i++)
+ for(j=0;j<256;j++)
+ {
+ umap[i][j]= NOMAPPING;
+ }
+ for(i=0;i<MAXCELLNUM / 4;i++)
+ format[i]=0;
+}
+void SetMapValue(short u,short c)
+{
+ if(NOMAPPING == MAPVALUE(u))
+ MAPVALUE(u) = c & 0x0000FFFF;
+ else {
+ fprintf(stderr, "warning- duplicate mapping %x map to both %x and %x\n", u, MAPVALUE(u), c);
+ }
+}
+void AddFormat2(uint16_t srcBegin)
+{
+ uint16_t destBegin = MAPVALUE(srcBegin);
+ printf("Begin of Item %04X\n",numOfItem);
+ printf(" Format 2\n");
+ printf(" srcBegin = %04X\n", srcBegin);
+ printf(" destBegin = %04X\n", destBegin );
+ SetFormat(numOfItem,2);
+ cell[numOfItem].fmt.format2.srcBegin = srcBegin;
+ cell[numOfItem].fmt.format2.srcEnd = 0;
+ cell[numOfItem].fmt.format2.destBegin = destBegin;
+ printf("End of Item %04X \n\n",numOfItem);
+ numOfItem++;
+ /* Unmark the umap */
+ MAPVALUE(srcBegin) = NOMAPPING;
+}
+void AddFormat1(uint16_t srcBegin, uint16_t srcEnd)
+{
+ uint16_t i;
+ printf("Begin of Item %04X\n",numOfItem);
+ printf(" Format 1\n");
+ printf(" srcBegin = %04X\n", srcBegin);
+ printf(" srcEnd = %04X\n", srcEnd );
+ printf(" mappingOffset = %04X\n", mappinglen);
+ printf(" Mapping = " );
+ SetFormat(numOfItem,1);
+ cell[numOfItem].fmt.format1.srcBegin = srcBegin;
+ cell[numOfItem].fmt.format1.srcEnd = srcEnd;
+ cell[numOfItem].fmt.format1.mappingOffset = mappinglen;
+ for(i=srcBegin ; i <= srcEnd ; i++,mappinglen++)
+ {
+ if( ((i-srcBegin) % 8) == 0)
+ printf("\n ");
+ mapping[mappinglen]= MAPVALUE(i);
+ printf("%04X ",(mapping[mappinglen] ));
+ /* Unmark the umap */
+ MAPVALUE(i) = NOMAPPING;
+ }
+ printf("\n");
+ printf("End of Item %04X \n\n",numOfItem);
+ numOfItem++;
+}
+void AddFormat0(uint16_t srcBegin, uint16_t srcEnd)
+{
+ uint16_t i;
+ uint16_t destBegin = MAPVALUE(srcBegin);
+ printf("Begin of Item %04X\n",numOfItem);
+ printf(" Format 0\n");
+ printf(" srcBegin = %04X\n", srcBegin);
+ printf(" srcEnd = %04X\n", srcEnd );
+ printf(" destBegin = %04X\n", destBegin );
+ SetFormat(numOfItem,0);
+ cell[numOfItem].fmt.format0.srcBegin = srcBegin;
+ cell[numOfItem].fmt.format0.srcEnd = srcEnd;
+ cell[numOfItem].fmt.format0.destBegin = destBegin;
+ for(i=srcBegin ; i <= srcEnd ; i++)
+ {
+ /* Unmark the umap */
+ MAPVALUE(i) = NOMAPPING;
+ }
+ printf("End of Item %04X \n\n",numOfItem);
+ numOfItem++;
+}
+void printnpl()
+{
+printf(
+"/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
+"/* This Source Code Form is subject to the terms of the Mozilla Public\n"
+" * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
+" * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n");
+}
+void gentable()
+{
+ /* OK! For now, we just use format 1 for each row */
+ /* We need to chage this to use other format to save the space */
+ uint16_t begin,end;
+ uint16_t ss,gs,gp,state,gc;
+ uint16_t diff, lastdiff;
+
+ printnpl();
+ printf("/*========================================================\n");
+ printf(" This is a Generated file. Please don't edit it.\n");
+ printf("\n");
+ printf(" The tool which used to generate this file is called umaptable.\n");
+ printf(" You can find this tool under mozilla/intl/uconv/tools/umaptable.c.\n");
+
+ printf(" If you have any problems with this file, please file a bug\n");
+ printf(" under the \"Internationalization\" component in\n");
+ printf(" https://bugzilla.mozilla.org/enter_bug.cgi?product=Core\n");
+ printf("\n");
+ printf(" Table in Debug form \n");
+
+ for(begin = 0; MAPVALUE(begin) ==NOMAPPING; begin++)
+ ;
+ for(end = 0xFFFF; MAPVALUE(end) ==NOMAPPING; end--)
+ ;
+ if(end != begin)
+ {
+ lastdiff = MAPVALUE(begin) - begin;
+ for(gp=begin+1,state = 0 ; gp<=end; gp++)
+ {
+ int input ;
+ diff = MAPVALUE(gp) - gp;
+ input = (diff == lastdiff);
+ switch(state)
+ {
+ case 0:
+ if(input)
+ {
+ state = 1;
+ ss = gp -1;
+ gc = 2;
+ }
+ break;
+ case 1:
+ if(input)
+ {
+ if(gc++ >= FORMAT0CNST)
+ {
+ state = 2;
+ }
+ }
+ else
+ {
+ state = 0;
+ }
+ break;
+ case 2:
+ if(input)
+ {
+ }
+ else
+ {
+ AddFormat0(ss,gp-1);
+ state = 0;
+ }
+ break;
+ }
+
+ lastdiff = diff;
+ }
+ }
+ if(state == 2)
+ AddFormat0(ss,end);
+
+ for(;(MAPVALUE(begin) ==NOMAPPING) && (begin <= end); begin++)
+ ;
+ if(begin <= end)
+ {
+ for(;(MAPVALUE(end)==NOMAPPING) && (end >= begin); end--)
+ ;
+ for(ss=gp=begin,state = 0 ; gp<=end; gp++)
+ {
+ int input = (MAPVALUE(gp) == NOMAPPING);
+ switch(state)
+ {
+ case 0:
+ if(input)
+ {
+ gc = 1;
+ gs = gp;
+ state = 1;
+ }
+ break;
+ case 1:
+ if(input)
+ {
+ if(gc++ >= FORMAT1CNST)
+ state = 2;
+ }
+ else
+ state = 0;
+ break;
+ case 2:
+ if(input)
+ {
+ }
+ else
+ {
+ if(gs == (ss+1))
+ AddFormat2(ss);
+ else
+ AddFormat1(ss ,gs-1);
+ state = 0;
+ ss = gp;
+ }
+ break;
+ }
+ }
+ if(end == ss)
+ AddFormat2(ss );
+ else
+ AddFormat1(ss ,end );
+ }
+ printf("========================================================*/\n");
+}
+void writetable()
+{
+ uint16_t i;
+ uint16_t off1,off2,off3;
+ uint16_t cur = 0;
+ uint16_t formatitem = (((numOfItem)>>2) + 1);
+ off1 = 4;
+ off2 = off1 + formatitem ;
+ off3 = off2 + numOfItem * sizeof(uMapCell) / sizeof(uint16_t);
+ /* write itemOfList */
+ printf("/* Offset=0x%04X ItemOfList */\n 0x%04X,\n", cur++, numOfItem);
+
+ /* write offsetToFormatArray */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X offsetToFormatArray */\n 0x%04X,\n", cur++,off1);
+
+ /* write offsetToMapCellArray */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X offsetToMapCellArray */ \n 0x%04X,\n", cur++,off2);
+
+ /* write offsetToMappingTable */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X offsetToMappingTable */ \n 0x%04X,\n", cur++,off3);
+
+ /* write FormatArray */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X Start of Format Array */ \n",cur);
+ printf("/* Total of Format 0 : 0x%04X */\n"
+ , formatcount[0]);
+ printf("/* Total of Format 1 : 0x%04X */\n"
+ , formatcount[1]);
+ printf("/* Total of Format 2 : 0x%04X */\n"
+ , formatcount[2]);
+ printf("/* Total of Format 3 : 0x%04X */\n"
+ , formatcount[3]);
+ for(i=0;i<formatitem;i++,cur++)
+ {
+ if((i%8) == 0)
+ printf("\n");
+ printf("0x%04X, ",format[i]);
+ }
+ printf("\n");
+
+ /* write MapCellArray */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X Start of MapCell Array */ \n",cur);
+ for(i=0;i<numOfItem;i++,cur+=3)
+ {
+ printf("/* %04X */ 0x%04X, 0x%04X, 0x%04X, \n",
+ i,
+ cell[i].fmt.format0.srcBegin,
+ cell[i].fmt.format0.srcEnd,
+ cell[i].fmt.format0.destBegin
+ );
+ }
+
+ /* write MappingTable */
+ printf("/*-------------------------------------------------------*/\n");
+ printf("/* Offset=0x%04X Start of MappingTable */ \n",cur);
+ for(i=0;i<mappinglen;i++,cur++)
+ {
+ if((i%8) == 0)
+ printf("\n/* %04X */ ",i);
+ printf("0x%04X, ",mapping[i] );
+ }
+ printf("\n");
+ printf("/* End of table Total Length = 0x%04X * 2 */\n",cur);
+}
+
+void usage()
+{
+ fprintf(stderr, "please indicate what kind of mapping mapping table you want to generate:\n");
+ fprintf(stderr, "\t-uf : generate *.uf (from unicode) table, or\n");
+ fprintf(stderr, "\t-ut : generate *.ut (to unicode) table\n");
+}
+void parsearg(int argc, char* argv[])
+{
+ int i;
+ for(i=0;i<argc;i++)
+ {
+ if(strncmp("-uf", argv[i],3) == 0) {
+ if(! bInitFromOrTo) {
+ bGenerateFromUnicodeTable = 1;
+ bInitFromOrTo = 1;
+ } else {
+ usage();
+ exit(-1);
+ }
+ }
+ if(strncmp("-ut", argv[i],3) == 0) {
+ if(! bInitFromOrTo) {
+ bGenerateFromUnicodeTable = 0;
+ bInitFromOrTo = 1;
+ } else {
+ usage();
+ exit(-1);
+ }
+ }
+ if((strncmp("-0", argv[i],2) == 0) && ((i+1) < argc))
+ {
+ int cnst0;
+ if(sscanf(argv[i+1], "%d", &cnst0) == 1)
+ {
+ if(cnst0 > 0)
+ {
+ FORMAT0CNST = cnst0;
+ }
+ }
+ else
+ {
+ fprintf(stderr, "argc error !!!!\n");
+ exit(-1);
+ }
+ i++;
+ }
+ if((strncmp("-1", argv[i],2) == 0) && ((i+1) < argc))
+ {
+ int cnst1;
+ if(sscanf(argv[i+1], "%d", &cnst1) == 1)
+ {
+ if(cnst1 > 0)
+ {
+ FORMAT1CNST = cnst1;
+ }
+ }
+ else
+ {
+ fprintf(stderr, "argc error !!!!\n");
+ exit(-1);
+ }
+ i++;
+ }
+ }
+ if(! bInitFromOrTo)
+ {
+ usage();
+ exit(-1);
+ }
+ fprintf(stderr, "format 0 cnst = %d\n", FORMAT0CNST);
+ fprintf(stderr, "format 1 cnst = %d\n", FORMAT1CNST);
+ fprintf(stderr, "generate u%c table\n",
+ bGenerateFromUnicodeTable ? 'f' : 't');
+}
+void getinput()
+{
+ char buf[256];
+ short c,u;
+ for (; fgets(buf,sizeof(buf),stdin);)
+ {
+ if(buf[0]=='0' && buf[1] == 'x')
+ {
+ u=-1;
+ sscanf(buf,"%hx %hx",&c,&u);
+ if (u == -1 && 0x80 <= c && c <=0x9f)
+ {
+ u = c;
+ }
+ if (u != -1)
+ {
+ if(bGenerateFromUnicodeTable)
+ SetMapValue(u, c);
+ else
+ SetMapValue(c, u);
+ }
+ }
+ }
+}
+int main(int argc, char* argv[])
+{
+ parsearg(argc, argv);
+ initmaps();
+ getinput();
+ gentable();
+ writetable();
+ return 0;
+}