summaryrefslogtreecommitdiffstats
path: root/intl/hyphenation/hyphen/hyphen.h
blob: 2b4e146421ff44324d82a7a9317de53844030468 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/* Hyphen - hyphenation library using converted TeX hyphenation patterns
 *
 * (C) 1998 Raph Levien
 * (C) 2001 ALTLinux, Moscow
 * (C) 2006, 2007, 2008 László Németh
 *
 * This was part of libHnj library by Raph Levien.
 *
 * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
 * to use it in OpenOffice.org.
 *
 * Non-standard and compound word hyphenation support by László Németh.
 * 
 * License is the original LibHnj license:
 *
 * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
 * licenses follows.
 */

/* LibHnj - a library for high quality hyphenation and justification
 * Copyright (C) 1998 Raph Levien
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the 
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
 * Boston, MA  02111-1307  USA.
*/

/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "MPL"); you may not use this file except in
 * compliance with the MPL.  You may obtain a copy of the MPL at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the MPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
 * for the specific language governing rights and limitations under the
 * MPL.
 *
 */
#ifndef __HYPHEN_H__
#define __HYPHEN_H__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

#include <stdio.h>

typedef struct _HyphenDict HyphenDict;
typedef struct _HyphenState HyphenState;
typedef struct _HyphenTrans HyphenTrans;
#define MAX_CHARS 100
#define MAX_NAME 20

struct _HyphenDict {
  /* user options */
  char lhmin;    /* lefthyphenmin: min. hyph. distance from the left side */
  char rhmin;    /* righthyphenmin: min. hyph. distance from the right side */
  char clhmin;   /* min. hyph. distance from the left compound boundary */
  char crhmin;   /* min. hyph. distance from the right compound boundary */
  char * nohyphen; /* comma separated list of characters or character
                    sequences with forbidden hyphenation */
  int nohyphenl; /* count of elements in nohyphen */
  /* system variables */
  int num_states;
  char cset[MAX_NAME];
  int utf8;
  HyphenState *states;
  HyphenDict *nextlevel;
};

struct _HyphenState {
  char *match;
  char *repl;
  signed char replindex;
  signed char replcut;
  int fallback_state;
  int num_trans;
  HyphenTrans *trans;
};

struct _HyphenTrans {
  char ch;
  int new_state;
};

HyphenDict *hnj_hyphen_load (const char *fn);
HyphenDict *hnj_hyphen_load_file (FILE *f);
void hnj_hyphen_free (HyphenDict *dict);

/* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
int hnj_hyphen_hyphenate (HyphenDict *dict,
			   const char *word, int word_size,
			   char *hyphens);

/*

 int hnj_hyphen_hyphenate2(): non-standard hyphenation.

 (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish
  etc. orthography, see documentation.)
 
 input data:
 word:      input word
 word_size: byte length of the input word
 
 hyphens:   allocated character buffer (size = word_size + 5)
 hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL
 rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers
                (size=word_size) or with NULL value) or NULL

 output data:
 hyphens:   hyphenation vector (hyphenation points signed with odd numbers)
 hyphenated_word: hyphenated input word (hyphens signed with `='),
                  optional (NULL input)
 rep:       NULL (only standard hyph.), or replacements (hyphenation points
            signed with `=' in replacements);
 pos:       NULL, or difference of the actual position and the beginning
            positions of the change in input words;
 cut:       NULL, or counts of the removed characters of the original words
            at hyphenation,

 Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the
       character positions of the input word.

 For example:
 Schiffahrt -> Schiff=fahrt,
 pattern: f1f/ff=f,1,2
 output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2

 Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size
       length arrays):

 char ** rep = NULL;
 int * pos = NULL;
 int * cut = NULL;
 char hyphens[MAXWORDLEN];
 hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut);
 
 See example in the source distribution.

*/

int hnj_hyphen_hyphenate2 (HyphenDict *dict,
        const char *word, int word_size, char * hyphens,
        char *hyphenated_word, char *** rep, int ** pos, int ** cut);

/* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */
/* lhmin: lefthyphenmin
 * rhmin: righthyphenmin
 * clhmin: compoundlefthyphemin
 * crhmin: compoundrighthyphenmin
 * (see documentation) */

int hnj_hyphen_hyphenate3 (HyphenDict *dict,
	const char *word, int word_size, char * hyphens,
	char *hyphword, char *** rep, int ** pos, int ** cut,
	int lhmin, int rhmin, int clhmin, int crhmin);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* __HYPHEN_H__ */