1 module bindbc.mecab.types;
2 
3 /*
4   MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
5 
6   Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
7   Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
8 */
9 
10 /* C common data structures  */
11 
12 @nogc nothrow:
13 
14 
15 extern (C)
16 {
17     extern struct mecab_t;
18     extern struct mecab_lattice_t;
19     extern struct mecab_model_t;
20 
21     /**
22      * DictionaryInfo structure
23      */
24     struct mecab_dictionary_info_t
25     {
26         /**
27          * filename of dictionary
28          * On Windows, filename is stored in UTF-8 encoding
29          */
30         const(char)* filename;
31 
32         /**
33          * character set of the dictionary. e.g., "SHIFT-JIS", "UTF-8"
34          */
35         const(char)* charset;
36 
37         /**
38          * How many words are registered in this dictionary.
39          */
40         uint size;
41 
42         /**
43          * dictionary type
44          * this value should be MECAB_USR_DIC, MECAB_SYS_DIC, or MECAB_UNK_DIC.
45          */
46         int type;
47 
48         /**
49          * left attributes size
50          */
51         uint lsize;
52 
53         /**
54          * right attributes size
55          */
56         uint rsize;
57 
58         /**
59          * version of this dictionary
60          */
61         ushort version_;
62 
63         /**
64          * pointer to the next dictionary info.
65          */
66         mecab_dictionary_info_t* next;
67     }
68 
69     /**
70      * Path structure
71      */
72     struct mecab_path_t
73     {
74         /**
75          * pointer to the right node
76          */
77         mecab_node_t* rnode;
78 
79         /**
80          * pointer to the next right path
81          */
82         mecab_path_t* rnext;
83 
84         /**
85          * pointer to the left node
86          */
87         mecab_node_t* lnode;
88 
89         /**
90          * pointer to the next left path
91          */
92         mecab_path_t* lnext;
93 
94         /**
95          * local cost
96          */
97         int cost;
98 
99         /**
100          * marginal probability
101          */
102         float prob;
103     }
104 
105     /**
106      * Node structure
107      */
108     struct mecab_node_t
109     {
110         /**
111          * pointer to the previous node.
112          */
113         mecab_node_t* prev;
114 
115         /**
116          * pointer to the next node.
117          */
118         mecab_node_t* next;
119 
120         /**
121          * pointer to the node which ends at the same position.
122          */
123         mecab_node_t* enext;
124 
125         /**
126          * pointer to the node which starts at the same position.
127          */
128         mecab_node_t* bnext;
129 
130         /**
131          * pointer to the right path.
132          * this value is NULL if MECAB_ONE_BEST mode.
133          */
134         mecab_path_t* rpath;
135 
136         /**
137          * pointer to the right path.
138          * this value is NULL if MECAB_ONE_BEST mode.
139          */
140         mecab_path_t* lpath;
141 
142         /**
143          * surface string.
144          * this value is not 0 terminated.
145          * You can get the length with length/rlength members.
146          */
147         const(char)* surface;
148 
149         /**
150          * feature string
151          */
152         const(char)* feature;
153 
154         /**
155          * unique node id
156          */
157         uint id;
158 
159         /**
160          * length of the surface form.
161          */
162         ushort length;
163 
164         /**
165          * length of the surface form including white space before the morph.
166          */
167         ushort rlength;
168 
169         /**
170          * right attribute id
171          */
172         ushort rcAttr;
173 
174         /**
175          * left attribute id
176          */
177         ushort lcAttr;
178 
179         /**
180          * unique part of speech id. This value is defined in "pos.def" file.
181          */
182         ushort posid;
183 
184         /**
185          * character type
186          */
187         ubyte char_type;
188 
189         /**
190          * status of this model.
191          * This value is MECAB_NOR_NODE, MECAB_UNK_NODE, MECAB_BOS_NODE, MECAB_EOS_NODE, or MECAB_EON_NODE.
192          */
193         ubyte stat;
194 
195         /**
196          * set 1 if this node is best node.
197          */
198         ubyte isbest;
199 
200         /**
201          * forward accumulative log summation.
202          * This value is only available when MECAB_MARGINAL_PROB is passed.
203          */
204         float alpha;
205 
206         /**
207          * backward accumulative log summation.
208          * This value is only available when MECAB_MARGINAL_PROB is passed.
209          */
210         float beta;
211 
212         /**
213          * marginal probability.
214          * This value is only available when MECAB_MARGINAL_PROB is passed.
215          */
216         float prob;
217 
218         /**
219          * word cost.
220          */
221         short wcost;
222 
223         /**
224          * best accumulative cost from bos node to this node.
225          */
226         long cost;
227     }
228 
229     /**
230      * Parameters for MeCab::Node::stat
231      */
232     enum
233     {
234         /**
235          * Normal node defined in the dictionary.
236          */
237         MECAB_NOR_NODE,
238         /**
239          * Unknown node not defined in the dictionary.
240          */
241         MECAB_UNK_NODE = 1,
242         /**
243          * Virtual node representing a beginning of the sentence.
244          */
245         MECAB_BOS_NODE = 2,
246         /**
247          * Virtual node representing a end of the sentence.
248          */
249         MECAB_EOS_NODE = 3,
250 
251         /**
252          * Virtual node representing a end of the N-best enumeration.
253          */
254         MECAB_EON_NODE = 4
255     };
256 
257     /**
258      * Parameters for MeCab::DictionaryInfo::type
259      */
260     enum
261     {
262         /**
263          * This is a system dictionary.
264          */
265         MECAB_SYS_DIC,
266 
267         /**
268          * This is a user dictionary.
269          */
270         MECAB_USR_DIC = 1,
271 
272         /**
273          * This is a unknown word dictionary.
274          */
275         MECAB_UNK_DIC = 2
276     };
277 
278     /**
279      * Parameters for MeCab::Lattice::request_type
280      */
281     enum
282     {
283         /**
284          * One best result is obtained (default mode)
285          */
286         MECAB_ONE_BEST = 1,
287         /**
288          * Set this flag if you want to obtain N best results.
289          */
290         MECAB_NBEST = 2,
291         /**
292          * Set this flag if you want to enable a partial parsing mode.
293          * When this flag is set, the input |sentence| needs to be written
294          * in partial parsing format.
295          */
296         MECAB_PARTIAL = 4,
297         /**
298          * Set this flag if you want to obtain marginal probabilities.
299          * Marginal probability is set in MeCab::Node::prob.
300          * The parsing speed will get 3-5 times slower than the default mode.
301          */
302         MECAB_MARGINAL_PROB = 8,
303         /**
304          * Set this flag if you want to obtain alternative results.
305          * Not implemented.
306          */
307         MECAB_ALTERNATIVE = 16,
308         /**
309          * When this flag is set, the result linked-list (Node::next/prev)
310          * traverses all nodes in the lattice.
311          */
312         MECAB_ALL_MORPHS = 32,
313 
314         /**
315          * When this flag is set, tagger internally copies the body of passed
316          * sentence into internal buffer.
317          */
318         MECAB_ALLOCATE_SENTENCE = 64
319     }
320 
321     /**
322      * Parameters for MeCab::Lattice::boundary_constraint_type
323      */
324     enum
325     {
326         /**
327          * The token boundary is not specified.
328          */
329         MECAB_ANY_BOUNDARY,
330 
331         /**
332          * The position is a strong token boundary.
333          */
334         MECAB_TOKEN_BOUNDARY = 1,
335 
336         /**
337          * The position is not a token boundary.
338          */
339         MECAB_INSIDE_TOKEN = 2
340     }
341 }