module bindbc.mecab.types;

/*
  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

  Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
*/

/* C common data structures */

// c_long follows the width of the C compiler's `long` on the target platform;
// D's own `long` is always 64 bits and therefore does not match C `long` on
// Windows or on 32-bit targets.
import core.stdc.config : c_long;

@nogc nothrow:


extern (C)
{
    // Handle types declared without a body: their fields are not visible
    // from D, so they can only be used through pointers.
    extern struct mecab_t;
    extern struct mecab_lattice_t;
    extern struct mecab_model_t;

    /**
     * DictionaryInfo structure
     */
    struct mecab_dictionary_info_t
    {
        /**
         * filename of dictionary
         * On Windows, filename is stored in UTF-8 encoding
         */
        const(char)* filename;

        /**
         * character set of the dictionary. e.g., "SHIFT-JIS", "UTF-8"
         */
        const(char)* charset;

        /**
         * How many words are registered in this dictionary.
         */
        uint size;

        /**
         * dictionary type
         * this value should be MECAB_USR_DIC, MECAB_SYS_DIC, or MECAB_UNK_DIC.
         */
        int type;

        /**
         * left attributes size
         */
        uint lsize;

        /**
         * right attributes size
         */
        uint rsize;

        /**
         * version of this dictionary
         * (named `version_` because `version` is a reserved word in D)
         */
        ushort version_;

        /**
         * pointer to the next dictionary info.
         */
        mecab_dictionary_info_t* next;
    }

    /**
     * Path structure
     */
    struct mecab_path_t
    {
        /**
         * pointer to the right node
         */
        mecab_node_t* rnode;

        /**
         * pointer to the next right path
         */
        mecab_path_t* rnext;

        /**
         * pointer to the left node
         */
        mecab_node_t* lnode;

        /**
         * pointer to the next left path
         */
        mecab_path_t* lnext;

        /**
         * local cost
         */
        int cost;

        /**
         * marginal probability
         */
        float prob;
    }

    /**
     * Node structure
     */
    struct mecab_node_t
    {
        /**
         * pointer to the previous node.
         */
        mecab_node_t* prev;

        /**
         * pointer to the next node.
         */
        mecab_node_t* next;

        /**
         * pointer to the node which ends at the same position.
         */
        mecab_node_t* enext;

        /**
         * pointer to the node which starts at the same position.
         */
        mecab_node_t* bnext;

        /**
         * pointer to the right path.
         * this value is NULL if MECAB_ONE_BEST mode.
         */
        mecab_path_t* rpath;

        /**
         * pointer to the left path.
         * this value is NULL if MECAB_ONE_BEST mode.
         */
        mecab_path_t* lpath;

        /**
         * surface string.
         * this value is not 0 terminated.
         * You can get the length with length/rlength members.
         */
        const(char)* surface;

        /**
         * feature string
         */
        const(char)* feature;

        /**
         * unique node id
         */
        uint id;

        /**
         * length of the surface form.
         */
        ushort length;

        /**
         * length of the surface form including white space before the morph.
         */
        ushort rlength;

        /**
         * right attribute id
         */
        ushort rcAttr;

        /**
         * left attribute id
         */
        ushort lcAttr;

        /**
         * unique part of speech id. This value is defined in "pos.def" file.
         */
        ushort posid;

        /**
         * character type
         */
        ubyte char_type;

        /**
         * status of this node.
         * This value is MECAB_NOR_NODE, MECAB_UNK_NODE, MECAB_BOS_NODE, MECAB_EOS_NODE, or MECAB_EON_NODE.
         */
        ubyte stat;

        /**
         * set 1 if this node is best node.
         */
        ubyte isbest;

        /**
         * forward accumulative log summation.
         * This value is only available when MECAB_MARGINAL_PROB is passed.
         */
        float alpha;

        /**
         * backward accumulative log summation.
         * This value is only available when MECAB_MARGINAL_PROB is passed.
         */
        float beta;

        /**
         * marginal probability.
         * This value is only available when MECAB_MARGINAL_PROB is passed.
         */
        float prob;

        /**
         * word cost.
         */
        short wcost;

        /**
         * best accumulative cost from bos node to this node.
         *
         * Declared as c_long rather than D `long`: the C side declares this
         * field as a C `long`, whose width is platform dependent, so a
         * fixed 64-bit D `long` would misstate the struct layout wherever
         * C `long` is 32 bits.
         */
        c_long cost;
    }

    /**
     * Parameters for MeCab::Node::stat
     */
    enum
    {
        /**
         * Normal node defined in the dictionary.
         */
        MECAB_NOR_NODE = 0,

        /**
         * Unknown node not defined in the dictionary.
         */
        MECAB_UNK_NODE = 1,

        /**
         * Virtual node representing a beginning of the sentence.
         */
        MECAB_BOS_NODE = 2,

        /**
         * Virtual node representing an end of the sentence.
         */
        MECAB_EOS_NODE = 3,

        /**
         * Virtual node representing an end of the N-best enumeration.
         */
        MECAB_EON_NODE = 4
    }

    /**
     * Parameters for MeCab::DictionaryInfo::type
     */
    enum
    {
        /**
         * This is a system dictionary.
         */
        MECAB_SYS_DIC = 0,

        /**
         * This is a user dictionary.
         */
        MECAB_USR_DIC = 1,

        /**
         * This is an unknown word dictionary.
         */
        MECAB_UNK_DIC = 2
    }

    /**
     * Parameters for MeCab::Lattice::request_type
     */
    enum
    {
        /**
         * One best result is obtained (default mode)
         */
        MECAB_ONE_BEST = 1,

        /**
         * Set this flag if you want to obtain N best results.
         */
        MECAB_NBEST = 2,

        /**
         * Set this flag if you want to enable a partial parsing mode.
         * When this flag is set, the input |sentence| needs to be written
         * in partial parsing format.
         */
        MECAB_PARTIAL = 4,

        /**
         * Set this flag if you want to obtain marginal probabilities.
         * Marginal probability is set in MeCab::Node::prob.
         * The parsing speed will get 3-5 times slower than the default mode.
         */
        MECAB_MARGINAL_PROB = 8,

        /**
         * Set this flag if you want to obtain alternative results.
         * Not implemented.
         */
        MECAB_ALTERNATIVE = 16,

        /**
         * When this flag is set, the result linked-list (Node::next/prev)
         * traverses all nodes in the lattice.
         */
        MECAB_ALL_MORPHS = 32,

        /**
         * When this flag is set, tagger internally copies the body of passed
         * sentence into internal buffer.
         */
        MECAB_ALLOCATE_SENTENCE = 64
    }

    /**
     * Parameters for MeCab::Lattice::boundary_constraint_type
     */
    enum
    {
        /**
         * The token boundary is not specified.
         */
        MECAB_ANY_BOUNDARY = 0,

        /**
         * The position is a strong token boundary.
         */
        MECAB_TOKEN_BOUNDARY = 1,

        /**
         * The position is not a token boundary.
         */
        MECAB_INSIDE_TOKEN = 2
    }
}