$Id: libmecab.html 65 2007-01-30 00:52:53Z taku-ku $;
C 라이브러리는 이하의 함수를 제공하고 있습니다.
mecab_t *mecab_new (int argc, char **argv)
mecab_t *mecab_new2 (const char *arg)
const char *mecab_version()
const char *mecab_strerror (mecab_t* m)
const char *mecab_sparse_tostr (mecab_t *m, const char
*str)
const char *mecab_sparse_tostr2 (mecab_t *m, const char *str,
size_t len)
char *mecab_sparse_tostr3 (mecab_t *m, const char
*istr, size_t ilen, char *ostr, size_t olen)
const char *mecab_nbest_sparse_tostr
(mecab_t *m, size_t N, const char *str)
const char *mecab_nbest_sparse_tostr2
(mecab_t *m, size_t N, const char *str, size_t len)
char *mecab_nbest_sparse_tostr3
(mecab_t *m, size_t N, const char *str, size_t len, char *ostr, size_t olen)
int mecab_nbest_init
(mecab_t* m, const char* str);
int mecab_nbest_init2
(mecab_t* m, const char* str, size_t len);
const char *mecab_nbest_next_tostr
(mecab_t* m)
char *mecab_nbest_next_tostr2
(mecab_t *m , char *ostr, size_t olen)
void mecab_destroy(mecab_t *m)
형태소 정보를 꺼내려면, 이하의 mecab_node_t 구조체와 mecab_sparse_tonode 함수를 사용합니다.
// Values stored in mecab_node_t::stat, identifying the kind of node.
// The examples print "BOS" / "EOS" for the last two; NOR / UNK presumably
// mean "normal" and "unknown" -- confirm against mecab.h.
#define MECAB_NOR_NODE 0
#define MECAB_UNK_NODE 1
#define MECAB_BOS_NODE 2
#define MECAB_EOS_NODE 3

// One morpheme in the analysis result. Nodes are chained through
// prev / next; iterate with `for (; node; node = node->next)`.
struct mecab_node_t {
  struct mecab_node_t *prev;   // pointer to the previous morpheme
  struct mecab_node_t *next;   // pointer to the next morpheme
  struct mecab_node_t *enext;  // pointer to a morpheme that ends at the same position
  struct mecab_node_t *bnext;  // pointer to a morpheme that begins at the same start position
  char *surface;               // surface string of the morpheme.
                               // It is NOT NUL-terminated. To extract it as a string use
                               //   strncpy(buf, node->surface, node->length)
                               // (and terminate buf yourself).
  char *feature;               // feature information, written as CSV
  unsigned int length;         // length of the morpheme
  unsigned int rlength;        // length of the morpheme (including leading whitespace)
  unsigned int id;             // unique ID assigned to the morpheme
  unsigned short rcAttr;       // right context id
  unsigned short lcAttr;       // left context id
  unsigned short posid;        // morpheme ID (unused)
  unsigned char char_type;     // character type information
  unsigned char stat;          // kind of morpheme: one of the following macro values
                               //   #define MECAB_NOR_NODE 0
                               //   #define MECAB_UNK_NODE 1
                               //   #define MECAB_BOS_NODE 2
                               //   #define MECAB_EOS_NODE 3
  unsigned char isbest;        // 1 if the node belongs to the best solution, 0 otherwise
  float alpha;                 // forward log probability (forward-backward)
  float beta;                  // backward log probability (forward-backward)
  float prob;                  // marginal probability
                               // alpha, beta and prob are defined only when the
                               // -l 2 option is specified
  short wcost;                 // word occurrence cost
  long cost;                   // accumulated cost
};
mecab_node_t *mecab_sparse_tonode (mecab_t *m, const char *str)
mecab_node_t *mecab_sparse_tonode2 (mecab_t *m, const char *str,
size_t len)
mecab_node_t *mecab_next_tonode
(mecab_t* m)
example/example.c
#include <mecab.h>
#include <stdio.h>

/*
 * Abort the example when `eval` is false/NULL: print the error text
 * obtained from mecab_strerror(), release the tagger and return -1
 * from main(). Relies on a local variable named `mecab` being in scope.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define CHECK(eval) do { if (! (eval)) { \
    fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
    mecab_destroy(mecab); \
    return -1; } } while (0)

/*
 * Demonstrates the C API: plain parsing, N-best parsing (one-shot and
 * iterative) and walking the mecab_node_t list.
 */
int main (int argc, char **argv)  {
  /* Sample sentence (restored from its mis-encoded EUC-KR form). */
  char input[1024] = "타로는 지로가 가지고 있는 책을 하나꼬에 건네주었다.";
  mecab_t *mecab;
  mecab_node_t *node;
  const char *result;
  int i;

  mecab = mecab_new (argc, argv);
  CHECK(mecab);

  /* 1-best result as a formatted string. */
  result = mecab_sparse_tostr(mecab, input);
  CHECK(result);
  printf ("INPUT: %s\n", input);
  printf ("RESULT:\n%s", result);

  /* Top 3 results in a single string. */
  result = mecab_nbest_sparse_tostr (mecab, 3, input);
  CHECK(result);
  fprintf (stdout, "NBEST:\n%s", result);

  /* Same N-best results, fetched one at a time. */
  CHECK(mecab_nbest_init(mecab, input));
  for (i = 0; i < 3; ++i) {
    result = mecab_nbest_next_tostr (mecab);
    /* Passing NULL to %s is undefined; a null result is assumed to mean
       "no further candidate" -- confirm against mecab.h. */
    if (! result) break;
    printf ("%d:\n%s", i, result);
  }

  /* Walk the node list: surface <TAB> feature. surface is not
     NUL-terminated, hence fwrite with an explicit length. */
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node; node = node->next) {
    fwrite (node->surface, sizeof(char), node->length, stdout);
    printf("\t%s\n", node->feature);
  }

  /* Walk the node list again, dumping every field. */
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node;  node = node->next) {
    printf("%u ", node->id);            /* id is unsigned int */

    if (node->stat == MECAB_BOS_NODE)
      printf("BOS");
    else if (node->stat == MECAB_EOS_NODE)
      printf("EOS");
    else
      fwrite (node->surface, sizeof(char), node->length, stdout);

    /* cost is a long, so it needs %ld rather than %d. */
    printf(" %s %d %d %d %d %d %d %d %d %f %f %f %ld\n",
           node->feature,
           (int)(node->surface - input),
           (int)(node->surface - input + node->length),
           node->rcAttr,
           node->lcAttr,
           node->posid,
           (int)node->char_type,
           (int)node->stat,
           (int)node->isbest,
           node->alpha,
           node->beta,
           node->prob,
           node->cost);
  }

  mecab_destroy(mecab);
  return 0;
}
이하가 C++ API 입니다. 기본적으로 C 의 interface와 동일합니다만,
MeCab::createTagger() (또는 Tagger::create()) 로 인스턴스를 생성하고, mecab_t 대신 Tagger 를 사용하며, 오류는 what() 으로 꺼낸다고 하는 차이가 있습니다.
namespace MeCab {

  // A Node is the same structure the C API exposes as mecab_node_t.
  typedef struct mecab_node_t Node;

  // Abstract interface of the morphological analyzer. Instances are
  // obtained from create() / createTagger() and released with `delete`.
  class Tagger {
  public:
    // Parse the input (pointer, length) and write the formatted result
    // into the caller-supplied buffer (pointer, size).
    virtual const char* parse(const char*, size_t, char*, size_t) = 0;
    // Parse the input and return the formatted result string. The length
    // defaults to 0 (presumably "treat input as NUL-terminated" -- confirm).
    virtual const char* parse(const char*, size_t = 0) = 0;
    // Parse the input and return the first node of the result list.
    virtual Node* parseToNode(const char*, size_t = 0) = 0;
    // Return the N best results (first argument = N) as one string.
    virtual const char* parseNBest(size_t, const char*, size_t = 0) = 0;
    // Prepare iterative N-best retrieval; follow with next()/nextNode().
    virtual bool parseNBestInit(const char*, size_t = 0) = 0;
    // Next N-best result as a node list.
    virtual Node* nextNode() = 0;
    // Next N-best result as a formatted string.
    virtual const char* next() = 0;
    // Format a single node as a string.
    virtual const char* formatNode(Node *) = 0;

    // Variants of the above taking a caller-supplied output buffer
    // (pointer, size).
    virtual const char* next(char*, size_t) = 0;
    virtual const char* parseNBest(size_t, const char*,
                                   size_t, char *, size_t) = 0;
    virtual const char* formatNode(Node *, char *, size_t) = 0;

    // Error message for this tagger.
    virtual const char* what() = 0;

    // Virtual destructor so a Tagger can be deleted through a base pointer.
    virtual ~Tagger() {}

    // Library version string.
    static const char *version();

    // Factory functions: from (argc, argv) or from a single argument string.
    static Tagger* create(int, char**);
    static Tagger* create(const char*);
  };

  /* factory method */
  Tagger *createTagger (int, char**);
  Tagger *createTagger (const char*);

  // Error message to use when no Tagger instance is available
  // (e.g. when createTagger() itself failed).
  const char* getTaggerError ();
}
#include <iostream> #include <mecab.h> #define CHECK(eval) if (! eval) { const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); std::cerr << "Exception:" << e << std::endl; delete tagger; return -1; } int main (int argc, char **argv) { char input[1024] = " Ÿ·Î´Â Áö·Î°¡ °¡Áö°í Àִ åÀ» Çϳª²¿¿¡ °Ç³×ÁÖ¾ú´Ù."; MeCab::Tagger *tagger = MeCab::createTagger (argc, argv); CHECK(tagger); const char *result = tagger->parse(input); CHECK(result); std::cout << "INPUT: " << input << std::endl; std::cout << "RESULT: " << result << std::endl; result = tagger->parseNBest(3, input); CHECK(result); std::cout << "NBEST: " << std::endl << result; CHECK(tagger->parseNBestInit(input)); for (int i = 0; i < 3; ++i) { std::cout << i << ":" << std::endl << tagger->next(); } MeCab::Node* node = tagger->parseToNode(input); CHECK(node); for (; node; node = node->next) { std::cout.write(node->surface, node->length); } node = tagger->parseToNode(input); CHECK(node); for (; node; node = node->next) { std::cout << node->id << ' '; if (node->stat == MECAB_BOS_NODE) std::cout << "BOS"; else if (node->stat == MECAB_EOS_NODE) std::cout << "EOS"; else std::cout.write (node->surface, node->length); std::cout << ' ' << node->feature << ' ' << (int)(node->surface - input) << ' ' << (int)(node->surface - input + node->length) << ' ' << node->rcAttr << ' ' << node->lcAttr << ' ' << node->posid << ' ' << (int)node->char_type << ' ' << (int)node->stat << ' ' << (int)node->isbest << ' ' << node->alpha << ' ' << node->beta << ' ' << node->prob << ' ' << node->cost << std::endl; } delete tagger; return 0; }
% cc -O2 `mecab-config --cflags` example.c -o example `mecab-config --libs`
우선, 컴파일 작업을 실시하는 디렉토리에 include\mecab.h, bin\libmecab.dll, lib\libmecab.lib 를 카피합니다. 이 후의 작업은, 사용하는 컴파일러에 의해서 미묘하게 바뀝니다.
% gcc -DDLL_IMPORT -I. example.c -o example.exe libmecab.dll
% cl -DDLL_IMPORT -I. example.c libmecab.lib
MeCab 은, multi-thread 환경에서 사용하는 것이 가능합니다. 1 개의 스레드에 1 인스턴스(mecab_t *) 를 할당하는 경우는 스레드 세이프(thread-safe)입니다. 한층 더, 같은 사전을 계속 사용하는 한, 사전을 보관 유지하고 있는 자원은 재이용되기 때문에, 복수의 인스턴스를 작성해도 많은 메모리를 사용하는 일은 없습니다.
하나의 인스턴스를 복수의 스레드로부터 사용하는 경우는 적절하게 배타 제어할 필요가 있습니다. 다만, 퍼포먼스가 나쁘기 때문에 추천할 수 없습니다.
$Id: libmecab.html 65 2007-01-30 00:52:53Z taku-ku $;