$Id: libmecab.html 65 2007-01-30 00:52:53Z taku-ku $;
C 라이브러리는 이하의 함수를 제공하고 있습니다.
/*
 * C API of libmecab, one declaration per line.
 * `mecab_t` is the opaque tagger handle; every call after creation takes it
 * as its first argument.
 */

/* Creation and diagnostics. The example program checks the handle returned by
 * mecab_new() for NULL and reports failures through mecab_strerror(). */
mecab_t *mecab_new (int argc, char **argv);
mecab_t *mecab_new2 (const char *arg);
const char *mecab_version ();
const char *mecab_strerror (mecab_t *m);

/* Parse `str` and return the result as text. The *2 variant takes an explicit
 * input length; the *3 variant additionally takes a caller-supplied output
 * buffer (ostr, olen). */
const char *mecab_sparse_tostr (mecab_t *m, const char *str);
const char *mecab_sparse_tostr2 (mecab_t *m, const char *str, size_t len);
char *mecab_sparse_tostr3 (mecab_t *m, const char *istr, size_t ilen,
                           char *ostr, size_t olen);

/* N-best variants of the text-returning parse functions; N is the number of
 * results requested (the example program passes 3). */
const char *mecab_nbest_sparse_tostr (mecab_t *m, size_t N, const char *str);
const char *mecab_nbest_sparse_tostr2 (mecab_t *m, size_t N, const char *str,
                                       size_t len);
char *mecab_nbest_sparse_tostr3 (mecab_t *m, size_t N, const char *str,
                                 size_t len, char *ostr, size_t olen);

/* Incremental N-best: initialise with the input, then fetch one result per
 * call to mecab_nbest_next_tostr(). */
int mecab_nbest_init (mecab_t *m, const char *str);
int mecab_nbest_init2 (mecab_t *m, const char *str, size_t len);
const char *mecab_nbest_next_tostr (mecab_t *m);
char *mecab_nbest_next_tostr2 (mecab_t *m, char *ostr, size_t olen);
void mecab_destroy(mecab_t *m)
형태소 정보를 꺼내려면, 이하의 mecab_node_t 구조체와 mecab_sparse_tonode 함수를 사용합니다.
#define MECAB_NOR_NODE 0
#define MECAB_UNK_NODE 1
#define MECAB_BOS_NODE 2
#define MECAB_EOS_NODE 3
struct mecab_node_t
{
struct mecab_node_t *prev; //
ÇϳªÀüÀÇ Çüżҿ¡ÀÇ Æ÷ÀÎÅÍ
struct mecab_node_t *next; //
ÇϳªÃ³ÀÇ Çüżҿ¡ÀÇ Æ÷ÀÎÅÍ
struct mecab_node_t *enext; //
°°Àº À§Ä¡¿¡¼ ³¡³ª´Â Çüżҿ¡ÀÇ Æ÷ÀÎÅÍ
struct mecab_node_t *bnext; //
°°Àº °³½Ã À§Ä¡¿¡¼ ½ÃÀ۵Ǵ Çüżҿ¡ÀÇ Æ÷ÀÎÅÍ
char *surface; //
ÇüżÒÀÇ ¹®ÀÚ¿ Á¤º¸
// NULL terminate
µÇ°í ÀÖÁö ¾Ê½À´Ï´Ù.
¹®ÀÚ¿·Î¼ ²¨³»·Á¸é
// strncpy(buf, node->feature, node->length)
(À¸)·Î ÇÒ Çʿ䰡 ÀÖ½À´Ï´Ù
char *feature; // CSV
±×¸®°í Ç¥±âµÈ Å»ý Á¤º¸
unsigned int length; //
ÇüżÒÀÇ ±æÀÌ
unsigned int rlength; //
ÇüżÒÀÇ ±æÀÌ(
¼±µÎÀÇ ½ºÆäÀ̽º¸¦ Æ÷ÇÔÇÑ´Ù)
unsigned int id; //
Çüżҿ¡ ºÎ¿©µÈ´Ù
À¯´ÏÅ©ID
unsigned short rcAttr; //
¿ì¹®¸Æ id
unsigned short lcAttr; //
¿ÞÂÊ ¹®¸Æ id
unsigned short posid; //
ÇüÅÂ¼Ò ID (
¹Ì»ç¿ë)
unsigned char char_type; //
¹®ÀÚÁ¾ Á¤º¸
unsigned char stat; //
ÇüżÒÀÇ Á¾·ù:
ÀÌÇÏÀÇ ¸ÅÅ©·ÎÀÇ °ª
// #define MECAB_NOR_NODE 0
// #define MECAB_UNK_NODE 1
// #define MECAB_BOS_NODE 2
// #define MECAB_EOS_NODE 3
unsigned char isbest; //
º£½ºÆ®ÇØÀÇ °æ¿ì 1,
±× ÀÌ¿Ü 0
float alpha; // forward backward
ÀÇ foward log
È®·ü
float beta; // forward backward
ÀÇ backward log
È®·ü
float prob; //
ÁÖº¯ È®·ü
// alpha, beta, prob
ÇÏ -l 2
¿É¼ÇÀ» ÁöÁ¤ÇßÀ» ¶§¿¡ Á¤Àǵ˴ϴÙ
short wcost; //
´Ü¾î ¹ß»ý ÄÚ½ºÆ®
long cost; //
´©Àû ÄÚ½ºÆ®
};
/*
 * Node-returning counterparts of the text API. The example program treats a
 * NULL return from mecab_sparse_tonode() as an error and otherwise walks the
 * result through node->next.
 */
mecab_node_t *mecab_sparse_tonode (mecab_t *m, const char *str);
/* Same as above with an explicit input length. */
mecab_node_t *mecab_sparse_tonode2 (mecab_t *m, const char *str, size_t len);
/* NOTE(review): the sibling N-best functions are all spelled mecab_nbest_*;
 * confirm this name against mecab.h. */
mecab_node_t *mecab_next_tonode (mecab_t *m);
example/example.c
#include <mecab.h>
#include <stdio.h>
/*
 * Error guard for the example: when `eval` is false, print the tagger's error
 * message to stderr, destroy the tagger and return -1 from the enclosing
 * function. Expects a variable named `mecab` (mecab_t *) in scope.
 * Kept as a plain if-block so that call sites written without a trailing
 * semicolon still compile.
 */
#define CHECK(eval) if (! (eval)) { \
    fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
    mecab_destroy(mecab); \
    return -1; }
int main (int argc, char **argv)
{
char input[1024] = "
Ÿ·Î´Â Áö·Î°¡ °¡Áö°í Àִ åÀ» Çϳª²¿¿¡ °Ç³×ÁÖ¾ú´Ù.";
mecab_t *mecab;
mecab_node_t *node;
const char *result;
int i;
mecab = mecab_new (argc, argv);
CHECK(mecab);
result = mecab_sparse_tostr(mecab, input);
CHECK(result)
printf ("INPUT: %sn", input);
printf ("RESULT:n%s", result);
result = mecab_nbest_sparse_tostr (mecab, 3, input);
CHECK(result);
fprintf (stdout, "NBEST:n%s", result);
CHECK(mecab_nbest_init(mecab, input));
for (i = 0; i < 3; ++i) {
printf ("%d:n%s", i, mecab_nbest_next_tostr (mecab));
}
node = mecab_sparse_tonode(mecab, input);
CHECK(node);
for (; node; node = node->next) {
fwrite (node->surface, sizeof(char), node->length, stdout);
printf("t%sn", node->feature);
}
node = mecab_sparse_tonode(mecab, input);
CHECK(node);
for (; node; node = node->next) {
printf("%d ", node->id);
if (node->stat == MECAB_BOS_NODE)
printf("BOS");
else if (node->stat == MECAB_EOS_NODE)
printf("EOS");
else
fwrite (node->surface, sizeof(char), node->length, stdout);
printf(" %s %d %d %d %d %d %d %d %d %f %f %f %dn",
node->feature,
(int)(node->surface - input),
(int)(node->surface - input + node->length),
node->rcAttr,
node->lcAttr,
node->posid,
(int)node->char_type,
(int)node->stat,
(int)node->isbest,
node->alpha,
node->beta,
node->prob,
node->cost);
}
mecab_destroy(mecab);
return 0;
}
ÀÌÇϰ¡ C++ API ÀÔ´Ï´Ù. ±âº»ÀûÀ¸·Î C ÀÇ interface¿Í µ¿ÀÏÇÕ´Ï´Ù¸¸,
±×·¸´Ù°í ÇÏ´Â Â÷À̰¡ ÀÖ½À´Ï´Ù.
namespace MeCab
{
  // C++ alias for the node structure used by the C API.
  typedef struct mecab_node_t Node;

  // Abstract tagger interface. Instances are obtained from the create() /
  // createTagger() factories and released with `delete`, as the example
  // program does; that is why the destructor must be virtual.
  // The size_t arguments that default to 0 are presumably input lengths, and
  // the trailing (char*, size_t) pairs presumably caller-supplied output
  // buffers, mirroring the *2 / *3 functions of the C API -- confirm.
  class Tagger
  {
  public:
    // Parse into a caller-supplied buffer.
    virtual const char* parse(const char*, size_t, char*, size_t) = 0;
    // Parse and return the result as text.
    virtual const char* parse(const char*, size_t = 0) = 0;
    // Parse and return the first node of the result; follow Node::next.
    virtual Node* parseToNode(const char*, size_t = 0) = 0;
    // Return the N best analyses as text (first argument is N).
    virtual const char* parseNBest(size_t, const char*, size_t = 0) = 0;
    // Prepare incremental N-best retrieval; fetch results with next().
    virtual bool parseNBestInit(const char*, size_t = 0) = 0;
    // Presumably the node-list counterpart of next() -- confirm.
    virtual Node* nextNode() = 0;
    // Next N-best result as text, after parseNBestInit().
    virtual const char* next() = 0;
    virtual const char* formatNode(Node *) = 0;
    virtual const char* next(char*, size_t) = 0;
    virtual const char* parseNBest(size_t, const char*,
                                   size_t, char *, size_t) = 0;
    virtual const char* formatNode(Node *, char *, size_t) = 0;
    // Error message of the last failure on this tagger.
    virtual const char* what() = 0;
    // Virtual destructor: taggers are deleted through Tagger*.
    virtual ~Tagger() {};

    static const char *version();
    static Tagger* create(int, char**);
    static Tagger* create(const char*);
  };

  /* factory method */
  Tagger *createTagger (int, char**);
  Tagger *createTagger (const char*);
  // Error message to use when tagger creation itself failed (no instance).
  const char* getTaggerError ();
}
#include <iostream>
#include <mecab.h>
// Error guard for the example: when `eval` is false, print the error message
// (from the tagger if one exists, otherwise from MeCab::getTaggerError()),
// delete the tagger and return -1 from the enclosing function.
// Expects a variable named `tagger` (MeCab::Tagger *) in scope.
#define CHECK(eval) if (! (eval)) { \
    const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); \
    std::cerr << "Exception:" << e << std::endl; \
    delete tagger; \
    return -1; }
// Walks through the C++ API on one sample sentence: plain parse, N-best parse,
// incremental N-best, and two passes over the node list.
// Returns 0 on success; any failed call returns -1 through CHECK.
int main (int argc, char **argv)
{
  // Sample input sentence.
  // NOTE(review): the literal has to be stored in the character encoding the
  // installed dictionary expects -- confirm when saving this file.
  char input[1024] = "타로는 지로가 가지고 있는 책을 하나꼬에 건네주었다.";

  MeCab::Tagger *tagger = MeCab::createTagger (argc, argv);
  CHECK(tagger);

  // Best analysis as text.
  const char *result = tagger->parse(input);
  CHECK(result);
  std::cout << "INPUT: " << input << std::endl;
  std::cout << "RESULT: " << result << std::endl;

  // Three best analyses as one text block.
  result = tagger->parseNBest(3, input);
  CHECK(result);
  std::cout << "NBEST: " << std::endl << result;

  // The same three analyses, fetched one at a time.
  CHECK(tagger->parseNBestInit(input));
  for (int i = 0; i < 3; ++i) {
    std::cout << i << ":" << std::endl << tagger->next();
  }

  // Node list: surface is not NUL-terminated, so write `length` bytes.
  MeCab::Node* node = tagger->parseToNode(input);
  CHECK(node);
  for (; node; node = node->next) {
    std::cout.write(node->surface, node->length);
  }

  // Node list again, dumping every field.
  node = tagger->parseToNode(input);
  CHECK(node);
  for (; node; node = node->next) {
    std::cout << node->id << ' ';

    if (node->stat == MECAB_BOS_NODE)
      std::cout << "BOS";
    else if (node->stat == MECAB_EOS_NODE)
      std::cout << "EOS";
    else
      std::cout.write (node->surface, node->length);

    // Start/end offsets are computed relative to the input buffer; the
    // unsigned char fields are widened so they print as numbers.
    std::cout << ' ' << node->feature
              << ' ' << static_cast<int>(node->surface - input)
              << ' ' << static_cast<int>(node->surface - input + node->length)
              << ' ' << node->rcAttr
              << ' ' << node->lcAttr
              << ' ' << node->posid
              << ' ' << static_cast<int>(node->char_type)
              << ' ' << static_cast<int>(node->stat)
              << ' ' << static_cast<int>(node->isbest)
              << ' ' << node->alpha
              << ' ' << node->beta
              << ' ' << node->prob
              << ' ' << node->cost << std::endl;
  }

  delete tagger;
  return 0;
}
% cc -O2 `mecab-config --cflags` example.c -o example \
    `mecab-config --libs`
우선, 컴파일 작업을 실시하는 디렉토리에 include\mecab.h, bin\libmecab.dll, lib\libmecab.lib 를 카피합니다. 이 후의 작업은, 사용하는 컴파일러에 의해서 미묘하게 바뀝니다.
% gcc -DDLL_IMPORT -I. example.c -o example.exe libmecab.dll
% cl -DDLL_IMPORT -I. example.c libmecab.lib
MeCab 은 multi-thread 환경에서 사용하는 것이 가능합니다. 1 개의 스레드에 1 인스턴스(mecab_t *)를 할당하는 경우는 스레드 세이프입니다. 한층 더, 같은 사전을 계속 사용하는 한, 사전을 보관 유지하고 있는 자원은 재이용되기 때문에, 복수의 인스턴스를 작성해도 많은 메모리를 사용하는 일은 없습니다.
하나의 인스턴스를 복수의 스레드로부터 사용하는 경우는 적당하게 배타 제어할 필요가 있습니다. 다만, 퍼포먼스가 나쁘기 때문에 추천할 수 없습니다.
$Id: libmecab.html 65 2007-01-30 00:52:53Z taku-ku $;