regMatch Á¤±Ô Ç¥Çö¿¡ ÀÇÇÑ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ¾Ë°í¸®Áò regNew ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» À§ÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ char *regMatch(char *text, int len, PAIR *nda); ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º PAIR *regNew(char *pattern, int patlen);
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ text (ÀÔ·Â) ÅؽºÆ® len (ÀÔ·Â) ÅؽºÆ®ÀÇ ±æÀÌ nda (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ¿¡ ´ëÀÀÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ pattern (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ patlen (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏÀÇ ±æÀÌ
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ Á¶ÇÕ ÆÐÅÏÀÌ ÅؽºÆ®·ÎºÎÅÍ ¹ß°ßµÇ¾úÀ» °æ¿ì´Â ±× Æ÷ÀÎÅÍ, ¹ß°ßµÇÁö ¾Ê¾Ò´ø °æ¿ì´Â 0 (NULL). ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ ÀÛ¼ºÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡. ÀÛ¼ºÇÒ ¼ö ¾ø¾úÀ» ¶§´Â 0 (NULL).
regNew()¸¦ È£ÃâÇØ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÀÛ¼ºÇÑ ÈÄ, regMatch()¸¦ ÀÌ¿ëÇØ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.
typedef struct _node { int pass; int reach; struct _edge *edge; } NODE; typedef struct _edge { int c; struct _node *target; struct _edge *next; } EDGE; typedef struct _pair { NODE *start; NODE *end; } PAIR; typedef enum { END, SYMBOL, AND, OR, CLOSURE, LP, RP } TOKEN; #define EPSILON -1
#include "regMatch.h" static int nc; static TOKEN token; static char *pat, *patEnd; static void getToken(void) { if (pat == patEnd) { token = END; return; } switch (*pat) { case '|': token = OR; break; case '*': token = CLOSURE; break; case '(': token = LP; break; case ')': token = RP; break; default: token = SYMBOL; if (*pat == '\\' && pat+1 ! = patEnd) nc = *++pat; else nc = *pat; } pat++; } static NODE *newNode(void) { NODE *node; if ((node = (NODE *) malloc(sizeof(NODE))) ! = NULL) { node->pass = 0; node->reach = 0; node->edge = NULL; } return node; } static EDGE *link(NODE *start, NODE *end, int c) { EDGE *edge; if ((edge = (EDGE *) malloc(sizeof(EDGE))) ! = NULL) { edge->c = c; edge->target = end; edge->next = start->edge; start->edge = edge; } return edge; } static int orNode(PAIR *ret, NODE *start1, NODE *end1, NODE *start2, NODE *end2) { NODE *start, *end; start = newNode(); end = newNode(); if (start == NULL || end == NULL) return -1; link(start, start1, EPSILON); link(start, start2, EPSILON); link(end1, end, EPSILON); link(end2, end, EPSILON); ret->start = start; ret->end = end; return 0; } static int andNode(PAIR *ret, NODE *start1, NODE *end1, NODE *start2, NODE *end2) { link(end1, start2, EPSILON); ret->start = start1; ret->end = end2; return 0; } static int closureNode(PAIR *ret, NODE *start1, NODE *end1) { NODE *start, *end; start = newNode(); end = newNode(); if (start == NULL || end == NULL) return -1; link(start, start1, EPSILON); link(end1, end, EPSILON); link(start, end, EPSILON); link(end1, start1, EPSILON); ret->start = start; ret->end = end; return 0; } static int basicExpr(PAIR *ret) { NODE *start, *end; if (token == SYMBOL || token == RP || token == OR) { start = newNode(); end = newNode(); if (start == NULL || end == NULL) return -1; if (token == SYMBOL) { link(start, end, nc); getToken(); } else link(start, end, EPSILON); ret->start = start; ret->end = end; } else if (token == LP) { getToken(); if (orExpr(ret) ! = 0) return -1; if (token ! = RP) return -1; getToken(); } else return -1; return 0; } static int closureExpr(PAIR *ret) { if (basicExpr(ret) ! = 0) return -1; while (token == CLOSURE) { if (closureNode(ret, ret->start, ret->end) ! = 0) return -1; getToken(); } return 0; } static int andExpr(PAIR *ret) { PAIR tmp; if (closureExpr(ret) ! = 0) return -1; while (token == SYMBOL || token == LP) { if (closureExpr(&tmp) ! = 0) return -1; if (andNode(ret, ret->start, ret->end, tmp.start, tmp.end) ! = 0) return -1; } return 0; } static int orExpr(PAIR *ret) { PAIR tmp; if (andExpr(ret) ! = 0) return -1; while (token == OR) { getToken(); if (andExpr(&tmp) ! = 0) return -1; if (orNode(ret, ret->start, ret->end, tmp.start, tmp.end) ! = 0) return -1; } return 0; } PAIR *regNew(char *pattern, int patlen) { PAIR *nda; if ((nda = (PAIR *) malloc(sizeof(PAIR))) == NULL) return NULL; patEnd = pattern + patlen; pat = pattern; getToken(); if (orExpr(nda) == 0 && token == END) return nda; return NULL; } static int stateTransit(NODE *node, int c, int reach, int pass) { EDGE *edge; int ret = -1; node->pass = pass; for (edge = node->edge; edge ! = NULL; edge = edge->next) { if (node->reach == reach && edge->c == c) { if (c == EPSILON && edge->target->reach ! = reach) { edge->target->reach = reach; stateTransit(edge->target, c, reach, pass); ret = 0; } else if (c ! = EPSILON && edge->target->reach ! = reach+1) { edge->target->reach = reach + 1; stateTransit(edge->target, c, reach, pass); ret = 0; } } if (edge->target->pass ! = pass) { if (stateTransit(edge->target, c, reach, pass) == 0) ret = 0; } } return ret; } char *regMatch(char *text, int len, PAIR *nda) { int k; char *p; static int pass = 2; static int reach = 0; do { k = len; p = text++; nda->start->reach = ++reach; while (1) { stateTransit(nda->start, EPSILON, reach, pass++); if (nda->end->reach == reach) return text - 1; if (k-- == 0) break; if (stateTransit(nda->start, *p++, reach++, pass++) ! = 0) break; } } while (len-- ! = 0); return NULL; }
abc|def¡¡¡¡ | abc ¶Ç´Â def |
a(b|) c | b°¡ À־ ¾ø¾îµµ ÁÁ´Ù. Áï ac ¶Ç´Â abc |
ab*c | b¸¦ ¸îȸ ¹Ýº¹Çصµ ±¦Âú´Ù. Áï, ac, abc, abbc,...ÀÇ ¾ðÁ¨°¡ |
ÀÌ·¯ÇÑ ÆÐÅÏÀÇ ÁöÁ¤À» Á¤±Ô Ç¥Çö(regular expression)°ú ¸»ÇÑ´Ù. º¸´Ù Çü½ÄÀûÀ¸·Î ¼³¸íÇϸé(ÀÚ) ÀÌÇÏ¿Í °°ÀÌ µÈ´Ù. ´Ù¸¸, P¿Í Q´Â ÀÖ°í Â÷À̵µ Á¤±Ô Ç¥ÇöÀ¸·Î ÇÑ´Ù.
Á¤±Ô Ç¥Çö | ÀǹÌ(°Å±â¿¡ ÀÏÄ¡Çϴ ij¸¯ÅÍ ¶óÀÎ) |
¹®ÀÚ | ±× ¹®ÀÚ ÀڽŠ|
\¹®ÀÚ | ±× ¹®ÀÚ ÀÚ½Å(Ưº° ±âÈ£ |, (, ),* µîÀ» ¹®Àڷμ »ç¿ëÇÏ´Â Àå¼Ò ÇÕ) |
(P) | P |
PQ | P¿Í Q¸¦ ¿¬°áÇÑ °Í(¿¬°á, concatenation) |
P|Q | P ¶Ç´Â Q(¼±ÅÃ, union) |
P* | PÀÇ 0ȸ ÀÌ»óÀÇ ¹Ýº¹(ÆóÆ÷, closure) |
Á¤±Ô Ç¥ÇöÀÌ ÁÖ¾îÁö¸é(ÀÚ), º»¾Ë°í¸®Áò¿¡¼´Â ±×°ÍÀ»¥åõÀ̸¦ Æ÷ÇÔÇÑ´Ù ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡(NonDeterministic Automaton)·Î º¯È¯ÇÑ ´ÙÀ½, ±× ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ½Ã¹Ä·¹ÀÌÆ® ÇÏ¸é¼ ÅؽºÆ®¿ÍÀÇ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.
º¸´Ù °í¼ÓÀ¸·Î Á¶ÇÕÀ» ½Ç½ÃÇÏ°í ½ÍÀ» ¶§´Â, ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÇÑÃþ ´õ ±×°Í°ú µî°¡ÀÎ °áÁ¤¼º ÀÚµ¿ ÀåÄ¡·Î º¯È¯ÇÏ´Â °ÍÀ» »ý°¢µÈ´Ù.