regMatch Á¤±Ô Ç¥Çö¿¡ ÀÇÇÑ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ¾Ë°í¸®Áò regNew ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» À§ÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ
char *regMatch(char *text, int len, PAIR *nda);
ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º
PAIR *regNew(char *pattern, int patlen);
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ
text (ÀÔ·Â) ÅØ½ºÆ®
len (ÀÔ·Â) ÅØ½ºÆ®ÀÇ ±æÀÌ
nda (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ¿¡ ´ëÀÀÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡
ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ
pattern (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ
patlen (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏÀÇ ±æÀÌ
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ
Á¶ÇÕ ÆÐÅÏÀÌ ÅØ½ºÆ®·ÎºÎÅÍ ¹ß°ßµÇ¾úÀ» °æ¿ì´Â ±× Æ÷ÀÎÅÍ,
¹ß°ßµÇÁö ¾Ê¾Ò´ø °æ¿ì´Â 0 (NULL).
ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ
ÀÛ¼ºÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡. ÀÛ¼ºÇÒ ¼ö ¾ø¾úÀ» ¶§´Â 0 (NULL).
regNew()¸¦ È£ÃâÇØ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÀÛ¼ºÇÑ ÈÄ, regMatch()¸¦ ÀÌ¿ëÇØ ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.
typedef struct _node {
int pass;
int reach;
struct _edge *edge;
} NODE;
typedef struct _edge {
int c;
struct _node *target;
struct _edge *next;
} EDGE;
typedef struct _pair {
NODE *start;
NODE *end;
} PAIR;
typedef enum { END, SYMBOL, AND, OR, CLOSURE, LP, RP } TOKEN;
#define EPSILON -1
#include "regMatch.h"
static int nc;
static TOKEN token;
static char *pat, *patEnd;
static void getToken(void)
{
if (pat == patEnd) {
token = END;
return;
}
switch (*pat) {
case '|': token = OR; break;
case '*': token = CLOSURE; break;
case '(': token = LP; break;
case ')': token = RP; break;
default: token = SYMBOL;
if (*pat == '\\' && pat+1 ! = patEnd) nc = *++pat;
else nc = *pat;
}
pat++;
}
static NODE *newNode(void)
{
NODE *node;
if ((node = (NODE *) malloc(sizeof(NODE))) ! = NULL) {
node->pass = 0;
node->reach = 0;
node->edge = NULL;
}
return node;
}
static EDGE *link(NODE *start, NODE *end, int c)
{
EDGE *edge;
if ((edge = (EDGE *) malloc(sizeof(EDGE))) ! = NULL) {
edge->c = c;
edge->target = end;
edge->next = start->edge;
start->edge = edge;
}
return edge;
}
static int orNode(PAIR *ret,
NODE *start1, NODE *end1, NODE *start2, NODE *end2)
{
NODE *start, *end;
start = newNode();
end = newNode();
if (start == NULL || end == NULL) return -1;
link(start, start1, EPSILON);
link(start, start2, EPSILON);
link(end1, end, EPSILON);
link(end2, end, EPSILON);
ret->start = start;
ret->end = end;
return 0;
}
static int andNode(PAIR *ret,
NODE *start1, NODE *end1, NODE *start2, NODE *end2)
{
link(end1, start2, EPSILON);
ret->start = start1;
ret->end = end2;
return 0;
}
static int closureNode(PAIR *ret, NODE *start1, NODE *end1)
{
NODE *start, *end;
start = newNode();
end = newNode();
if (start == NULL || end == NULL) return -1;
link(start, start1, EPSILON);
link(end1, end, EPSILON);
link(start, end, EPSILON);
link(end1, start1, EPSILON);
ret->start = start;
ret->end = end;
return 0;
}
static int basicExpr(PAIR *ret)
{
NODE *start, *end;
if (token == SYMBOL || token == RP || token == OR) {
start = newNode();
end = newNode();
if (start == NULL || end == NULL) return -1;
if (token == SYMBOL) {
link(start, end, nc);
getToken();
} else link(start, end, EPSILON);
ret->start = start;
ret->end = end;
} else if (token == LP) {
getToken();
if (orExpr(ret) ! = 0) return -1;
if (token ! = RP) return -1;
getToken();
} else return -1;
return 0;
}
static int closureExpr(PAIR *ret)
{
if (basicExpr(ret) ! = 0) return -1;
while (token == CLOSURE) {
if (closureNode(ret, ret->start, ret->end) ! = 0) return -1;
getToken();
}
return 0;
}
static int andExpr(PAIR *ret)
{
PAIR tmp;
if (closureExpr(ret) ! = 0) return -1;
while (token == SYMBOL || token == LP) {
if (closureExpr(&tmp) ! = 0) return -1;
if (andNode(ret, ret->start, ret->end,
tmp.start, tmp.end) ! = 0) return -1;
}
return 0;
}
static int orExpr(PAIR *ret)
{
PAIR tmp;
if (andExpr(ret) ! = 0) return -1;
while (token == OR) {
getToken();
if (andExpr(&tmp) ! = 0) return -1;
if (orNode(ret, ret->start, ret->end,
tmp.start, tmp.end) ! = 0) return -1;
}
return 0;
}
PAIR *regNew(char *pattern, int patlen)
{
PAIR *nda;
if ((nda = (PAIR *) malloc(sizeof(PAIR))) == NULL) return NULL;
patEnd = pattern + patlen;
pat = pattern;
getToken();
if (orExpr(nda) == 0 && token == END) return nda;
return NULL;
}
static int stateTransit(NODE *node, int c, int reach, int pass)
{
EDGE *edge;
int ret = -1;
node->pass = pass;
for (edge = node->edge; edge ! = NULL; edge = edge->next) {
if (node->reach == reach && edge->c == c) {
if (c == EPSILON && edge->target->reach ! = reach) {
edge->target->reach = reach;
stateTransit(edge->target, c, reach, pass);
ret = 0;
} else if (c ! = EPSILON && edge->target->reach ! = reach+1) {
edge->target->reach = reach + 1;
stateTransit(edge->target, c, reach, pass);
ret = 0;
}
}
if (edge->target->pass ! = pass) {
if (stateTransit(edge->target, c, reach, pass) == 0) ret = 0;
}
}
return ret;
}
char *regMatch(char *text, int len, PAIR *nda)
{
int k;
char *p;
static int pass = 2;
static int reach = 0;
do {
k = len;
p = text++;
nda->start->reach = ++reach;
while (1) {
stateTransit(nda->start, EPSILON, reach, pass++);
if (nda->end->reach == reach) return text - 1;
if (k-- == 0) break;
if (stateTransit(nda->start, *p++, reach++, pass++) ! = 0)
break;
}
} while (len-- ! = 0);
return NULL;
}
| abc|def¡¡¡¡ | abc ¶Ç´Â def |
| a(b|) c | b°¡ À־ ¾ø¾îµµ ÁÁ´Ù. Áï ac ¶Ç´Â abc |
| ab*c | b¸¦ ¸îȸ ¹Ýº¹Çصµ ±¦Âú´Ù. Áï, ac, abc, abbc,...ÀÇ ¾ðÁ¨°¡ |
ÀÌ·¯ÇÑ ÆÐÅÏÀÇ ÁöÁ¤À» Á¤±Ô Ç¥Çö(regular expression)°ú ¸»ÇÑ´Ù. º¸´Ù Çü½ÄÀûÀ¸·Î ¼³¸íÇϸé(ÀÚ) ÀÌÇÏ¿Í °°ÀÌ µÈ´Ù. ´Ù¸¸, P¿Í Q´Â ÀÖ°í Â÷À̵µ Á¤±Ô Ç¥ÇöÀ¸·Î ÇÑ´Ù.
| Á¤±Ô Ç¥Çö | ÀǹÌ(°Å±â¿¡ ÀÏÄ¡Çϴ ij¸¯ÅÍ ¶óÀÎ) |
| ¹®ÀÚ | ±× ¹®ÀÚ ÀڽŠ|
| \¹®ÀÚ | ±× ¹®ÀÚ ÀÚ½Å(Ưº° ±âÈ£ |, (, ),* µîÀ» ¹®Àڷμ »ç¿ëÇÏ´Â Àå¼Ò ÇÕ) |
| (P) | P |
| PQ | P¿Í Q¸¦ ¿¬°áÇÑ °Í(¿¬°á, concatenation) |
| P|Q | P ¶Ç´Â Q(¼±ÅÃ, union) |
| P* | PÀÇ 0ȸ ÀÌ»óÀÇ ¹Ýº¹(ÆóÆ÷, closure) |
Á¤±Ô Ç¥ÇöÀÌ ÁÖ¾îÁö¸é(ÀÚ), º»¾Ë°í¸®Áò¿¡¼´Â ±×°ÍÀ»¥åõÀ̸¦ Æ÷ÇÔÇÑ´Ù ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡(NonDeterministic Automaton)·Î º¯È¯ÇÑ ´ÙÀ½, ±× ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ½Ã¹Ä·¹ÀÌÆ® ÇÏ¸é¼ ÅØ½ºÆ®¿ÍÀÇ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.
º¸´Ù °í¼ÓÀ¸·Î Á¶ÇÕÀ» ½Ç½ÃÇÏ°í ½ÍÀ» ¶§´Â, ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÇÑÃþ ´õ ±×°Í°ú µî°¡ÀÎ °áÁ¤¼º ÀÚµ¿ ÀåÄ¡·Î º¯È¯ÇÏ´Â °ÍÀ» »ý°¢µÈ´Ù.