Á¤±Ô Ç¥Çö¿¡ ÀÇÇÑ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕ


ÇÔ¼ö¸í
regMatch  Á¤±Ô Ç¥Çö¿¡ ÀÇÇÑ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ¾Ë°í¸®Áò
regNew    ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» À§ÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º
Çü½Ä
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ
    char *regMatch(char *text, int len, PAIR *nda);

ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º
    PAIR *regNew(char *pattern, int patlen);
Àμö
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ
    text     (ÀÔ·Â) ÅؽºÆ®
    len      (ÀÔ·Â) ÅؽºÆ®ÀÇ ±æÀÌ
    nda      (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ¿¡ ´ëÀÀÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡

ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ
    pattern  (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏ
    patlen   (ÀÔ·Â) Á¶ÇÕ ÆÐÅÏÀÇ ±æÀÌ
ÇÔ¼öÄ¡
ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ ÇÔ¼ö¿¡ ´ëÇØ
    Á¶ÇÕ ÆÐÅÏÀÌ ÅؽºÆ®·ÎºÎÅÍ ¹ß°ßµÇ¾úÀ» °æ¿ì´Â ±× Æ÷ÀÎÅÍ,
    ¹ß°ßµÇÁö ¾Ê¾Ò´ø °æ¿ì´Â 0 (NULL).

ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡ÀÇ ÀÛ¼º ÇÔ¼ö¿¡ ´ëÇØ
    ÀÛ¼ºÇÑ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡. ÀÛ¼ºÇÒ ¼ö ¾ø¾úÀ» ¶§´Â 0 (NULL).
ÁÖÀÇ »çÇ×
regNew()¸¦ È£ÃâÇØ ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÀÛ¼ºÇÑ ÈÄ,
regMatch()¸¦ ÀÌ¿ëÇØ Ä³¸¯ÅÍ ¶óÀÎ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.
¿ë·Ê(regMatch-test.c )

Çì´õ ÆÄÀÏ(regMatch.h )
typedef struct _node {
    int    pass;
    int    reach;
    struct _edge *edge;
} NODE;

typedef struct _edge {
    int    c;
    struct _node *target;
    struct _edge *next;
} EDGE;

typedef struct _pair {
    NODE   *start;
    NODE   *end;
} PAIR;

typedef enum { END, SYMBOL, AND, OR, CLOSURE, LP, RP } TOKEN;
#define EPSILON  -1
ÇÁ·Î±×·¥(regMatch.c )
#include "regMatch.h"

static int    nc;
static TOKEN  token;
static char   *pat, *patEnd;

static void getToken(void)
{
    if (pat == patEnd) {
        token = END;
        return;
    }

    switch (*pat) {
    case '|': token = OR;      break;
    case '*': token = CLOSURE; break;
    case '(': token = LP;      break;
    case ')': token = RP;      break;
    default:  token = SYMBOL;
        if (*pat == '\\' && pat+1 ! = patEnd) nc = *++pat;
        else nc = *pat;
    }
    pat++;
}

static NODE *newNode(void)
{
    NODE *node;
    
    if ((node = (NODE *) malloc(sizeof(NODE))) ! = NULL) {
        node->pass = 0;
        node->reach = 0;
        node->edge = NULL;
    }
    return node;
}

static EDGE *link(NODE *start, NODE *end, int c)
{
    EDGE *edge;

    if ((edge = (EDGE *) malloc(sizeof(EDGE))) ! = NULL) {
        edge->c = c;
        edge->target = end;
        edge->next = start->edge;
        start->edge = edge;
    }
    return edge;
}

static int orNode(PAIR *ret,
    NODE *start1, NODE *end1, NODE *start2, NODE *end2)
{
    NODE *start, *end;

    start = newNode();
    end   = newNode();
    if (start == NULL || end == NULL) return -1;
    link(start, start1, EPSILON);
    link(start, start2, EPSILON);
    link(end1, end, EPSILON);
    link(end2, end, EPSILON);
    ret->start = start;
    ret->end   = end;
    return 0;
}

static int andNode(PAIR *ret,
    NODE *start1, NODE *end1, NODE *start2, NODE *end2)
{
    link(end1, start2, EPSILON);
    ret->start = start1;
    ret->end   = end2;
    return 0;
}

static int closureNode(PAIR *ret, NODE *start1, NODE *end1)
{
    NODE *start, *end;
    
    start = newNode();
    end   = newNode();
    if (start == NULL || end == NULL) return -1;
    link(start, start1, EPSILON);
    link(end1, end, EPSILON);
    link(start, end, EPSILON);
    link(end1, start1, EPSILON);
    ret->start = start;
    ret->end   = end;
    return 0;
}

static int basicExpr(PAIR *ret)
{
    NODE *start, *end;

    if (token == SYMBOL || token == RP || token == OR) {
        start = newNode();
        end   = newNode();
        if (start == NULL || end == NULL) return -1;
        if (token == SYMBOL) {
            link(start, end, nc);
            getToken();
        } else link(start, end, EPSILON);
        ret->start = start;
        ret->end   = end;
    } else if (token == LP) {
        getToken();
        if (orExpr(ret) ! = 0) return -1;
        if (token ! = RP) return -1;
        getToken();
   } else return -1;
    return 0;
}

static int closureExpr(PAIR *ret)
{
    if (basicExpr(ret) ! = 0) return -1;
    while (token == CLOSURE) {
        if (closureNode(ret, ret->start, ret->end) ! = 0) return -1;
        getToken();
    }
    return 0;
}

static int andExpr(PAIR *ret)
{
    PAIR tmp;

    if (closureExpr(ret) ! = 0) return -1;
    while (token == SYMBOL || token == LP) {
        if (closureExpr(&tmp) ! = 0) return -1;
        if (andNode(ret, ret->start, ret->end, 
                    tmp.start, tmp.end) ! = 0) return -1;
    }
    return 0;
}

static int orExpr(PAIR *ret)
{
    PAIR tmp;

    if (andExpr(ret) ! = 0) return -1;
    while (token == OR) {
        getToken();
        if (andExpr(&tmp) ! = 0) return -1;
        if (orNode(ret, ret->start, ret->end,
                   tmp.start, tmp.end) ! = 0) return -1;
    }
    return 0;
}

PAIR *regNew(char *pattern, int patlen)
{
    PAIR *nda;
    
    if ((nda = (PAIR *) malloc(sizeof(PAIR))) == NULL) return NULL;
    patEnd = pattern + patlen;
    pat = pattern;
    getToken();
    if (orExpr(nda) == 0 && token == END) return nda;
    return NULL;
}

static int stateTransit(NODE *node, int c, int reach, int pass)
{
    EDGE *edge;
    int  ret = -1;

    node->pass = pass;
    for (edge = node->edge; edge ! = NULL; edge = edge->next) {
        if (node->reach == reach && edge->c == c) {
             if (c == EPSILON && edge->target->reach ! = reach) {
                 edge->target->reach = reach;
                 stateTransit(edge->target, c, reach, pass);
                 ret = 0;
             } else if (c ! = EPSILON && edge->target->reach ! = reach+1) {
                 edge->target->reach = reach + 1;
                 stateTransit(edge->target, c, reach, pass);
                 ret = 0;
             }
        }
        if (edge->target->pass ! = pass) {
            if (stateTransit(edge->target, c, reach, pass) == 0) ret = 0;
        }
    }
    return ret;
}

char *regMatch(char *text, int len, PAIR *nda)
{
    int  k;
    char *p;
    static int pass  = 2;
    static int reach = 0;

    do {
        k = len;
        p = text++;
        nda->start->reach = ++reach;
        while (1) {
            stateTransit(nda->start, EPSILON, reach, pass++);
            if (nda->end->reach == reach) return text - 1;
            if (k-- == 0) break;
            if (stateTransit(nda->start, *p++, reach++, pass++) ! = 0)
                break;
        }
    } while (len-- ! = 0);
    return NULL;
}
¼³¸í
º»¾Ë°í¸®Áò¿¡¼­´Â, Á¶ÇÕ ÆÐÅÏÀ¸·Î¼­´Â ´ÙÀ½°ú °°Àº °ÍÀ» ¹Þ¾ÆµéÀδÙ.
¡¡
abc|def¡¡¡¡ abc ¶Ç´Â def
a(b|) c b°¡ À־ ¾ø¾îµµ ÁÁ´Ù. Áï ac ¶Ç´Â abc
ab*c b¸¦ ¸îȸ ¹Ýº¹Çصµ ±¦Âú´Ù. Áï, ac, abc, abbc,...ÀÇ ¾ðÁ¨°¡

ÀÌ·¯ÇÑ ÆÐÅÏÀÇ ÁöÁ¤À» Á¤±Ô Ç¥Çö(regular expression)°ú ¸»ÇÑ´Ù. º¸´Ù Çü½ÄÀûÀ¸·Î ¼³¸íÇϸé(ÀÚ) ÀÌÇÏ¿Í °°ÀÌ µÈ´Ù. ´Ù¸¸, P¿Í Q´Â ÀÖ°í Â÷À̵µ Á¤±Ô Ç¥ÇöÀ¸·Î ÇÑ´Ù.

Á¤±Ô Ç¥Çö ÀǹÌ(°Å±â¿¡ ÀÏÄ¡Çϴ ij¸¯ÅÍ ¶óÀÎ)
¹®ÀÚ ±× ¹®ÀÚ ÀÚ½Å
\¹®ÀÚ ±× ¹®ÀÚ ÀÚ½Å(Ưº° ±âÈ£ |, (, ),* µîÀ» ¹®Àڷμ­ »ç¿ëÇÏ´Â Àå¼Ò ÇÕ)
(P) P
PQ P¿Í Q¸¦ ¿¬°áÇÑ °Í(¿¬°á, concatenation)
P|Q P ¶Ç´Â Q(¼±ÅÃ, union)
P* PÀÇ 0ȸ ÀÌ»óÀÇ ¹Ýº¹(ÆóÆ÷, closure)

¿¬°á°ú ¼±Åÿ¡¼­´Â, ¿¬°áÀÌ ¿ì¼±µµ°¡ ³ô°í, ÆóÆ÷´Â ÇÑÃþ ´õ ±×°Íº¸´Ù ³ô°í ÀÖ´Ù.

Á¤±Ô Ç¥ÇöÀÌ ÁÖ¾îÁö¸é(ÀÚ), º»¾Ë°í¸®Áò¿¡¼­´Â ±×°ÍÀ»¥åõÀ̸¦ Æ÷ÇÔÇÑ´Ù ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡(NonDeterministic Automaton)·Î º¯È¯ÇÑ ´ÙÀ½, ±× ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ½Ã¹Ä·¹ÀÌÆ® Çϸ鼭 ÅؽºÆ®¿ÍÀÇ Á¶ÇÕÀ» ½Ç½ÃÇÑ´Ù.

º¸´Ù °í¼ÓÀ¸·Î Á¶ÇÕÀ» ½Ç½ÃÇÏ°í ½ÍÀ» ¶§´Â, ºñ°áÁ¤¼º ÀÚµ¿ ÀåÄ¡¸¦ ÇÑÃþ ´õ ±×°Í°ú µî°¡ÀÎ °áÁ¤¼º ÀÚµ¿ ÀåÄ¡·Î º¯È¯ÇÏ´Â °ÍÀ» »ý°¢µÈ´Ù.

°ü·Ã ÇÔ¼ö
¼Ò¹Ú¹ý¿¡ µû¸£´Â ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ, KMP¹ý¿¡ µû¸£´Â ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ, RK¹ý¿¡ µû¸£´Â ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ, BM¹ý¿¡ µû¸£´Â ij¸¯ÅÍ ¶óÀÎ Á¶ÇÕ