ÇãÇÁ¸¸ ÄÚµù


ÇÔ¼ö¸í
enHuffman  ÇãÇÁ¸¸ ÄÚµù
deHuffman  ÇãÇÁ¸¸ ÄÚµùÀ¸·ÎºÎÅÍÀÇ º¹È£
Çü½Ä
ÇãÇÁ¸¸ ÄÚµù
    long enHuffman(unsigned char *code, long clen,
                   unsigned char *text, long tlen);

ÇãÇÁ¸¸ ÄÚµùÀ¸·ÎºÎÅÍÀÇ º¹È£
    long deHuffman(unsigned char *text, long tlen,
                   unsigned char *code, long clen);
Àμö
ÇãÇÁ¸¸ ÄÚµù ÇÔ¼ö¿¡ ´ëÇØ
    code  (ÀÔÃâ·Â) ÇãÇÁ¸¸ ÄÚµùÀ» °Ý³³ÇÏ´Â ¹è¿­
    clen  (ÀÔ·Â) ¹è¿­ codeÀÇ ±æÀÌ
    text  (ÀÔ·Â) ÅؽºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿­
    tlen  (ÀÔ·Â) ¹è¿­ textÀÇ ±æÀÌ

ÇãÇÁ¸¸ ÄÚµùÀ¸·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
    text  (ÀÔÃâ·Â) ÅؽºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿­
    tlen  (ÀÔ·Â) ¹è¿­ textÀÇ ±æÀÌ
    code  (ÀÔ·Â) ÇãÇÁ¸¸ ÄÚµùÀ» °Ý³³ÇÏ´Â ¹è¿­
    clen  (ÀÔ·Â) ¹è¿­ codeÀÇ ±æÀÌ
ÇÔ¼öÄ¡
ÇãÇÁ¸¸ ÄÚµù ÇÔ¼ö¿¡ ´ëÇØ
    ÇãÇÁ¸¸ ÄÚµùÀÇ ±æÀÌ. encode¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.

ÇãÇÁ¸¸ ÄÚµùÀ¸·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
    ÅؽºÆ®ÀÇ ±æÀÌ. º¹È£¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.
ÁÖÀÇ »çÇ×

¿ë·Ê(huffman-test.c )

ÇÁ·Î±×·¥(huffman.c )
typedef struct {
    long count;      /* ÀÚ ³ëµåÀÇ ÃâÇö ºóµµÀÇ È­ */
    int  parent;     /* Ä£³ëµå¿¡ */
    int  left;       /* ÁÂÃøÀÇ ¾ÆÀÌ ³ëµå¿¡ */
    int  right;      /* ¿ìÃøÀÇ ¾ÆÀÌ ³ëµå¿¡ */
} NODE;

#define CHAR_SIZE    256
#define NODE_SIZE    (2*CHAR_SIZE+2)

long enHuffman(unsigned char *code, long clen,
               unsigned char *text, long tlen)
{
    int  i, k;
    long c, len;
    int  min1, min2;
    long max;
    int  root;
    NODE node[NODE_SIZE]; int  freeNode;
    char bit[NODE_SIZE];  char *bitp;
    static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
    static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};

    if (tlen <= 0 || clen <= 2*CHAR_SIZE) return -1L;

    /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ °è»ê */
    for (i = 0; i < NODE_SIZE; i++) node[i]. count = 0;
    for (c = 0; c < tlen; c++) node[text[c]]. count++;

    /* ÃÖ´ë ÃâÇö ºóµµÀÇ °ªÀÌ 2¹ÙÀÌÆ®¸¦ ³ÑÁö ¾Ê°Ô Á¶Á¤ÇÑ´Ù */
    for (i = 0, max = 0; i < CHAR_SIZE; i++)
        if (node[i]. count > max) max = node[i]. count;
    if ((k = max/0x10000L) > 0)
        for (i = 0, k++; i < CHAR_SIZE; i++)
            if (node[i]. count > 0) {
                node[i]. count /= k;
                if (node[i]. count == 0) node[i]. count = 1;
            }

    /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ Ãâ·Â */
    for (i = 0; i < CHAR_SIZE; i++) {
        *code++ = (node[i]. count >> 8) & 0xff;
        *code++ = node[i]. count & 0xff;
    }
    
    /* EOF */
    node[CHAR_SIZE]. count = 1;

    /* ÇÏÇÁ¸Ç³ª¹«¸¦ ¸¸µç´Ù */
    node[NODE_SIZE - 1]. count = 0x10000L;          /* Æļöº´ */
    for (freeNode = CHAR_SIZE+1; ; freeNode++) {
        min1 = min2 = NODE_SIZE - 1;
        for (i = NODE_SIZE - 2; i >= 0; i--)
            if (node[i]. count > 0) {
                if (node[i]. count < node[min1]. count) {
                    min2 = min1;
                    min1 = i;
                } else if (node[i]. count < node[min2]. count)
                    min2 = i;
            }

        if (min2 == NODE_SIZE - 1) break;
        node[freeNode]. left  = min1;
        node[freeNode]. right = min2;
        node[freeNode]. count = node[min1]. count + node[min2]. count;
        node[min1]. parent = node[min2]. parent = freeNode;
        node[min1]. count = node[min2]. count = 0;
    }
    root = min1;

    /* °¢ ¹®ÀÚ¸¦ encode */
    for (c = 0, i = 0, len = 2*CHAR_SIZE; ; c++) {
        k = (c == tlen) ?  CHAR_SIZE : text[c];
        
        /* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */
        bitp = bit;
        do {
            int parent = node[k]. parent;
            *bitp++ = (node[parent]. left == k) ?  0 : 1;
            k = parent;
        } while (k ! = root);
        
        /* ¿ª¼ø¼­¿¡ Ãâ·Â */
        do {
            if (*--bitp) *code |= bit1[i];
            else         *code &= bit0[i];
            if (++i == 8) {
                code++;
                if (++len > clen) return -1L;
                i = 0;
            }
        } while (bitp ! = bit);
        if (c == tlen) break;
    }

    if (i > 0) len++;
    return (len > clen) ?  -1L : len;
}

long deHuffman(unsigned char *text, long tlen,
               unsigned char *code, long clen)
{
    int  i, k;
    long c, len;
    int  min1, min2;
    int  root;
    NODE node[NODE_SIZE]; int  freeNode;
    static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};

    if (clen <= 2*CHAR_SIZE) return -1L;

    /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ ¼¼Æ® */
    for (i = 0; i < NODE_SIZE; i++) node[i]. count = 0;
    for (i = 0; i < CHAR_SIZE; i++) {
        node[i]. count = *code++;
        node[i]. count = (node[i]. count << 8) | *code++;
    }
    
    /* EOF */
    node[CHAR_SIZE]. count = 1;

    /* ÇÏÇÁ¸Ç³ª¹«¸¦ ¸¸µç´Ù */
    node[NODE_SIZE - 1]. count = 0x10000L;          /* Æļöº´ */
    for (freeNode = CHAR_SIZE+1; ; freeNode++) {
        min1 = min2 = NODE_SIZE - 1;
        for (i = NODE_SIZE - 2; i >= 0; i--)
            if (node[i]. count > 0)
                if (node[i]. count < node[min1]. count) {
                    min2 = min1;
                    min1 = i;
                } else if (node[i]. count < node[min2]. count)
                    min2 = i;
        if (min2 == NODE_SIZE - 1) break;
        node[freeNode]. left  = min1;
        node[freeNode]. right = min2;
        node[freeNode]. count = node[min1]. count + node[min2]. count;
        node[min1]. parent = node[min2]. parent = freeNode;
        node[min1]. count = node[min2]. count = 0;
    }
    root = min1;
            
    /* º¹È£È­ */
    for (i = 0, len = 0, c = 2*CHAR_SIZE; ; ) {
        /* ºÎÈ£¾î¿¡ ´ëÀÀÇÏ´Â ¹®ÀÚ ³ëµå¸¦ ¾ò´Â´Ù */
        k = root;
        do {
            int parent = k;
            k = (*code & bit1[i]) ?  node[k]. right : node[k]. left;
            if (k >= parent) return -1L;     /* ÀÖÀ» ¼ö  ¾ø´Ù */
            if (++i == 8) {
                code++;
                if (++c > clen) return -1L;
                i = 0;
            }
        } while (k > CHAR_SIZE);
        if (k == CHAR_SIZE) break;
        if (++len > tlen) return -1L;
        *text++ = k;
    }
    return len;
}
¼³¸í
1952³â¿¡ D. A. Huffman ¿¡ ÀÇÇØ °í¾È µÈ ºÎÈ£·Î, ÀÌÇÏÀÇ Æ¯Â¡À» °¡Áø´Ù.

ÇãÇÁ¸¸ ÄÚµùÀ» ÀÌÇÏ¿Í °°ÀÌ ±¸¼ºÇÒ ¼ö ÀÖ´Ù.

  1. ÅؽºÆ®¿¡ ÀÖ´Â °¢ ±âÈ£(¹®ÀÚ)¿¡ ´ëÀÀÇÏ´Â ³ëµå¸¦ ¸¸µç´Ù. °¢°¢ ÀÇ ³ëµå¿¡´Â, ±× ±âÈ£ÀÇ ÃâÇö ºóµµ¸¦ ±âÀÔÇÑ´Ù.
  2. ÃâÇö ºóµµÀÇ °¡Àå ÀÛÀº 2°³ÀÇ ³ëµå¿¡ ´ëÇØ, Ä£³ëµå¸¦ ¸¸µé¾î, ÇÑÆí¿¡ 0 ÀÇ ¶óº§, ÇÑÆí¿¡ 1 ÀÇ ¶óº§À» ºÙÀδÙ. ¶Ç Ä£³ëµå¿¡, ¾ÆÀÌ³ë µåÀÇ ÃâÇö ºóµµÀÇ È­¸¦ ±âÀÔÇÑ´Ù. ÀÌ Ä£³ëµå´Â ÀÌÈÄ, ¾ÆÀÌ ³ëµåÀÇ ´ë½Å µÈ´Ù.
  3. ³ª¸ÓÁöÀÇ ³ëµå¿Í Ä£³ëµå·ÎºÎÅÍ, ³ëµåÀÇ ¼ö°¡ 1°³°¡ µÉ ¶§±îÁö, 2. ÀÇ Ã³¸®¸¦ ¹Ýº¹ÇÑ´Ù.
  4. ·çÆ®·ÎºÎÅÍ ±âÈ£¿¡ ´ëÀÀÇÏ´Â ³ëµå±îÁö ¿¡´Ù¸¦ ´õµë¾úÀ» ¶§¿¡ ¾òÀ» ¼ö ÀÖ´Ù 0, 1 ÀÇ ¶óº§¿­ÀÌ ±× ±âÈ£¿¡ ´ëÇÑ ºÎÈ£¾î°¡ µÈ´Ù. ¶Ç, ÀÌ¿Í °°ÀÌ ±¸¼º µÇ´Â ºÎÈ£ÀÇ ³ª¹«¸¦, ÇÏÇÁ¸Ç³ª¹«¶ó°í ºÎ¸¥´Ù.

½ÇÁ¦ÀÇ encode ÇÁ·Î±×·¥¿¡¼­´Â, ÅؽºÆ®¸¦ 2ȸ ÁÖ»ç ÇÑ´Ù. 1ȸ°·Î´Â °¢ ±âÈ£(¹®ÀÚ)ÀÇ ÃâÇö ºóµµ¸¦ Á¶»çÇÑ ´ÙÀ½, ÇÏÇÁ¸Ç³ª¹«¸¦ ¸¸µç´Ù. ¶Ç º¹È£ ÇÒ ¼ö ÀÖµµ·Ï(µíÀÌ), °¢ ±âÈ£ÀÇ ÃâÇö ºóµµ¸¦ ºÎÈ£ÀÇ ÀϺημ­ Ãâ·ÂÇÑ´Ù. 2¹ø°ÀÇ Áֻ翡¼­´Â, ÇÏÇÁ¸Ç³ª¹«¸®³ª ¹«´Ì °¢ ¹®ÀÚ¸¦ encode ÇÑ´Ù.

Ãâ·Â ºÎÈ£ÀÇ Æ÷¸ËÀº, ¼±µÎ¿¡ 1 ¹®ÀÚ 2¹ÙÀÌÆ®¾¿ÀÇ ÃâÇö ºóµµ, ÇÕ°è 512¹ÙÀÌÆ®ÀÇ Çì´õºÎ¿Í ±× ÈÄ¿¡ °è¼ÓµÇ´Â ºÎÈ£¾î¿­·ÎºÎÅÍ µÈ´Ù.

µ¡ºÙ¿© ºÎÈ£¾î¿­ÀÇ Á¾·á¸¦ ³ªÅ¸³»±â À§Çؼ­(¶§¹®¿¡), ÃâÇö ºóµµ°¡ 1ÀÇ EOF ±âÈ£(ÄÚµå 256) ¿¡ ´ëÀÀÇÏ´Â ºÎÈ£¾î¸¦ ±âÀÔÇÑ´Ù.

°ü·Ã ÇÔ¼ö
¼¨³Í¡¤ÆÄ³ë ºÎÈ£, »ê¼ú ºÎÈ£, FGK ºÎÈ£, LZ77 ºÎÈ£, LZW ºÎÈ£