¼¨³Í¡¤ÆÄ³ë ºÎÈ£


ÇÔ¼ö¸í
enShannon  ¼¨³Í¡¤ÆÄ³ë ºÎÈ£
deShannon  ¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£
Çü½Ä
¼¨³Í¡¤ÆÄ³ë ºÎÈ£
    long enShannon(unsigned char *code, long clen, unsigned char *text, long tlen);

¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£
    long deShannon(unsigned char *text, long tlen, unsigned char *code, long clen);
Àμö
¼¨³Í¡¤ÆÄ³ë ºÎÈ£ ÇÔ¼ö¿¡ ´ëÇØ
    code  (ÀÔÃâ·Â) ¼¨³Í¡¤ÆÄ³ë ºÎÈ£¸¦ °Ý³³ÇÏ´Â ¹è¿­
    clen  (ÀÔ·Â) ¹è¿­ codeÀÇ ±æÀÌ
    text  (ÀÔ·Â) ÅؽºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿­
    tlen  (ÀÔ·Â) ¹è¿­ textÀÇ ±æÀÌ

¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
    text  (ÀÔÃâ·Â) ÅؽºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿­
    tlen  (ÀÔ·Â) ¹è¿­ textÀÇ ±æÀÌ
    code  (ÀÔ·Â) ¼¨³Í¡¤ÆÄ³ë ºÎÈ£¸¦ °Ý³³ÇÏ´Â ¹è¿­
    clen  (ÀÔ·Â) ¹è¿­ codeÀÇ ±æÀÌ
ÇÔ¼öÄ¡
¼¨³Í¡¤ÆÄ³ë ºÎÈ£ ÇÔ¼ö¿¡ ´ëÇØ
    ¼¨³Í¡¤ÆÄ³ë ºÎÈ£ÀÇ ±æÀÌ. encode¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.

¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
    ÅؽºÆ®ÀÇ ±æÀÌ. º¹È£¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.
ÁÖÀÇ »çÇ×

¿ë·Ê(shannon-test.c )

ÇÁ·Î±×·¥(shannon.c )
typedef struct {
    long count;             /* ÃâÇö ºóµµ */
    int  chr;               /* ¹®ÀÚ */
    int  len;               /* ºÎÈ£¾îÀÇ ºñÆ®¼ö */
    unsigned char code[8];  /* ºÎÈ£¾î */
    int  info;              /* 0:󸮴ë, 1:Group ¼±µÎ,2:ó¸®ÇÊ */
} CELL;

#define CHAR_SIZE    256    /* ±âÈ£ Á¾·ù */

long enShannon(unsigned char *code, long clen, unsigned char *text, long tlen){
    int    i, j, k;
    long   c, len;
    long   max;
    CELL   *cell;
    int    cellsize;
    long   count[CHAR_SIZE+1];
    int    char2cell[CHAR_SIZE+1];
    static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
    static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
    CELL   *getCode();

    if (tlen <= 0 || clen <= 512) return -1L;

    /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ °è»ê */
    for (i = 0; i < CHAR_SIZE; i++) 
count[i] = 0; for (c = 0; c < tlen; c++)
count[text[c]]++; /* ÃÖ´ë ÃâÇö ºóµµÀÇ °ªÀÌ 2¹ÙÀÌÆ®¸¦ ³ÑÁö ¾Ê°Ô Á¶Á¤ÇÑ´Ù */ for (i = 0, max = 0; i < CHAR_SIZE; i++) if (count[i] > max) max = count[i]; if ((k = max/0x10000L) > 0) for (i = 0, k++; i < CHAR_SIZE; i++) if (count[i] > 0) { count[i] /= k; if (count[i] == 0) count[i] = 1; } /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ Ãâ·Â */ for (i = 0; i < CHAR_SIZE; i++) { *code++ = (count[i] >> 8) & 0xff; *code++ = count[i] & 0xff; } count[CHAR_SIZE] = 1; /* EOF */ /* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */ if ((cell = getCode(&cellsize, count)) == NULL) return -1L; /* ¹®ÀÚ¸¦ ºÎÈ£¾î¿Í ´ëÀÀ½ÃŲ´Ù */ for (i = 0; i < cellsize; i++) char2cell[cell[i]. chr] = i; /* °¢ ¹®ÀÚ¸¦ encode */ for (c = 0, i = 0, len = 512; ; c++) { k = (c == tlen) ? 256 : text[c]; k = char2cell[k]; /* ºÎÈ£¾î¸¦ Ãâ·Â */ for (j = 0; j < cell[k]. len; j++) { if (cell[k]. code[j>>3] & bit1[j&7]) *code |= bit1[i]; else *code &= bit0[i]; if (++i == 8) { code++; if (++len > clen) { free(cell); return -1L; } i = 0; } } if (c == tlen)
break; } free(cell); if (i > 0)
len++; return (len > clen) ? -1L : len; } long deShannon(unsigned char *text, long tlen, unsigned char *code, long clen){ int i, j, k; long c, len; CELL *cell; int cellsize; long count[CHAR_SIZE+1]; # define NODE_SIZE (2*CHAR_SIZE+2) struct { int chr, left, right; } node[NODE_SIZE]; int root, freeNode; static unsigned char bit1[8] = {128,64,32,16,8,4,2,1}; CELL *getCode(); if (clen <= 512)
return -1L; /* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ ¼¼Æ® */ for (i = 0; i < CHAR_SIZE; i++)
count[i] = 0; for (i = 0; i < 256; i++) { count[i] = *code++; count[i] = (count[i] << 8) | *code++; } /* EOF */ count[256] = 1; /* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */ if ((cell = getCode(&cellsize, count)) == NULL) return -1L; /* ºÎÈ£¾î¿¡ ´ëÀÀÇÏ´Â ºÎÈ£³ª¹«¸¦ ¸¸µç´Ù */ for (i = 0; i < NODE_SIZE; i++) { node[i]. chr = NODE_SIZE; node[i]. left = node[i]. right = 0; } root = 0; freeNode = 1; for (i = 0; i < cellsize; i++) { for (j = 0, k = root; j < cell[i]. len; j++) { if (cell[i]. code[j>>3] & bit1[j&7]) { if (node[k]. right == 0) node[k]. right = freeNode++; k = node[k]. right; } else { if (node[k]. left == 0) node[k]. left = freeNode++; k = node[k]. left; } } node[k]. chr = cell[i]. chr; } /* º¹È£È­ */ for (i = 0, len = 0, c = 512; ; ) { /* ºÎÈ£¾î¿¡ ´ëÀÀÇÏ´Â ¹®ÀÚ ³ëµå¸¦ ¾ò´Â´Ù */ k = root; do { k = (*code & bit1[i]) ? node[k]. right : node[k]. left; if (++i == 8) { code++; if (++c > clen) return -1L; i = 0; } } while (node[k]. chr > CHAR_SIZE); if (node[k]. chr == CHAR_SIZE) break; if (++len > tlen) { free(cell); return -1L; } *text++ = node[k]. chr; } free(cell); return len; } /* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */ static CELL *getCode(int *cellsize, long *count) { int i, j, k; CELL *cell; int size; long sum, half; int left, mid, right; static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1}; static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1}; /* ±×·ì Å©±âÀÇ °áÁ¤ */ for (i = 0, size = 0; i <= CHAR_SIZE; i++) if (count[i] > 0) size++; if ((cell = (CELL *) malloc(sizeof(CELL) *size)) == NULL) return NULL; /* ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ ³»¸²Â÷¼ø¿¡ ¼ÒÆ® */ for (i = 0, j = 0; i <= CHAR_SIZE; i++) if (count[i] > 0) { cell[j]. count = count[i]; cell[j]. chr = i; cell[j]. len = 0; cell[j]. info = 0; j++; } for (i = 0; i < size - 1; i++) { k = i; for (j = i+1; j < size; j++) if (cell[j]. count > cell[k]. count) k = j; if (k ! = i) { long tcount = cell[k]. count; int tchr = cell[k]. chr; cell[k]. count = cell[i]. count; cell[i]. count = tcount; cell[k]. chr = cell[i]. chr; cell[i]. chr = tchr; } } cell[0]. info = 1; /* ±×·ìÀ» ¼¼ºÐºñÀ² ÇØ, ºÎÈ£¾î¸¦ ¾ò´Â´Ù */ for ( ; ; ) { /* ±×·ìÀÇ ¼±µÎ¸¦ ã¾Æ³½´Ù */ for (i = 0, left = -1; i < size; i++) if (cell[i]. info == 1) { left = i; break; } if (left < 0) break; /* ±×·ìÀÇ ¸»¹Ì¸¦ ã¾Æ³½´Ù */ for (i = left + 1, right = size - 1; i < size; i++) if (cell[i]. info > 0) { right = i - 1; break; } /* ±×·ì³»ÀÇ ÃâÇö ºóµµÀÇ È­ */ for (i = left, sum = 0; i <= right; i++) sum += cell[i]. count; /* ±×·ìÀÇ ºÐÇÒ À§Ä¡ */ for (mid = left, half = 0; mid < right && half < sum/2; mid++) half += cell[mid]. count; /* ºÎÈ£¾îÀÇ Ãß°¡ */ for (i = left; i <= right; i++) { if (i>3]&=bit0[cell[i]. len&7]; else cell[i]. code[cell[i]. len>>3]|=bit1[cell[i]. len&7]; cell[i]. len++; } if (mid == left + 1) cell[left]. info = 2; cell[mid]. info = (mid == right) ? 2 : 1; } *cellsize = size; return cell; }
¼³¸í
1948³â¿¡ C. E. Shannon ¿Í R. M. Fano °¡ °ÅÀÇ µ¿½Ã±â¿¡ °í¾È Çß´Ù ºÎÈ£·Î, ÀÌÇÏÀÇ Æ¯Â¡À» °¡Áø´Ù.

¼¨³Í¡¤ÆÄ³ë ºÎÈ£´Â, ÃÖÃÊ·Î °í¾È µÈ ºÎÈ£ÀÌÁö¸¸, Ç×»ó ÃÖ¼ÒÀÇ Æò±Õ ºÎÈ£ÀåÀ» ¾òÀ» ¼ö ÀÖ´ÂÇãÇÁ¸¸ ÄÚµù¿¡´Â ¾ÐÃà ¼º´ÉÀÌ µÚ¶³¾îÁö±â (À§ÇØ)¶§¹®¿¡, ±× ¿ª»çÀû ÀÇÀǷκÎÅÍ Á¤º¸ÀÌ·ÐÀÇ ±³°ú¼­¿¡ ÃëÇØ Àλó¶ó°í ÀÖÁö¸¸, ½ÇÁ¦ÀÇ µ¥ÀÌÅÍ ¾ÐÃà ¾Ë°í¸®ÁòÀ¸·Î¼­´Â, °ÅÀÇ »ç¿ëµÇ°í ÀÖ°í ¾ø´Ù.

ºÎÈ£¾î¸¦ ÀÌÇÏ¿Í °°ÀÌ ±¸¼ºÇÒ ¼ö ÀÖ´Ù.

  1. ÅؽºÆ®¿¡ ÀÖ´Â °¢ ±âÈ£(¹®ÀÚ)¿¡ ´ëÀÀÇÏ´Â ÃâÇö ºóµµ ¸®½ºÆ®¸¦ ¸¸µç´Ù.
  2. ¸®½ºÆ®¸¦ ÃâÇö ºóµµÀÇ ³»¸²Â÷¼ø¿¡ ÁÙ¼­ ¹Ù²Û´Ù.
  3. ¸®½ºÆ®¸¦ »óÇÏ 2 ºÐÇÒÇÑ´Ù. ÀÌ ¶§, »ó¹ÝºÐ¿¡ ÀÖ´Â ÃâÇö ºóµµÀÇ È­¿Í ÇϹݽſ¡ ÀÖ´Â ÃâÇö ºóµµ°¡ °¡¶ó¾É°í °¡´ÉÇÑ ÇÑ µ¿ÀÏÇØÁöµµ·Ï(µíÀÌ) ºÐÇÒÇÑ´Ù.
  4. »ó¹ÝºÐ¿¡ 0 À», ÇϹݽſ¡ 1 À» ÇÒ´çÇÑ´Ù. ÀÌ°ÍÀº, »ó¹ÝºÐ¿¡ Æ÷ÇԵǴ ±âÈ£¿¡ ´ëÀÀÇÏ´Â ºÎÈ£¾î°¡ 0 À¸·Î ½ÃÀ۵Ǿî, ÇϹݽſ¡ Æ÷ÇԵǴ ±âÈ£¿¡ ´ëÀÀÇÏ´Â ºÎÈ£¾î°¡ 1 À¸·Î ½ÃÀ۵Ǵ °ÍÀ» ÀǹÌÇÑ´Ù.
  5. ºÐÇÒÇØ ¾òÀ» ¼ö ÀÖ´ø °¢°¢ÀÇ ¸®½ºÆ®´Â, ¿ª½Ã ±âÈ£ÀÇ ÃâÇö ºóµµÀÇ °­ ¼ø¼­·Î ³ª¶õÇØÁö°í ÀÖ´Ù. °Å±â¼­, 3. (¿Í)°ú 4. ÀÇ ¼ø¼­·Î °¢°¢ÀÇ ¸®½ºÆ®¸¦ ÀçºÐÇÒ ÇØ, ºÎÈ£¾îÀÇ ´ÙÀ½ÀÇ 1 ºñÆ®¸¦ ÇÒ´çÇÑ´Ù. ÀÌ Á¶ÀÛÀ» °¢°¢ÀÇ ¸®½ºÆ® ¿¡ Æ÷ÇԵǴ ±âÈ£ÀÇ ¼ö°¡ 1°³°¡ µÉ ¶§±îÁö ¹Ýº¹ÇÑ´Ù.

encode ÇÁ·Î±×·¥¿¡¼­´Â, ÅؽºÆ®¸¦ 2ȸ ÁÖ»ç ÇÑ´Ù. 1ȸ°·Î´Â °¢ ±âÈ£(¹®ÀÚ)ÀÇ ÃâÇö ºóµµ¸¦ Á¶»çÇÑ ´ÙÀ½, ºÎÈ£¾î¸¦ ¸¸µç´Ù. ¶Ç º¹È£ ÇÒ ¼ö ÀÖµµ·Ï(µíÀÌ), °¢ ±âÈ£ÀÇ ÃâÇö ºóµµ¸¦ ºÎÈ£ÀÇ ÀϺημ­ Ãâ·ÂÇÑ´Ù. 2¹ø°ÀÇ Áֻ翡¼­´Â, ½ÇÁ¦·Î °¢ ¹®ÀÚ¸¦ encode ÇÑ´Ù.

Ãâ·Â ºÎÈ£ÀÇ Æ÷¸ËÀº, ¼±µÎ¿¡ 1 ¹®ÀÚ 2¹ÙÀÌÆ®¾¿ÀÇ ÃâÇö ºóµµ, ÇÕ°è 512¹ÙÀÌÆ®ÀÇ Çì´õºÎ¿Í ±× ÈÄ¿¡ °è¼ÓµÇ´Â ºÎÈ£¾î¿­·ÎºÎÅÍ µÈ´Ù.

µ¡ºÙ¿© ºÎÈ£¾î¿­ÀÇ Á¾·á¸¦ ³ªÅ¸³»±â À§Çؼ­(¶§¹®¿¡), ÃâÇö ºóµµ°¡ 1ÀÇ EOF ±âÈ£(ÄÚµå 256) ¿¡ ´ëÀÀÇÏ´Â ºÎÈ£¾î¸¦ ±âÀÔÇÑ´Ù.

°ü·Ã ÇÔ¼ö
ÇãÇÁ¸¸ ÄÚµù, »ê¼ú ºÎÈ£, FGK ºÎÈ£, LZ77 ºÎÈ£, LZW ºÎÈ£