enShannon ¼¨³Í¡¤ÆÄ³ë ºÎÈ£ deShannon ¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£
¼¨³Í¡¤ÆÄ³ë ºÎÈ£
long enShannon(unsigned char *code, long clen, unsigned char *text, long tlen);
¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£
long deShannon(unsigned char *text, long tlen, unsigned char *code, long clen);
¼¨³Í¡¤ÆÄ³ë ºÎÈ£ ÇÔ¼ö¿¡ ´ëÇØ
code (ÀÔÃâ·Â) ¼¨³Í¡¤ÆÄ³ë ºÎÈ£¸¦ °Ý³³ÇÏ´Â ¹è¿
clen (ÀÔ·Â) ¹è¿ codeÀÇ ±æÀÌ
text (ÀÔ·Â) ÅØ½ºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿
tlen (ÀÔ·Â) ¹è¿ textÀÇ ±æÀÌ
¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
text (ÀÔÃâ·Â) ÅØ½ºÆ®¸¦ °Ý³³ÇÏ´Â ¹è¿
tlen (ÀÔ·Â) ¹è¿ textÀÇ ±æÀÌ
code (ÀÔ·Â) ¼¨³Í¡¤ÆÄ³ë ºÎÈ£¸¦ °Ý³³ÇÏ´Â ¹è¿
clen (ÀÔ·Â) ¹è¿ codeÀÇ ±æÀÌ
¼¨³Í¡¤ÆÄ³ë ºÎÈ£ ÇÔ¼ö¿¡ ´ëÇØ
¼¨³Í¡¤ÆÄ³ë ºÎÈ£ÀÇ ±æÀÌ. encode¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.
¼¨³Í¡¤ÆÄ³ë ºÎÈ£·ÎºÎÅÍÀÇ º¹È£ ÇÔ¼ö¿¡ ´ëÇØ
ÅØ½ºÆ®ÀÇ ±æÀÌ. º¹È£¿¡ ½ÇÆÐÇßÀ» ¶§´Â -1L.
typedef struct {
long count; /* ÃâÇö ºóµµ */
int chr; /* ¹®ÀÚ */
int len; /* ºÎÈ£¾îÀÇ ºñÆ®¼ö */
unsigned char code[8]; /* ºÎÈ£¾î */
int info; /* 0:󸮴ë, 1:Group ¼±µÎ,2:ó¸®ÇÊ */
} CELL;
#define CHAR_SIZE 256 /* ±âÈ£ Á¾·ù */
long enShannon(unsigned char *code, long clen, unsigned char *text, long tlen){
int i, j, k;
long c, len;
long max;
CELL *cell;
int cellsize;
long count[CHAR_SIZE+1];
int char2cell[CHAR_SIZE+1];
static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
CELL *getCode();
if (tlen <= 0 || clen <= 512) return -1L;
/* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ °è»ê */
for (i = 0; i < CHAR_SIZE; i++)
count[i] = 0;
for (c = 0; c < tlen; c++)
count[text[c]]++;
/* ÃÖ´ë ÃâÇö ºóµµÀÇ °ªÀÌ 2¹ÙÀÌÆ®¸¦ ³ÑÁö ¾Ê°Ô Á¶Á¤ÇÑ´Ù */
for (i = 0, max = 0; i < CHAR_SIZE; i++)
if (count[i] > max) max = count[i];
if ((k = max/0x10000L) > 0)
for (i = 0, k++; i < CHAR_SIZE; i++)
if (count[i] > 0) {
count[i] /= k;
if (count[i] == 0) count[i] = 1;
}
/* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ Ãâ·Â */
for (i = 0; i < CHAR_SIZE; i++) {
*code++ = (count[i] >> 8) & 0xff;
*code++ = count[i] & 0xff;
}
count[CHAR_SIZE] = 1; /* EOF */
/* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */
if ((cell = getCode(&cellsize, count)) == NULL) return -1L;
/* ¹®ÀÚ¸¦ ºÎÈ£¾î¿Í ´ëÀÀ½ÃŲ´Ù */
for (i = 0; i < cellsize; i++) char2cell[cell[i]. chr] = i;
/* °¢ ¹®ÀÚ¸¦ encode */
for (c = 0, i = 0, len = 512; ; c++) {
k = (c == tlen) ? 256 : text[c];
k = char2cell[k];
/* ºÎÈ£¾î¸¦ Ãâ·Â */
for (j = 0; j < cell[k]. len; j++) {
if (cell[k]. code[j>>3] & bit1[j&7]) *code |= bit1[i];
else *code &= bit0[i];
if (++i == 8) {
code++;
if (++len > clen) {
free(cell);
return -1L;
}
i = 0;
}
}
if (c == tlen)
break;
}
free(cell);
if (i > 0)
len++;
return (len > clen) ? -1L : len;
}
long deShannon(unsigned char *text, long tlen, unsigned char *code, long clen){
int i, j, k;
long c, len;
CELL *cell;
int cellsize;
long count[CHAR_SIZE+1];
# define NODE_SIZE (2*CHAR_SIZE+2)
struct { int chr, left, right; } node[NODE_SIZE];
int root, freeNode;
static unsigned char bit1[8] = {128,64,32,16,8,4,2,1};
CELL *getCode();
if (clen <= 512)
return -1L;
/* °¢ ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ ¼¼Æ® */
for (i = 0; i < CHAR_SIZE; i++)
count[i] = 0;
for (i = 0; i < 256; i++) {
count[i] = *code++;
count[i] = (count[i] << 8) | *code++;
}
/* EOF */
count[256] = 1;
/* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */
if ((cell = getCode(&cellsize, count)) == NULL) return -1L;
/* ºÎÈ£¾î¿¡ ´ëÀÀÇÏ´Â ºÎÈ£³ª¹«¸¦ ¸¸µç´Ù */
for (i = 0; i < NODE_SIZE; i++) {
node[i]. chr = NODE_SIZE;
node[i]. left = node[i]. right = 0;
}
root = 0; freeNode = 1;
for (i = 0; i < cellsize; i++) {
for (j = 0, k = root; j < cell[i]. len; j++) {
if (cell[i]. code[j>>3] & bit1[j&7]) {
if (node[k]. right == 0) node[k]. right = freeNode++;
k = node[k]. right;
} else {
if (node[k]. left == 0) node[k]. left = freeNode++;
k = node[k]. left;
}
}
node[k]. chr = cell[i]. chr;
}
/* º¹È£È */
for (i = 0, len = 0, c = 512; ; ) {
/* ºÎÈ£¾î¿¡ ´ëÀÀÇÏ´Â ¹®ÀÚ ³ëµå¸¦ ¾ò´Â´Ù */
k = root;
do {
k = (*code & bit1[i]) ? node[k]. right : node[k]. left;
if (++i == 8) {
code++;
if (++c > clen) return -1L;
i = 0;
}
} while (node[k]. chr > CHAR_SIZE);
if (node[k]. chr == CHAR_SIZE) break;
if (++len > tlen) {
free(cell);
return -1L;
}
*text++ = node[k]. chr;
}
free(cell);
return len;
}
/* ºÎÈ£¾î¸¦ ¾ò´Â´Ù */
static CELL *getCode(int *cellsize, long *count)
{
int i, j, k;
CELL *cell;
int size;
long sum, half;
int left, mid, right;
static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
/* ±×·ì Å©±âÀÇ °áÁ¤ */
for (i = 0, size = 0; i <= CHAR_SIZE; i++) if (count[i] > 0) size++;
if ((cell = (CELL *) malloc(sizeof(CELL) *size)) == NULL) return NULL;
/* ¹®ÀÚÀÇ ÃâÇö ºóµµ¸¦ ³»¸²Â÷¼ø¿¡ ¼ÒÆ® */
for (i = 0, j = 0; i <= CHAR_SIZE; i++)
if (count[i] > 0) {
cell[j]. count = count[i];
cell[j]. chr = i;
cell[j]. len = 0;
cell[j]. info = 0;
j++;
}
for (i = 0; i < size - 1; i++) {
k = i;
for (j = i+1; j < size; j++)
if (cell[j]. count > cell[k]. count) k = j;
if (k ! = i) {
long tcount = cell[k]. count;
int tchr = cell[k]. chr;
cell[k]. count = cell[i]. count;
cell[i]. count = tcount;
cell[k]. chr = cell[i]. chr;
cell[i]. chr = tchr;
}
}
cell[0]. info = 1;
/* ±×·ìÀ» ¼¼ºÐºñÀ² ÇØ, ºÎÈ£¾î¸¦ ¾ò´Â´Ù */
for ( ; ; ) {
/* ±×·ìÀÇ ¼±µÎ¸¦ ã¾Æ³½´Ù */
for (i = 0, left = -1; i < size; i++)
if (cell[i]. info == 1) {
left = i;
break;
}
if (left < 0) break;
/* ±×·ìÀÇ ¸»¹Ì¸¦ ã¾Æ³½´Ù */
for (i = left + 1, right = size - 1; i < size; i++)
if (cell[i]. info > 0) {
right = i - 1;
break;
}
/* ±×·ì³»ÀÇ ÃâÇö ºóµµÀÇ È */
for (i = left, sum = 0; i <= right; i++) sum += cell[i]. count;
/* ±×·ìÀÇ ºÐÇÒ À§Ä¡ */
for (mid = left, half = 0; mid < right && half < sum/2; mid++)
half += cell[mid]. count;
/* ºÎÈ£¾îÀÇ Ãß°¡ */
for (i = left; i <= right; i++) {
if (i>3]&=bit0[cell[i]. len&7];
else cell[i]. code[cell[i]. len>>3]|=bit1[cell[i]. len&7];
cell[i]. len++;
}
if (mid == left + 1) cell[left]. info = 2;
cell[mid]. info = (mid == right) ? 2 : 1;
}
*cellsize = size;
return cell;
}
¼¨³Í¡¤ÆÄ³ë ºÎÈ£´Â, ÃÖÃÊ·Î °í¾È µÈ ºÎÈ£ÀÌÁö¸¸, Ç×»ó ÃÖ¼ÒÀÇ Æò±Õ ºÎÈ£ÀåÀ» ¾òÀ» ¼ö ÀÖ´ÂÇãÇÁ¸¸ ÄÚµù¿¡´Â ¾ÐÃà ¼º´ÉÀÌ µÚ¶³¾îÁö±â (À§ÇØ)¶§¹®¿¡, ±× ¿ª»çÀû ÀÇÀǷκÎÅÍ Á¤º¸ÀÌ·ÐÀÇ ±³°ú¼¿¡ ÃëÇØ Àλó¶ó°í ÀÖÁö¸¸, ½ÇÁ¦ÀÇ µ¥ÀÌÅÍ ¾ÐÃà ¾Ë°í¸®ÁòÀ¸·Î¼´Â, °ÅÀÇ »ç¿ëµÇ°í ÀÖ°í ¾ø´Ù.
ºÎÈ£¾î¸¦ ÀÌÇÏ¿Í °°ÀÌ ±¸¼ºÇÒ ¼ö ÀÖ´Ù.
encode ÇÁ·Î±×·¥¿¡¼´Â, ÅØ½ºÆ®¸¦ 2ȸ ÁÖ»ç ÇÑ´Ù. 1ȸ°·Î´Â °¢ ±âÈ£(¹®ÀÚ)ÀÇ ÃâÇö ºóµµ¸¦ Á¶»çÇÑ ´ÙÀ½, ºÎÈ£¾î¸¦ ¸¸µç´Ù. ¶Ç º¹È£ ÇÒ ¼ö ÀÖµµ·Ï(µíÀÌ), °¢ ±âÈ£ÀÇ ÃâÇö ºóµµ¸¦ ºÎÈ£ÀÇ ÀϺημ Ãâ·ÂÇÑ´Ù. 2¹øÂ°ÀÇ Áֻ翡¼´Â, ½ÇÁ¦·Î °¢ ¹®ÀÚ¸¦ encode ÇÑ´Ù.
Ãâ·Â ºÎÈ£ÀÇ Æ÷¸ËÀº, ¼±µÎ¿¡ 1 ¹®ÀÚ 2¹ÙÀÌÆ®¾¿ÀÇ ÃâÇö ºóµµ, ÇÕ°è 512¹ÙÀÌÆ®ÀÇ Çì´õºÎ¿Í ±× ÈÄ¿¡ °è¼ÓµÇ´Â ºÎÈ£¾î¿·ÎºÎÅÍ µÈ´Ù.
µ¡ºÙ¿© ºÎÈ£¾î¿ÀÇ Á¾·á¸¦ ³ªÅ¸³»±â À§Çؼ(¶§¹®¿¡), ÃâÇö ºóµµ°¡ 1ÀÇ EOF ±âÈ£(ÄÚµå 256) ¿¡ ´ëÀÀÇÏ´Â ºÎÈ£¾î¸¦ ±âÀÔÇÑ´Ù.