typedef struct {
long count; /* 出現頻度 */
int chr; /* 文字 */
int len; /* 符号語のビット数 */
unsigned char code[8]; /* 符号語 */
int info; /* 0:処理待、1:Group先頭、2:処理済 */
} CELL;
#define CHAR_SIZE 256 /* 記号種類 */
long enShannon(unsigned char *code, long clen,unsigned char *text, long tlen){
int i, j, k;
long c, len;
long max;
CELL *cell;
int cellsize;
long count[CHAR_SIZE+1];
int char2cell[CHAR_SIZE+1];
static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
CELL *getCode();
if (tlen <= 0 || clen <= 512) return -1L;
/* 各文字の出現頻度を計算 */
for (i = 0; i < CHAR_SIZE; i++) count[i] = 0;
for (c = 0; c < tlen; c++) count[text[c]]++;
/* 最大出現頻度の値が2バイトを越えないように調整する */
for (i = 0, max = 0; i < CHAR_SIZE; i++)
if (count[i] > max) max = count[i];
if ((k = max/0x10000L) > 0)
for (i = 0, k++; i < CHAR_SIZE; i++)
if (count[i] > 0) {
count[i] /= k;
if (count[i] == 0) count[i] = 1;
}
/* 各文字の出現頻度を出力 */
for (i = 0; i < CHAR_SIZE; i++) {
*code++ = (count[i] >> 8) & 0xff;
*code++ = count[i] & 0xff;
}
count[CHAR_SIZE] = 1; /* EOF */
/* 符号語を得る */
if ((cell = getCode(&cellsize, count)) == NULL) return -1L;
/* 文字を符号語と対応させる */
for (i = 0; i < cellsize; i++) char2cell[cell[i].chr] = i;
/* 各文字を符号化 */
for (c = 0, i = 0, len = 512; ; c++) {
k = (c == tlen) ? 256 : text[c];
k = char2cell[k];
/* 符号語を出力 */
for (j = 0; j < cell[k].len; j++) {
if (cell[k].code[j>>3] & bit1[j&7]) *code |= bit1[i];
else *code &= bit0[i];
if (++i == 8) {
code++;
if (++len > clen) {
free(cell);
return -1L;
}
i = 0;
}
}
if (c == tlen) break;
}
free(cell);
if (i > 0) len++;
return (len > clen) ? -1L : len;
}
long deShannon(unsigned char *text, long tlen,
unsigned char *code, long clen)
{
int i, j, k;
long c, len;
CELL *cell;
int cellsize;
long count[CHAR_SIZE+1];
# define NODE_SIZE (2*CHAR_SIZE+2)
struct { int chr, left, right; } node[NODE_SIZE];
int root, freeNode;
static unsigned char bit1[8] = {128,64,32,16,8,4,2,1};
CELL *getCode();
if (clen <= 512) return -1L;
/* 各文字の出現頻度をセット */
for (i = 0; i < CHAR_SIZE; i++) count[i] = 0;
for (i = 0; i < 256; i++) {
count[i] = *code++;
count[i] = (count[i] << 8) | *code++;
}
/* EOF */
count[256] = 1;
/* 符号語を得る */
if ((cell = getCode(&cellsize, count)) == NULL) return -1L;
/* 符号語に対応する符号木をつくる */
for (i = 0; i < NODE_SIZE; i++) {
node[i].chr = NODE_SIZE;
node[i].left = node[i].right = 0;
}
root = 0; freeNode = 1;
for (i = 0; i < cellsize; i++) {
for (j = 0, k = root; j < cell[i].len; j++) {
if (cell[i].code[j>>3] & bit1[j&7]) {
if (node[k].right == 0) node[k].right = freeNode++;
k = node[k].right;
} else {
if (node[k].left == 0) node[k].left = freeNode++;
k = node[k].left;
}
}
node[k].chr = cell[i].chr;
}
/* 復号化 */
for (i = 0, len = 0, c = 512; ; ) {
/* 符号語に対応する文字ノードを得る */
k = root;
do {
k = (*code & bit1[i]) ? node[k].right : node[k].left;
if (++i == 8) {
code++;
if (++c > clen) return -1L;
i = 0;
}
} while (node[k].chr > CHAR_SIZE);
if (node[k].chr == CHAR_SIZE) break;
if (++len > tlen) {
free(cell);
return -1L;
}
*text++ = node[k].chr;
}
free(cell);
return len;
}
/* 符号語を得る */
static CELL *getCode(int *cellsize, long *count){
int i, j, k;
CELL *cell;
int size;
long sum, half;
int left, mid, right;
static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
/* グループ大きさの決定 */
for (i = 0, size = 0; i <= CHAR_SIZE; i++) if (count[i] > 0) size++;
if ((cell = (CELL *)malloc(sizeof(CELL)*size)) == NULL) return NULL;
/* 文字の出現頻度を降順にソート */
for (i = 0, j = 0; i <= CHAR_SIZE; i++)
if (count[i] > 0) {
cell[j].count = count[i];
cell[j].chr = i;
cell[j].len = 0;
cell[j].info = 0;
j++;
}
for (i = 0; i < size - 1; i++) {
k = i;
for (j = i+1; j < size; j++)
if (cell[j].count > cell[k].count) k = j;
if (k != i) {
long tcount = cell[k].count;
int tchr = cell[k].chr;
cell[k].count = cell[i].count;
cell[i].count = tcount;
cell[k].chr = cell[i].chr;
cell[i].chr = tchr;
}
}
cell[0].info = 1;
/* グループを細分割して、符号語を得る */
for ( ; ; ) {
/* グループの先頭を見つける */
for (i = 0, left = -1; i < size; i++)
if (cell[i].info == 1) {
left = i;
break;
}
if (left < 0) break;
/* グループの末尾を見つける */
for (i = left + 1, right = size - 1; i < size; i++)
if (cell[i].info > 0) {
right = i - 1;
break;
}
/* グループ内の出現頻度の和 */
for (i = left, sum = 0; i <= right; i++) sum += cell[i].count;
/* グループの分割位置 */
for (mid = left, half = 0; mid < right && half < sum/2; mid++)
half += cell[mid].count;
/* 符号語の追加 */
for (i = left; i <= right; i++) {
if (i<mid) cell[i].code[cell[i].len>>3]&=bit0[cell[i].len&7];
else cell[i].code[cell[i].len>>3]|=bit1[cell[i].len&7];
cell[i].len++;
}
if (mid == left + 1) cell[left].info = 2;
cell[mid].info = (mid == right) ? 2 : 1;
}
*cellsize = size;
return cell;
}