shannon.c

typedef struct {
    long count;             /* 出現頻度 */
    int  chr;               /* 文字 */
    int  len;               /* 符号語のビット数 */
    unsigned char code[8];  /* 符号語 */
    int  info;              /* 0:処理待、1:Group先頭、2:処理済 */
} CELL;

#define CHAR_SIZE    256    /* 記号種類 */

long enShannon(unsigned char *code, long clen,unsigned char *text, long tlen){
    int    i, j, k;
    long   c, len;
    long   max;
    CELL   *cell;
    int    cellsize;
    long   count[CHAR_SIZE+1];
    int    char2cell[CHAR_SIZE+1];
    static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
    static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};
    CELL   *getCode();

    if (tlen <= 0 || clen <= 512) return -1L;

    /* 各文字の出現頻度を計算 */
    for (i = 0; i < CHAR_SIZE; i++) count[i] = 0;
    for (c = 0; c < tlen; c++) count[text[c]]++;

    /* 最大出現頻度の値が２バイトを越えないように調整する */
    for (i = 0, max = 0; i < CHAR_SIZE; i++)
        if (count[i] > max) max = count[i];
    if ((k = max/0x10000L) > 0)
        for (i = 0, k++; i < CHAR_SIZE; i++)
            if (count[i] > 0) {
                count[i] /= k;
                if (count[i] == 0) count[i] = 1;
            }

    /* 各文字の出現頻度を出力 */
    for (i = 0; i < CHAR_SIZE; i++) {
        *code++ = (count[i] >> 8) & 0xff;
        *code++ = count[i] & 0xff;
    }
    count[CHAR_SIZE] = 1;         /* EOF */

    /* 符号語を得る */
    if ((cell = getCode(&cellsize, count)) == NULL) return -1L;

    /* 文字を符号語と対応させる */
    for (i = 0; i < cellsize; i++) char2cell[cell[i].chr] = i;
           
    /* 各文字を符号化 */
    for (c = 0, i = 0, len = 512; ; c++) {
        k = (c == tlen) ? 256 : text[c];
        k = char2cell[k];
        
        /* 符号語を出力 */
        for (j = 0; j < cell[k].len; j++) {
            if (cell[k].code[j>>3] & bit1[j&7]) *code |= bit1[i];
            else                                *code &= bit0[i];
            if (++i == 8) {
                code++;
                if (++len > clen) {
                    free(cell);
                    return -1L;
                }
                i = 0;
            }
        }
        if (c == tlen) break;
    }
    free(cell);
    if (i > 0) len++;
    return (len > clen) ? -1L : len;
}

long deShannon(unsigned char *text, long tlen,
               unsigned char *code, long clen)
{
    int  i, j, k;
    long c, len;
    CELL   *cell;
    int    cellsize;
    long   count[CHAR_SIZE+1];
#   define NODE_SIZE (2*CHAR_SIZE+2)
    struct { int chr, left, right; } node[NODE_SIZE];
    int    root, freeNode;
    static unsigned char bit1[8] = {128,64,32,16,8,4,2,1};
    CELL   *getCode();

    if (clen <= 512) return -1L;

    /* 各文字の出現頻度をセット */
    for (i = 0; i < CHAR_SIZE; i++) count[i] = 0;
    for (i = 0; i < 256; i++) {
        count[i] = *code++;
        count[i] = (count[i] << 8) | *code++;
    }
    
    /* EOF */
    count[256] = 1;

    /* 符号語を得る */
    if ((cell = getCode(&cellsize, count)) == NULL) return -1L;

    /* 符号語に対応する符号木をつくる */
    for (i = 0; i < NODE_SIZE; i++) {
        node[i].chr = NODE_SIZE;
        node[i].left = node[i].right = 0;
    }
    root = 0; freeNode = 1;
    
    for (i = 0; i < cellsize; i++) {
        for (j = 0, k = root; j < cell[i].len; j++) {
            if (cell[i].code[j>>3] & bit1[j&7]) {
                if (node[k].right == 0) node[k].right = freeNode++;
                k = node[k].right;
            } else {
                if (node[k].left  == 0) node[k].left  = freeNode++;
                k = node[k].left;
            }
        }
        node[k].chr = cell[i].chr;
    }

    /* 復号化 */
    for (i = 0, len = 0, c = 512; ; ) {
        /* 符号語に対応する文字ノードを得る */
        k = root;
        do {
            k = (*code & bit1[i]) ? node[k].right : node[k].left;
            if (++i == 8) {
                code++;
                if (++c > clen) return -1L;
                i = 0;
            }
        } while (node[k].chr > CHAR_SIZE);
        if (node[k].chr == CHAR_SIZE) break;
        if (++len > tlen) {
            free(cell);
            return -1L;
        }
        *text++ = node[k].chr;
    }
    free(cell);
    return len;
}

/* 符号語を得る */
static CELL *getCode(int *cellsize, long *count){
    int    i, j,  k;
    CELL   *cell;
    int    size;
    long   sum, half;
    int    left, mid, right;
    static unsigned char bit0[8] = {~128,~64,~32,~16,~8,~4,~2,~1};
    static unsigned char bit1[8] = { 128, 64, 32, 16, 8, 4, 2, 1};

    /* グループ大きさの決定 */
    for (i = 0, size = 0; i <= CHAR_SIZE; i++) if (count[i] > 0) size++;

    if ((cell = (CELL *)malloc(sizeof(CELL)*size)) == NULL) return NULL;

    /* 文字の出現頻度を降順にソート */
    for (i = 0, j = 0; i <= CHAR_SIZE; i++)
        if (count[i] > 0) {
            cell[j].count = count[i];
            cell[j].chr = i;
            cell[j].len = 0;
            cell[j].info = 0;
            j++;
        }
    for (i = 0; i < size - 1; i++) {
        k = i;
        for (j = i+1; j < size; j++)
            if (cell[j].count > cell[k].count) k = j;
        if (k != i) {
            long tcount = cell[k].count;
            int  tchr = cell[k].chr;
            cell[k].count = cell[i].count;
            cell[i].count = tcount;
            cell[k].chr = cell[i].chr;
            cell[i].chr = tchr;
        }
    }
    cell[0].info = 1;

    /* グループを細分割して、符号語を得る */
    for ( ; ; ) {
        /* グループの先頭を見つける */
        for (i = 0, left = -1; i < size; i++)
            if (cell[i].info == 1) {
                left = i;
                break;
            }
        if (left < 0) break;

        /* グループの末尾を見つける */
        for (i = left + 1, right = size - 1; i < size; i++)
            if (cell[i].info > 0) {
                right = i - 1;
                break;
            }

        /* グループ内の出現頻度の和 */
        for (i = left, sum = 0; i <= right; i++) sum += cell[i].count;

        /* グループの分割位置 */
        for (mid = left, half = 0; mid < right && half < sum/2; mid++)
            half += cell[mid].count;

        /* 符号語の追加 */
        for (i = left; i <= right; i++) {
            if (i<mid) cell[i].code[cell[i].len>>3]&=bit0[cell[i].len&7];
            else       cell[i].code[cell[i].len>>3]|=bit1[cell[i].len&7];
            cell[i].len++;
        }
        if (mid == left + 1) cell[left].info = 2;
        cell[mid].info = (mid == right) ? 2 : 1;
    }
    *cellsize = size;
    return cell;
}