Commit a68e36c6 by Rudolf

Initial commit of proper tree huffman

parent 0b682649
Showing with 165 additions and 76 deletions
...@@ -6,24 +6,30 @@ ...@@ -6,24 +6,30 @@
static int write_entries(struct BIT_BUFFER *bitbuf, struct tree *parent) static int write_entries(struct BIT_BUFFER *bitbuf, struct tree *parent)
{ {
int bit; int bit;
if (parent->left) {
struct tree *left = parent->left;
struct tree *right = parent->right;
if (left && right) {
bit = 1; bit = 1;
bb_write(bitbuf, &bit, 1); bb_write(bitbuf, &bit, 1);
write_entries(bitbuf, parent->left); printf(" 1");
} }
if (!parent->left) { if (left)
write_entries(bitbuf, left);
if (right)
write_entries(bitbuf, right);
if (!left && !right) {
bit = 0; bit = 0;
bb_write(bitbuf, &bit, 1); /* This means now we have 8 bits of char. */ bb_write(bitbuf, &bit, 1);
bb_writebyte(bitbuf, parent->ch); bb_writebyte(bitbuf, parent->ch);
printf(" 0");
printf("%c", parent->ch);
} }
if (parent->right) {
/* Why should we write this if we can save more bits. */
//bit = 0;
//bb_write(bitbuf, &bit, 1);
bb_writebyte(bitbuf, parent->right->ch);
}
return 0; return 0;
} }
...@@ -33,37 +39,31 @@ int encode_header(struct BIT_BUFFER *bitbuf, struct tree *parent) ...@@ -33,37 +39,31 @@ int encode_header(struct BIT_BUFFER *bitbuf, struct tree *parent)
return 0; return 0;
} }
static int read_entries(struct BIT_BUFFER *bitbuf, struct tree *parent) static int read_entry(struct BIT_BUFFER *bitbuf, struct tree *parent)
{ {
int *bit = bb_read(bitbuf, 1); int *bit;
if (bit == NULL) bit = bb_read(bitbuf, 1);
return 0;
if (*bit == 1) { if (*bit == 1) {
parent->left = insert_simple_tree(0, 0); parent->left = insert_simple_tree(0, 0);
parent->right = insert_simple_tree(0, 0); parent->right = insert_simple_tree(0, 0);
read_entries(bitbuf, parent->left); read_entry(bitbuf, parent->left);
} read_entry(bitbuf, parent->right);
} else {
/* Reads last left character (the inner left). */
if (*bit == 0) {
int *c = bb_readbyte(bitbuf); int *c = bb_readbyte(bitbuf);
parent->ch = *c; parent->ch = *c;
free(c); printf("Found from header %c\n", *c);
} }
/* Reads right character. */
if (*bit == 1) {
int *c = bb_readbyte(bitbuf);
parent->right->ch = *c;
free(c);
}
free(bit);
return 0; return 0;
} }
/*
 * Read the serialized tree description from the bit buffer into `parent`.
 *
 * Thin wrapper around read_entry(), which does the recursive work.
 *
 * bitbuf: bit buffer the header is read from.
 * parent: root node that receives the decoded tree.
 *
 * Returns whatever read_entry() returns. The function is declared to return
 * int, so it must not fall off the end without a value.
 */
static int read_entries(struct BIT_BUFFER *bitbuf, struct tree *parent)
{
	return read_entry(bitbuf, parent);
}
int decode_header(struct BIT_BUFFER *bitbuf, struct tree *parent) int decode_header(struct BIT_BUFFER *bitbuf, struct tree *parent)
{ {
read_entries(bitbuf, parent); read_entries(bitbuf, parent);
......
...@@ -48,7 +48,7 @@ void print_char(unsigned char c) ...@@ -48,7 +48,7 @@ void print_char(unsigned char c)
void print_tree(struct tree *parent) void print_tree(struct tree *parent)
{ {
int level = 1; /*int level = 1;
printf("Showing tree\n"); printf("Showing tree\n");
while (parent->left != NULL ) { while (parent->left != NULL ) {
if (parent->right) { if (parent->right) {
...@@ -70,5 +70,5 @@ void print_tree(struct tree *parent) ...@@ -70,5 +70,5 @@ void print_tree(struct tree *parent)
level += 1; level += 1;
parent = parent->left; parent = parent->left;
} }*/
} }
#include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>
#include "huffman.h" #include "huffman.h"
...@@ -27,6 +28,49 @@ struct tree *insert_proper_tree(struct tree *left, struct tree *right) ...@@ -27,6 +28,49 @@ struct tree *insert_proper_tree(struct tree *left, struct tree *right)
return new; return new;
} }
/*
 * Merge the two lowest-frequency trees of a forest into a single tree.
 *
 * forest: array of tree pointers; NULL entries are empty slots and skipped.
 * length: number of slots in `forest`.
 *
 * The forest is scanned once while tracking the smallest (smallest1) and the
 * second smallest (smallest2) tree by `freq`. On ties the tree that appears
 * earlier in the array wins. The two trees are then combined with
 * insert_proper_tree(smallest1, smallest2), i.e. the smallest one is passed
 * as the left child. The slot of smallest1 is cleared and the slot of
 * smallest2 receives the merged tree.
 *
 * Returns the merged tree. If the forest holds only one tree, that tree is
 * returned and the forest is left untouched; if it holds no tree at all,
 * NULL is returned.
 */
struct tree *merge_smallest(struct tree **forest, int length)
{
	struct tree *smallest1 = NULL;	/* lowest frequency seen so far */
	struct tree *smallest2 = NULL;	/* second lowest frequency seen so far */
	int i1 = 0, i2 = 0;		/* forest slots of smallest1/smallest2 */

	printf("--\n");

	for (int i = 0; i < length; i++) {
		struct tree *cur = forest[i];

		if (cur == NULL)
			continue;

		if (smallest1 == NULL) {
			smallest1 = cur;
			i1 = i;
			printf("init Smallest1 %c:%d\n", smallest1->ch, smallest1->freq);
			continue;
		}

		if (smallest2 == NULL) {
			printf("init Smallest2 %c:%d\n", cur->ch, cur->freq);
			/*
			 * The first two candidates are not necessarily in order;
			 * keep the invariant smallest1->freq <= smallest2->freq.
			 */
			if (cur->freq < smallest1->freq) {
				smallest2 = smallest1;
				i2 = i1;
				smallest1 = cur;
				i1 = i;
			} else {
				smallest2 = cur;
				i2 = i;
			}
			continue;
		}

		printf("comp %c:%d\n", cur->ch, cur->freq);
		if (cur->freq < smallest1->freq) {
			/*
			 * New minimum: the previous minimum is still smaller than
			 * the old runner-up, so demote it instead of dropping it.
			 */
			smallest2 = smallest1;
			i2 = i1;
			smallest1 = cur;
			i1 = i;
		} else if (cur->freq < smallest2->freq) {
			smallest2 = cur;
			i2 = i;
		}
	}

	/* Nothing to merge: avoid dereferencing a missing candidate. */
	if (smallest1 == NULL)
		return NULL;
	if (smallest2 == NULL)
		return smallest1;

	forest[i1] = NULL;
	forest[i2] = insert_proper_tree(smallest1, smallest2);

	printf("Smallest1 %c:%d\n", smallest1->ch, smallest1->freq);
	printf("Smallest2 %c:%d\n", smallest2->ch, smallest2->freq);
	printf("Merged %c:%d\n", forest[i2]->ch, forest[i2]->freq);

	return forest[i2];
}
struct tree *create_tree(FILE *file) struct tree *create_tree(FILE *file)
{ {
int freq[MY_BUF] = { }; int freq[MY_BUF] = { };
...@@ -58,6 +102,8 @@ struct tree *create_tree(FILE *file) ...@@ -58,6 +102,8 @@ struct tree *create_tree(FILE *file)
} }
} }
printf("Created initial tree with %d trees\n", ntrees);
/* Sort ascending */ /* Sort ascending */
for (int i = 0; i < ntrees; i++) { for (int i = 0; i < ntrees; i++) {
for (int j = i+1; j < ntrees; j++) { for (int j = i+1; j < ntrees; j++) {
...@@ -69,20 +115,24 @@ struct tree *create_tree(FILE *file) ...@@ -69,20 +115,24 @@ struct tree *create_tree(FILE *file)
} }
} }
int newforestcount = 0; for (int i = 0; i < ntrees; i++) {
for (int i = 0; i < ntrees-1; i++) { printf("ch %c freq %d\n", forest[i]->ch, forest[i]->freq);
struct tree *small1 = forest[i]; }
struct tree *small2 = forest[i+1];
forest[newforestcount] = insert_proper_tree(small1, small2); struct tree *parent = forest[0];
forest[i+1] = forest[newforestcount]; for (int i = 0; i < ntrees-1; i++) {
newforestcount++; parent = merge_smallest(forest, ntrees);
} }
/* We have our leader. */ /*
struct tree *parent = forest[newforestcount-1]; * At this step, we have the leader.
*/
/* This is not needed anymore */ /* This is not needed anymore */
free(forest); free(forest);
printf("Returning tree\n");
return parent; return parent;
} }
...@@ -90,41 +140,69 @@ struct tree *create_tree(FILE *file) ...@@ -90,41 +140,69 @@ struct tree *create_tree(FILE *file)
* Encoding/Decoding * Encoding/Decoding
*/ */
static int write_entry(struct BIT_BUFFER *bitbuf, struct tree *parent, static unsigned int get_max_height(struct tree *parent)
unsigned char c) {
if (!parent)
return 0;
int left_height = get_max_height(parent->left);
int right_height = get_max_height(parent->right);
return (left_height > right_height) ? left_height + 1 : right_height + 1;
}
static int get_rev_path(struct tree *parent, unsigned char c, int *buf,
unsigned int index)
{ {
int bit = 0; int bit = 0;
while (parent != NULL) { int ret = -1;
struct tree *left, *right;
left = parent->left; if (parent == NULL)
right = parent->right; return -1;
if (right->ch == c) { if (parent->ch == c)
bit = 1; return index;
bb_write(bitbuf, &bit, 1);
break;
} else if (left->left == NULL) {
bit = 0;
bb_write(bitbuf, &bit, 1);
break;
}
if ((ret = get_rev_path(parent->left, c, buf, index)) >= 0)
bit = 0; bit = 0;
bb_write(bitbuf, &bit, 1); else if ((ret = get_rev_path(parent->right, c, buf, index)) >= 0)
bit = 1;
parent = left; if (ret >= 0) {
index = ret;
buf[index] = bit;
return index+1;
}
return -1;
}
static int write_entry(struct BIT_BUFFER *bitbuf, struct tree *parent,
unsigned char c, int *pathbuf)
{
int length = get_rev_path(parent, c, pathbuf, 0);
printf("Length %d\n", length);
/* The path is in reverse. Now write it out in correct order. */
for (int i = length-1; i >= 0; i--) {
bb_write(bitbuf, &pathbuf[i], 1);
printf("i %d Bit %d\n", i, pathbuf[i]);
} }
return 0;
} }
int encode_tree(struct BIT_BUFFER *bitbuf, struct tree *parent, char *buf, int encode_tree(struct BIT_BUFFER *bitbuf, struct tree *parent, char *buf,
size_t length) size_t length)
{ {
printf("Bit pos at %d %d\n", ftell(bitbuf->fp), bitbuf->pos);
int maxheight = get_max_height(parent) - 1;
int *pathbuf = calloc(maxheight, sizeof(int));
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
write_entry(bitbuf, parent, buf[i]); write_entry(bitbuf, parent, buf[i], pathbuf);
} }
free(pathbuf);
insert_throwaways(bitbuf->fp, 8 - bitbuf->pos); insert_throwaways(bitbuf->fp, 8 - bitbuf->pos);
return 0; return 0;
...@@ -134,34 +212,45 @@ static int read_entries(struct BIT_BUFFER *bitbuf, FILE *out, ...@@ -134,34 +212,45 @@ static int read_entries(struct BIT_BUFFER *bitbuf, FILE *out,
struct tree *parent) struct tree *parent)
{ {
int *bit; int *bit;
int bitsread = 0;
int totalbits = get_file_size(bitbuf->fp) * 8;
int huffmanbits = totalbits - read_throwaways(bitbuf->fp);
printf("Total bits to parse %d\n", huffmanbits); int lastbyte = get_file_size(bitbuf->fp);
int throwaways = read_throwaways(bitbuf->fp);
struct tree *leaf = parent; struct tree *leaf = parent;
/* If we compress only one letter, the root node is the node containing
* character. So this needs special care. */
#if 0
if (!parent->left && !parent->right) {
while (ftell(bitbuf->fp) != lastbyte && bitbuf->pos + throwaways < 8) {
bit = bb_read(bitbuf, 1);
free(bit);
fwrite(&parent->ch, 1, 1, out);
}
return 0;
}
#endif
while ((bit = bb_read(bitbuf, 1)) != NULL) { while ((bit = bb_read(bitbuf, 1)) != NULL) {
printf("Reading bit %d\n", *bit);
if (*bit == 1) { if (*bit == 0)
fwrite(&leaf->right->ch, 1, 1, out);
leaf->right->freq++;
leaf = parent; /* Reset */
} else {
leaf = leaf->left; leaf = leaf->left;
if (!leaf->left) { else
fwrite(&leaf->ch, 1, 1, out); leaf = leaf->right;
leaf->freq++;
leaf = parent; /* Reset */ if (!leaf->left && !leaf->right) {
} printf("Found from encoded %c\n", leaf->ch);
fwrite(&leaf->ch, 1, 1, out);
leaf = parent;
} }
free(bit); free(bit);
if (bitsread == huffmanbits) if (ftell(bitbuf->fp) == lastbyte && bitbuf->pos + throwaways >= 8) {
printf("Stopping\n");
break; break;
}
bitsread++;
} }
return 0; return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment