Commit 9c75370d by Rudolf

v1 of huffman encoder/decoder

parent ecb3e2d2
# Translation units that are compiled and linked into the huffman executable.
SRC_FILES := \
bit-buffer.c \
header.c \
tree.c \
huffman.c \
print_helper.c
# Default target: build every source file into a single binary named "huffman".
# NOTE(review): the recipe line below has no leading TAB in this listing;
# presumably the indentation was stripped -- confirm against the real Makefile.
default:
gcc $(SRC_FILES) -o huffman
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "bit-buffer.h"
#include "print_helper.h"
/*
 * Serialise the tree rooted at `parent` into `bitbuf`.
 *
 * The walk follows the left children:
 *   - a node with a left child emits a 1 bit, then its left child is
 *     serialised, then the byte of its right child is written;
 *   - a node without a left child emits a 0 bit followed by its own byte.
 *
 * Every byte written is also dumped in hex on stderr.
 *
 * Returns 0 on success, -1 when `parent` is NULL.
 */
static int write_entries(struct BIT_BUFFER *bitbuf, struct tree *parent)
{
	if (parent == NULL)
		return -1;

	if (parent->left) {
		int bit = 1;

		bb_write(bitbuf, &bit, 1);
		write_entries(bitbuf, parent->left);
	} else {
		int bit = 0;

		bb_write(bitbuf, &bit, 1); /* This means now we have 8 bits of char. */
		bb_writebyte(bitbuf, parent->ch);
		fprintf(stderr, "%x\n", parent->ch);
	}

	if (parent->right) {
		/*
		 * No marker bit is written before the right byte: the reader
		 * fetches one byte for the right child after every 1 bit, so
		 * a separator would only cost space.
		 */
		bb_writebyte(bitbuf, parent->right->ch);
		fprintf(stderr, "%x\n", parent->right->ch);
	}

	return 0;
}
/*
 * Public entry point for header encoding: writes the tree rooted at `parent`
 * to `bitbuf` via write_entries(). The helper's result is not inspected;
 * this function always reports 0.
 */
int encode_header(struct BIT_BUFFER *bitbuf, struct tree *parent)
{
	(void)write_entries(bitbuf, parent);

	return 0;
}
/*
 * Rebuild the tree stored in `bitbuf` underneath `parent`.
 *
 * One bit is read first:
 *   - 1: two fresh children are attached to `parent`, the left one is filled
 *        in recursively, and afterwards one byte is read into the right
 *        child's `ch`;
 *   - 0: one byte is read into `parent->ch` (the innermost left node).
 *
 * Returns 0 when the stream ended before the marker bit or the node was read
 * completely, -1 when an allocation failed or the stream ended in the middle
 * of an entry. The buffers handed out by bb_read()/bb_readbyte() are released
 * here.
 */
static int read_entries(struct BIT_BUFFER *bitbuf, struct tree *parent)
{
	int *bit = bb_read(bitbuf, 1);

	if (bit == NULL)
		return 0;

	const int flag = *bit;

	free(bit);

	int status = 0;
	struct tree *target = NULL;	/* node that receives the next byte */

	if (flag == 1) {
		parent->left = insert_simple_tree(0, 0);
		parent->right = insert_simple_tree(0, 0);
		if (parent->left == NULL || parent->right == NULL)
			return -1;

		status = read_entries(bitbuf, parent->left);
		/* Reads right character. */
		target = parent->right;
	} else if (flag == 0) {
		/* Reads last left character (the inner left). */
		target = parent;
	}

	if (target != NULL) {
		int *c = bb_readbyte(bitbuf);

		if (c == NULL)
			return -1;	/* header truncated inside an entry */
		target->ch = *c;
		free(c);
	}

	return status;
}
/*
 * Public entry point for header decoding: fills the tree below `parent` from
 * `bitbuf` via read_entries(). The helper's result is not inspected; this
 * function always reports 0.
 */
int decode_header(struct BIT_BUFFER *bitbuf, struct tree *parent)
{
	(void)read_entries(bitbuf, parent);

	return 0;
}
#include "bit-buffer.h"
#include "tree.h"
/* Write the tree rooted at `parent` into `bitbuf`. Always returns 0. */
int encode_header(struct BIT_BUFFER *bitbuf, struct tree *parent);
/*
 * Read a tree from `bitbuf` and attach it below the caller-provided node
 * `parent`. Always returns 0.
 */
int decode_header(struct BIT_BUFFER *bitbuf, struct tree *parent);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "print_helper.h"
#include "header.h"
#include "tree.h"
/*
 * Print `c` on its own line on stdout. Newline, carriage return and tab are
 * shown as their two-character escape sequences; any other character is
 * printed as-is.
 */
void print_char(char c)
{
	if (c == '\n') {
		printf("\\n\n");
		return;
	}
	if (c == '\r') {
		printf("\\r\n");
		return;
	}
	if (c == '\t') {
		printf("\\t\n");
		return;
	}

	printf("%c\n", c);
}
/*
 * Release every node reachable from `parent`.
 *
 * The left children are followed iteratively; each right child is released
 * recursively so that a right child with children of its own is freed too
 * instead of leaking its subtree. Passing NULL is a no-op.
 */
void free_tree(struct tree *parent)
{
	while (parent != NULL) {
		struct tree *next = parent->left;

		free_tree(parent->right);
		free(parent);
		parent = next;
	}
}
/*
 * Return the size of `file` in bytes, leaving the stream position where it
 * was on entry.
 *
 * Returns -1 when the stream cannot report or change its position (for
 * example when it is not seekable).
 */
long get_file_size(FILE *file)
{
	long pos = ftell(file);

	if (pos < 0)
		return -1;
	if (fseek(file, 0, SEEK_END) != 0)
		return -1;

	long size = ftell(file);

	/* Always try to restore the caller's position, even if ftell failed. */
	if (fseek(file, pos, SEEK_SET) != 0)
		return -1;

	return size;
}
/*
 * Read up to `size` bytes from `file` into a freshly allocated buffer of
 * `size + 1` bytes.
 *
 * The buffer is always NUL-terminated; if fewer than `size` bytes could be
 * read, the unread tail is zero-filled so no uninitialised memory is exposed.
 *
 * Returns NULL when `file` is NULL, `size` is negative or the allocation
 * fails. The caller owns the returned buffer and must free() it.
 */
char *read_text(FILE *file, long size)
{
	if (file == NULL || size < 0)
		return NULL;

	size_t want = (size_t)size;
	char *buf = malloc(want + 1);

	if (buf == NULL)
		return NULL;

	size_t got = fread(buf, 1, want, file);

	/* Zero everything that was not read, including the terminator. */
	memset(buf + got, 0, want - got + 1);

	return buf;
}
/*
 * Compress the contents of `in` and write the result to `out`.
 *
 * The input is read twice (once to build the tree, once to encode the data),
 * so `in` must be seekable.
 *
 * Returns 0 on success (an empty input produces no output), -1 when the input
 * size cannot be determined, the tree cannot be built or the input cannot be
 * loaded into memory.
 */
int encode_file(FILE *in, FILE *out)
{
	struct tree *tree;
	struct BIT_BUFFER bitbuf;

	bb_init(&bitbuf, out);

	long size = get_file_size(in);

	if (size < 0) {
		fprintf(stderr, "huffman: input must be a seekable file\n");
		return -1;
	}
	if (size == 0)
		return 0;	/* nothing to encode; nothing was written yet */

	fseek(in, 0, SEEK_SET);
	tree = create_tree(in);
	if (tree == NULL)
		return -1;

	encode_header(&bitbuf, tree);

	fseek(in, 0, SEEK_SET);
	char *buf = read_text(in, size);

	if (buf == NULL) {
		free_tree(tree);
		return -1;
	}

	encode_tree(&bitbuf, tree, buf, size);

	free(buf);
	free_tree(tree);
	bb_flush(&bitbuf);

	return 0;
}
int decode_file(FILE *in, FILE *out)
{
struct BIT_BUFFER bitbuf;
bb_init(&bitbuf, in);
struct tree *decoded = malloc(sizeof(*decoded));
fseek(in, 0, SEEK_SET);
decode_header(&bitbuf, decoded);
decode_tree(&bitbuf, out, decoded);
free(decoded);
return 0;
}
/*
 * Program entry point. With exactly one argument equal to "-d" the program
 * decodes stdin to stdout; in every other case it encodes stdin to stdout.
 * The exit status is always 0.
 */
int main(int argc, char *argv[])
{
	const int want_decode = (argc == 2) && (strcmp(argv[1], "-d") == 0);

	if (want_decode)
		decode_file(stdin, stdout);
	else
		encode_file(stdin, stdout);

	return 0;
}
/* I DO NOT OWN THIS CODE. */
/* From http://stackoverflow.com/questions/111928/is-there-a-printf-converter-to-print-in-binary-format?page=2&tab=votes#tab-top */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "print_helper.h"
/*
 * Print the `size` bytes at `value` on stdout as groups of eight binary
 * digits, each group followed by a space, and finish with a newline.
 *
 * The byte order of the groups is chosen by the for_endian() macro, which
 * also declares the loop index `i` used below.
 */
void __printb(void *value, size_t size)
{
	enum { BITS_PER_BYTE = 8 };
	const uint8_t *bytes = value;
	char digits[BITS_PER_BYTE + 1];

	digits[BITS_PER_BYTE] = '\0';

	for_endian(size) {
		const uint8_t current = bytes[i];

		/* Most significant bit first. */
		for (int pos = 0; pos < BITS_PER_BYTE; ++pos)
			digits[pos] = (current & (0x80u >> pos)) ? '1' : '0';

		printf("%s ", digits);
	}

	printf("\n");
}
/* I DO NOT OWN THIS CODE. */
/* From http://stackoverflow.com/questions/111928/is-there-a-printf-converter-to-print-in-binary-format?page=2&tab=votes#tab-top */
#ifndef PRINT_HELPER_H
#define PRINT_HELPER_H

#include <stddef.h>	/* size_t */

/*
 * for_endian(size): loop header that declares `int i` and visits the byte
 * indices 0..size-1 of an object starting with the most significant byte
 * (ascending on big-endian hosts, descending on little-endian hosts).
 * The argument is parenthesised so that expressions such as `1 << 2`
 * expand correctly.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define for_endian(size) for (int i = 0; i < (int)(size); ++i)
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define for_endian(size) for (int i = (int)(size) - 1; i >= 0; --i)
#else
#error "Endianness not detected"
#endif

/* Print the `size` bytes at `value` in binary on stdout. */
void __printb(void *value, size_t size);

/*
 * printb(value): print any value in binary. The value is copied into a
 * temporary so the argument is evaluated once and rvalues are accepted.
 * Uses GNU statement expressions; __typeof__ is spelled with underscores so
 * the macro also works when the compiler runs in a strict ISO C mode.
 */
#define printb(value) \
({ \
	__typeof__(value) _v = (value); \
	__printb((__typeof__(_v) *) &_v, sizeof(_v)); \
})

#endif /* PRINT_HELPER_H */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tree.h"
#include "bit-buffer.h"
/*
 * The character set we support: number of slots in the frequency table used
 * by create_tree(), which is indexed directly by the value fgetc() returns
 * (0..255, assuming 8-bit bytes).
 */
#define MY_BUF 256
/*
 * Allocate a node without children that carries `character` and `frequency`.
 *
 * Returns the new node, or NULL when the allocation fails. The caller owns
 * the node.
 */
struct tree *insert_simple_tree(int character, int frequency)
{
	struct tree *new = malloc(sizeof(*new));

	if (new == NULL)
		return NULL;

	new->left = NULL;
	new->right = NULL;
	new->ch = character;
	new->freq = frequency;

	return new;
}
/*
 * Allocate an inner node that joins `left` and `right`. Its character is 0
 * and its frequency is the sum of both children's frequencies.
 *
 * Returns the new node, or NULL when either child is NULL or the allocation
 * fails. On failure the children are left untouched and remain owned by the
 * caller.
 */
struct tree *insert_proper_tree(struct tree *left, struct tree *right)
{
	if (left == NULL || right == NULL)
		return NULL;

	struct tree *new = malloc(sizeof(*new));

	if (new == NULL)
		return NULL;

	new->left = left;
	new->right = right;
	new->ch = 0;
	new->freq = left->freq + right->freq;

	return new;
}
/*
 * Build the coding tree for the bytes readable from `file`.
 *
 * Steps:
 *   1. count how often every byte value occurs;
 *   2. create one leaf per byte value that occurs, including 0xFF;
 *   3. sort the leaves by ascending frequency;
 *   4. fold them into a left-leaning chain: the two rarest leaves are joined
 *      first, and every further leaf becomes the right child of a new root
 *      whose left child is the chain built so far.
 *
 * The chain shape is intentional: the encoder and decoder in this file walk
 * the left children and expect a single symbol on every right child.
 *
 * Returns the root of the tree, a lone leaf when the input holds a single
 * distinct byte value, or NULL when the input is empty or an allocation
 * fails. The caller owns the returned tree.
 */
struct tree *create_tree(FILE *file)
{
	int freq[MY_BUF] = { 0 };
	int charcount = 0;
	int c;

	while ((c = fgetc(file)) != EOF) {
		if (c < 0 || c > MY_BUF-1)
			break;
		if (freq[c] == 0)
			charcount++;
		freq[c]++;
	}

	if (charcount == 0)
		return NULL;

	/* The forest holds pointers to nodes, not the nodes themselves. */
	struct tree **forest = malloc(charcount * sizeof(*forest));

	if (forest == NULL)
		return NULL;

	/* One leaf per byte value that occurs; the last slot (0xFF) counts too. */
	int ntrees = 0;

	for (int i = 0; i < MY_BUF; i++) {
		if (freq[i] == 0)
			continue;

		forest[ntrees] = insert_simple_tree(i, freq[i]);
		if (forest[ntrees] == NULL) {
			while (ntrees > 0)
				free(forest[--ntrees]);
			free(forest);
			return NULL;
		}
		ntrees++;
	}

	/* Sort ascending */
	for (int i = 0; i < ntrees; i++) {
		for (int j = i+1; j < ntrees; j++) {
			if (forest[i]->freq > forest[j]->freq) {
				struct tree *tree = forest[i];

				forest[i] = forest[j];
				forest[j] = tree;
			}
		}
	}

	/* Fold the sorted leaves into the chain, rarest symbols deepest. */
	struct tree *parent = forest[0];

	for (int i = 1; i < ntrees; i++) {
		struct tree *merged = insert_proper_tree(parent, forest[i]);

		if (merged == NULL) {
			/* Release the leaves that were not merged yet ... */
			for (int k = i; k < ntrees; k++)
				free(forest[k]);
			/* ... and the chain built so far. */
			while (parent != NULL) {
				struct tree *next = parent->left;

				free(parent->right);
				free(parent);
				parent = next;
			}
			free(forest);
			return NULL;
		}
		parent = merged;
	}

	/* This is not needed anymore */
	free(forest);

	return parent;
}
/*
* Encoding/Decoding
*/
/*
 * Emit the code for byte `c` by walking the left children from `parent`.
 *
 * At every node:
 *   - if the left child is a leaf holding `c`, a 0 bit ends the code;
 *   - if the right child holds `c`, a 1 bit ends the code;
 *   - otherwise a 0 bit is written and the walk continues in the left child.
 *
 * The left child is only compared against `c` when it is a leaf. Inner nodes
 * carry ch == 0, so comparing them would mis-encode NUL bytes.
 *
 * Returns 0 when a code was written, -1 when `c` is not present in the tree
 * (the 0 bits emitted during the walk have already been written by then).
 */
static int write_entry(struct BIT_BUFFER *bitbuf, struct tree *parent, char c)
{
	while (parent != NULL) {
		struct tree *left = parent->left;
		struct tree *right = parent->right;
		int bit;

		/* Reached a node without two children: `c` is not encodable. */
		if (left == NULL || right == NULL)
			break;

		if (left->left == NULL && left->ch == c) {
			bit = 0;
			bb_write(bitbuf, &bit, 1);
			return 0;
		}
		if (right->ch == c) {
			bit = 1;
			bb_write(bitbuf, &bit, 1);
			return 0;
		}

		bit = 0;
		bb_write(bitbuf, &bit, 1);
		parent = left;
	}

	return -1;
}
/*
 * Write the code of each of the `length` bytes in `buf` to `bitbuf`, using
 * the tree rooted at `parent`.
 *
 * The index has the same type as `length`, so buffers larger than INT_MAX
 * are handled without signed overflow. A NULL `buf` is treated as empty.
 *
 * Always returns 0.
 */
int encode_tree(struct BIT_BUFFER *bitbuf, struct tree *parent, char *buf,
		size_t length)
{
	if (buf == NULL)
		return 0;

	for (size_t i = 0; i < length; i++)
		write_entry(bitbuf, parent, buf[i]);

	return 0;
}
/*
 * Decode the bit stream in `bitbuf` with the tree rooted at `parent` and
 * write every decoded byte to `out`.
 *
 * Starting at the root for each symbol:
 *   - a 1 bit selects the right child of the current node and emits its byte;
 *   - a 0 bit moves to the left child; if that child has no left child of
 *     its own, its byte is emitted.
 * After each emitted byte the walk restarts at the root. The `freq` field of
 * every emitted node is incremented.
 *
 * Returns 0 when the stream was consumed, -1 when `parent` is NULL or a bit
 * leads to a child that does not exist (malformed stream or incomplete tree).
 */
static int read_entries(struct BIT_BUFFER *bitbuf, FILE *out,
			struct tree *parent)
{
	int *bit;
	int status = 0;
	struct tree *leaf = parent;

	if (parent == NULL)
		return -1;

	while ((bit = bb_read(bitbuf, 1)) != NULL) {
		const int value = *bit;
		struct tree *hit = NULL;	/* node whose byte is emitted */

		free(bit);

		if (value == 1) {
			hit = leaf->right;
			if (hit == NULL) {
				status = -1;
				break;
			}
		} else {
			leaf = leaf->left;
			if (leaf == NULL) {
				status = -1;
				break;
			}
			if (!leaf->left)
				hit = leaf;
		}

		if (hit != NULL) {
			fwrite(&hit->ch, 1, 1, out);
			hit->freq++;
			leaf = parent; /* Reset */
		}
	}

	/*
	 * NOTE(review): this newline goes to stdout, not to `out`, and is not
	 * part of the encoded data -- confirm whether it is intentional.
	 */
	fprintf(stdout, "\n");

	return status;
}
/*
 * Public entry point for decoding the payload: hands `bitbuf`, `out` and the
 * tree root to read_entries(). The helper's result is not inspected; this
 * function always reports 0.
 */
int decode_tree(struct BIT_BUFFER *bitbuf, FILE *out, struct tree *parent)
{
	(void)read_entries(bitbuf, out, parent);

	return 0;
}
#ifndef _TREE_H_
#define _TREE_H_
#include "bit-buffer.h"
/*
 * Node of the coding tree. Leaves carry a byte value in `ch`; nodes created
 * by insert_proper_tree() join two subtrees and have `ch` set to 0.
 */
struct tree {
/* Left child, or NULL for a node created by insert_simple_tree(). */
struct tree *left;
/* Right child, or NULL for a node created by insert_simple_tree(). */
struct tree *right;
/* Byte value of a leaf. NOTE(review): plain char, signedness is implementation-defined. */
char ch;
/* Occurrence count; for joined nodes the sum of both children's counts. */
int freq;
};
/* Allocate a childless node holding `character` and `frequency`. */
struct tree *insert_simple_tree(int character, int frequency);
/*
 * Allocate a node with the given children; its frequency is the sum of the
 * children's frequencies and its character is 0.
 */
struct tree *insert_proper_tree(struct tree *left, struct tree *right);
/*
 * Count the bytes readable from `file` and build the coding tree from those
 * counts. Returns the root; the caller releases the tree.
 */
struct tree *create_tree(FILE *file);
/*
 * Write the code of each of the `length` bytes in `buf` to `bitbuf` using the
 * tree rooted at `parent`. Returns 0.
 */
int encode_tree(struct BIT_BUFFER *bitbuf, struct tree *parent, char *buf,
size_t length);
/*
 * Read bits from `bitbuf`, translate them with the tree rooted at `parent`
 * and write the decoded bytes to `out`. Returns 0.
 */
int decode_tree(struct BIT_BUFFER *bitbuf, FILE *out, struct tree *parent);
#endif /* _TREE_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment