Huffman Encoding

It feels good to have written a compression program! One more exercise to go and CS106B is done…

This was all about debugging. Working out how to do it doesn’t take very long. Some of the bugs, once I’d found them, were bemusing. One of them turned out to be a bug in the map class provided by Stanford. Another was simply about the way I was declaring an addition to a string. Both the way I was doing it and the way that worked mean the same thing, as far as I’m aware. But I got into it, you just have to accept that if you get ahead of yourself, you’re probably going to have to end up checking every individual part for bugs. On the evidence of this, test-driven programming makes sense- so that you know that every function or class that you add does what it’s supposed to, before you add it. It turns out that a pseudo eof is only necessary if you aren’t using the frequency in the root node to decompress in a loop. So if you were to decompress recursively. But I found that had weird bugs too- I suspect due to bugs in the class libraries.

Header file for the Encoding class;

/**
 * File: encoding.h
 * ----------------
 * Defines the Encoding class to manage all aspects of the
 * Huffman compression and decompression algorithms.
 */

#ifndef _encoding_
#define _encoding_

#include "bstream.h"
#include "types.h"
#include <map>
#include "pqueue.h"


class Encoding {
    
public:
    
    void compress(ibstream& infile, obstream& outfile);
    void decompress(ibstream& infile, obstream& outfile);
    

private:
    
    PQueue huffqueue;
    Node *currentroot;
    Node *treeroot;
    map<int, int> frequencyTable;
    map<int, std::string> referenceTable;
    int leafcount;
    
    void getFrequency(ibstream& infile);
    void enqueueNodes();
    void buildTree(int leaves);
    void createReferenceTable(int leaves);
    void tracePaths(Node *root, std::string path, int leaves);
    void encodeFile(ibstream& infile, obstream& outfile, int leaves);
    void deleteTree(Node *root);
    void writeHeader(obstream& outfile, int leaves);
    void extractHeader(ibstream& infile);
    void decodeFile(ibstream& infile, obstream& outfile, int filesz);
    
};

#endif

cpp file for the Encoding class;

/**
 * File: encoding.cpp
 * ------------------
 * Place your Encoding class implementation here.
 */

#include "encoding.h"
#include "string.h"
#include "strlib.h"



using namespace std;


void Encoding::compress(ibstream& infile, obstream& outfile) {
    
    getFrequency(infile);
    
    enqueueNodes();
    
    int leaves = frequencyTable.size();
    buildTree(leaves);
    createReferenceTable(leaves);
    Node *root = huffqueue.extractMin();
    deleteTree(root);
    encodeFile(infile, outfile, leaves);
    infile.close();
    outfile.close();
    frequencyTable.clear();
    referenceTable.clear();
}

void Encoding::decompress(ibstream& infile, obstream& outfile) {
    
    extractHeader(infile);
    
    enqueueNodes();
    
    int leaves = frequencyTable.size();
    buildTree(leaves);
    treeroot = huffqueue.peek();
    int filesz = treeroot->frequency;
    
    decodeFile(infile, outfile,filesz);
    deleteTree(treeroot);
    frequencyTable.clear();
    referenceTable.clear();
}

void Encoding::getFrequency(ibstream& infile) {
    
    int ch;
    map<int, int>::iterator iter;
    while((ch = infile.get()) != EOF){
        if(frequencyTable.count(ch) != 0){
            iter = frequencyTable.find(ch);
            int count = iter->second;
            iter->second = ++count;
        }
        
        else {
            frequencyTable.insert(pair<int, int> (ch, 1));
        }
    }
}

void Encoding::enqueueNodes() {
    
    map<int, int>::iterator iter;
    
    foreach(int key in frequencyTable) {// enqueue leaf nodes
        iter = frequencyTable.find(key);
        int freq = iter->second;
        
        Node* newnode = new Node;
        newnode->character = key;
        newnode->frequency = freq;
        newnode->zero = NULL; 
        newnode->one = NULL;
        
        huffqueue.enqueue(newnode, freq);
    }
}

void Encoding::buildTree(int leaves) {
    
    for (int i = 1; i < leaves; i++) {
        
        Node *newnode = new Node;// Create new non-leaf node
        newnode->character = VOID;
        
        Node *zeronode = huffqueue.extractMin();
        
        newnode->zero = zeronode;// pull next node from queue and attach to zero
        
        Node *onenode = huffqueue.extractMin();
        
        newnode->one = onenode;// pull next node from queue and attach to one
        
        int newfreq = zeronode->frequency + onenode->frequency;
        newnode->frequency = newfreq;// new node's frequency is combined frequency
        huffqueue.enqueue(newnode, newfreq);// enqueue
    }
}

void Encoding::createReferenceTable(int leaves) {// wrapper function for backtracking recursion
    Node *root = huffqueue.peek();
    string path = "";
    leafcount = 0;
    tracePaths(root, path, leaves);
}

void Encoding::tracePaths(Node *root, string path, int leaves) {// backtracking recursion to trace paths
    if (leafcount >= leaves) {// base case is when the leafcount has reached the number of leaves in the tree
        return;
    }
    
    if (root->character != VOID) {// identify a leaf
        
        int ch = root->character;
        
        referenceTable.insert(pair<int, string> (ch, path));// insert into referenceTable
        leafcount++;// raise leaf count by one
        
        return;// rewind
    }
    
    for (int i = 0; i < 2; i++) {// for loop for one and zero pointers
        if (i == 0) {
            if (root->zero != NULL) {// check that zero pointer isnt null
                
                Node *zeronode = root->zero;// get the next node
               
                tracePaths(zeronode, path + "0", leaves);// recursively trace the paths, add 0 to path string
            }
        }
        else if (i == 1) {
            if (root->one != NULL) {// same process for one pointer
                
                Node *onenode = root->one;
                
                tracePaths(onenode, path + "1", leaves);
            }
        }
    }
    return;// rewind
}

void Encoding::deleteTree(Node *root) {// free up the memory from the tree
    if(root == NULL){
        return;
    }
    
    else{
        Node *zeronode = root->zero;
        Node *onenode = root->one;
        
        delete root;
        
        deleteTree(zeronode);
        deleteTree(onenode);
    }
}

void Encoding::encodeFile(ibstream& infile, obstream& outfile, int leaves) {
    
    infile.rewind();
    
    writeHeader(outfile, leaves);
    int ch;
    map<int, string>::iterator iter;
    while((ch = infile.get()) != EOF){
        iter = referenceTable.find(ch);
        string encoding = iter->second;
            for (int i = 0; i < encoding.size(); i++) {
                char bit = encoding.at(i);
                if (bit == '0') {
                    outfile.writebit(0);
                }
                else if (bit == '1') {
                    outfile.writebit(1);
            }
        }
    }
}

void Encoding::writeHeader(obstream& outfile, int leaves) {
    
    outfile << leaves << ' ';// leave a gap after the number of leaf nodes
    
    map<int, int>::iterator iter;
    foreach(int key in frequencyTable) {
        iter = frequencyTable.find(key);
        int freq = iter->second;
        outfile << key << ' ';// leave gaps again
        outfile << freq << ' ';
    }
}

void Encoding::extractHeader(ibstream& infile) {
    int number;
    infile >> number;
    infile.get();
    string numstring = integerToString(number);
    for (int i = 0; i < number; i++) {
        int key;
        infile >> key;
        infile.get();
        int freq;
        infile >> freq;
        infile.get();
        frequencyTable.insert(pair<int, int> (key, freq));
    }
}

void Encoding::decodeFile(ibstream& infile, obstream& outfile, int filesz) {
    
    for (int i = 0; i < filesz; i++) {
        currentroot = treeroot;
        
        while (currentroot->character == VOID) {
            int n = infile.readbit();
            if (n == 1) {
                currentroot = currentroot->one;
            }
            else currentroot = currentroot->zero;
        }
        int ch = currentroot->character;
        outfile.put(ch);
    }
    outfile.close();
    infile.close();
}

Node and VOID constant in a separate header;

//
//  types.h
//  huffman
//
//  Created by Robbie on 20/05/2013.
//
//

#ifndef huffman_types_h
#define huffman_types_h



const int VOID = 256;


struct Node {
   
    int character;
    
    Node *zero;
    
    Node *one;
    
    int frequency;
    
};



#endif