BitInputStream.h
#pragma once#include <vector>#include <string>#include "Common.h"class BitInputStream{private: typedef unsigned char byte; FILE* file; byte* buffer; int bufferMaxSize; int maxBitSize; int pos;public: BitInputStream(const std::string &inputFileName, int bufferSize) { file = fopen(inputFileName.c_str(), "rb"); if (file == nullptr) { throw std::exception("input file not found"); } buffer = new byte[bufferSize]; this->bufferMaxSize = bufferSize; fill(); pos = 0; } ~BitInputStream() { close(); }public: bool readBit() { int bytePos = pos / 8; int bitPos = pos % 8; bool ret = (buffer[bytePos] >> bitPos) & 1; pos += 1; if (pos >= maxBitSize) fill(); return ret; } byte readByte() { byte num = 0; for (int i = 0; i < 8; i++) { bool val = readBit(); if (val) { num |= (1 << i); } } return num; } int readInt32() { int num = 0; for (int i = 0; i < 32; i++) { bool val = readBit(); if (val) { num |= (1 << i); } } return num; } __int64 readInt64() { __int64 num = 0; for (int i = 0; i < 64; i++) { bool val = readBit(); if (val) { num |= ((__int64)1 << i); } } return num; } void close() { delete buffer; fclose(file); }private: //从文件中读入数据到缓冲区 void fill() { int cnt = fread(buffer, 1, bufferMaxSize, file); maxBitSize = cnt * 8; pos = 0; }};
BitOutputStream.h
#pragma once#include <string>class BitOutputStream{private: typedef unsigned char byte; FILE* file; byte* buffer; int BUFFER_SIZE; int bitsize;public: BitOutputStream(const std::string &outputFileName, int bufferSize) { file = fopen(outputFileName.c_str(), "wb"); if (file == nullptr) { throw std::exception("dst file not found"); } buffer = new byte[bufferSize]; memset(buffer, 0, bufferSize); this->BUFFER_SIZE = bufferSize; bitsize = 0; } ~BitOutputStream() { if (file != nullptr) { close(); } } void close() { flush(); fclose(file); file = nullptr; delete[] buffer; buffer = nullptr; } void writeBit(bool val) { //static int cnt = 0; //putchar(val ? '1' : '0'); //cnt += 1; //if (cnt == 8) { // putchar(' '); // cnt = 0; //} if (val == true) { int bytePos = bitsize / 8; int bitPos = bitsize % 8; buffer[bytePos] |= 1 << bitPos; } bitsize += 1; if (bitsize == BUFFER_SIZE * 8) { flush(); } } void writeByte(byte num) { for (int i = 0; i < 8; i++) { writeBit(num & 1); num = num >> 1; } } void writeInt32(int num) { for (int i = 0; i < 32; i++) { writeBit(num & 1); num = num >> 1; } } void writeInt64(__int64 num) { for (int i = 0; i < 64; i++) { writeBit(num & 1); num = num >> 1; } }private: //将缓冲区中的数据写入到文件中 void flush() { int byteSize = bitsize % 8 == 0 ? bitsize / 8 : bitsize / 8 + 1; fwrite(buffer, 1, byteSize, file); memset(buffer, 0, BUFFER_SIZE); //将buffer中所有元素重置为0 bitsize = 0; }};
Common.h
#pragma oncetypedef unsigned char byte;const int N = 256;struct HufNode { byte ch; size_t freq; HufNode *lchild, *rchild; HufNode() :ch(0), freq(0), lchild(nullptr), rchild(nullptr) {} HufNode(byte ch) :ch(ch), freq(0), lchild(nullptr), rchild(nullptr) {} HufNode(byte ch, size_t freq) :ch(ch), freq(freq), lchild(nullptr), rchild(nullptr) {} bool isLeaf() { return lchild == nullptr && rchild == nullptr; }};
FileCompress.h
#pragma once#include <stdio.h>#include <stdlib.h>#include <queue>#include <vector>#include <functional>#include "BitOutputStream.h"#include "Common.h"using namespace std;class FileCompress {private: size_t byteFrequence[N] = { 0 }; //统计每个字符出现的频次 vector<bool> codeTable[N]; //每个字符的编码表 BitOutputStream* bos; void countFrequence(const char* src) { FILE* f = fopen(src, "rb"); if (f == NULL) { perror("open src file error"); exit(0); } byte data; while (true) { int size = fread(&data, sizeof(byte), 1, f); if (size == 0) break; byteFrequence[data] += 1; } fclose(f); } void writeContent(const char* src) { FILE* f = fopen(src, "rb"); if (f == NULL) { perror("open src file error"); exit(0); } byte data; while (!feof(f)) { int size = fread(&data, 1, 1, f); if (size == 0) break; for (bool b : codeTable[data]) { bos->writeBit(b); } } fclose(f); } struct cmp { bool operator() (HufNode* n1, HufNode* n2) { return n1->freq > n2->freq; } }; HufNode* createHufTree() { priority_queue<HufNode*, vector<HufNode*>, cmp> pq; for (int i = 0; i < N; i++) { if (byteFrequence[i] == 0) continue; HufNode* temp = new HufNode(i, byteFrequence[i]); pq.push(temp); } while (pq.size() > 1) { HufNode* node1 = pq.top(); pq.pop(); HufNode* node2 = pq.top(); pq.pop(); HufNode* partent = new HufNode(0, node1->freq + node2->freq); partent->lchild = node1; partent->rchild = node2; pq.push(partent); } return pq.top(); } void writeHufTree(HufNode* root) { if (root->isLeaf()) { bos->writeBit(true); bos->writeByte(root->ch); return; } bos->writeBit(false); writeHufTree(root->lchild); writeHufTree(root->rchild); } void dfs(HufNode* root, vector<bool> &path) { if (root == NULL) return; if (root->isLeaf()) { codeTable[root->ch] = path; } else { path.push_back(false); dfs(root->lchild, path); path.pop_back(); path.push_back(true); dfs(root->rchild, path); path.pop_back(); } } void generateCodeTable(HufNode* root) { vector<bool> path; dfs(root, path); } __int64 calcTotalBits() { __int64 cnt = 0; for (int i = 0; i < N; i++) { cnt += (__int64)byteFrequence[i] * codeTable[i].size(); } return cnt; }public: void compress(const char* inputFilePath, const char* outputFilePath) { countFrequence(inputFilePath); HufNode* root = createHufTree(); generateCodeTable(root); bos = new BitOutputStream(outputFilePath, 1024 * 1024 * 8); writeHufTree(root); __int64 cnt = calcTotalBits(); bos->writeInt64(cnt); writeContent(inputFilePath); bos->close(); }};
FileDecompress.h
#pragma once#include "BitInputStream.h"#include "Common.h"class FileDecompress {private: BitInputStream* bis; HufNode* createHufTree() { bool val = bis->readBit(); if (val) { // 1表示叶节点 byte ch = bis->readByte(); return new HufNode(ch); } else { // 0 表示非叶节点,且一定有两个孩子 HufNode* root = new HufNode(); root->lchild = createHufTree(); root->rchild = createHufTree(); return root; } }public: void decompress(const char* inputFilePath, const char* outputFilePath) { bis = new BitInputStream(inputFilePath, 1024 * 1024 * 8); HufNode* root = createHufTree(); __int64 totalBitCnt = bis->readInt64(); FILE* output = fopen(outputFilePath, "wb"); if (output == nullptr) { perror(""); exit(0); } HufNode* p = root; for (__int64 i = 0; i < totalBitCnt; i++) { bool b = bis->readBit(); if (b == false) { p = p->lchild; } else { p = p->rchild; } if (p->isLeaf()) { byte data = p->ch; fwrite(&data, sizeof(byte), 1, output); p = root; } } fclose(output); bis->close(); }};
main.cpp
#include <iostream>#include "FileCompress.h"#include "FileDecompress.h"using namespace std;int main(int argc, char* argv[]) { if (argc != 4) { cout << "参数个数错误!" << endl; cout << "使用方法。。。。" << endl; exit(0); } if (strcmp(argv[1], "zip") == 0) { FileCompress f; f.compress(argv[2], argv[3]); } else if (strcmp(argv[1], "unzip") == 0) { FileDecompress f; f.decompress(argv[2], argv[3]); } else { cout << "unknown command" << endl; } //FileCompress f; //f.compress("D:/1.exe", "D:/temp.dat"); //FileDecompress f2; //f2.decompress("D:/temp.dat", "D:/2.exe"); return 0;}