BitInputStream.h
#pragma once
#include <vector>
#include <string>
#include "Common.h"
class BitInputStream
{
private:
typedef unsigned char byte;
FILE* file;
byte* buffer;
int bufferMaxSize;
int maxBitSize;
int pos;
public:
BitInputStream(const std::string &inputFileName, int bufferSize) {
file = fopen(inputFileName.c_str(), "rb");
if (file == nullptr) {
throw std::exception("input file not found");
}
buffer = new byte[bufferSize];
this->bufferMaxSize = bufferSize;
fill();
pos = 0;
}
~BitInputStream() {
close();
}
public:
bool readBit() {
int bytePos = pos / 8;
int bitPos = pos % 8;
bool ret = (buffer[bytePos] >> bitPos) & 1;
pos += 1;
if (pos >= maxBitSize) fill();
return ret;
}
byte readByte() {
byte num = 0;
for (int i = 0; i < 8; i++) {
bool val = readBit();
if (val) {
num |= (1 << i);
}
}
return num;
}
int readInt32() {
int num = 0;
for (int i = 0; i < 32; i++) {
bool val = readBit();
if (val) {
num |= (1 << i);
}
}
return num;
}
__int64 readInt64() {
__int64 num = 0;
for (int i = 0; i < 64; i++) {
bool val = readBit();
if (val) {
num |= ((__int64)1 << i);
}
}
return num;
}
void close() {
delete buffer;
fclose(file);
}
private:
//从文件中读入数据到缓冲区
void fill() {
int cnt = fread(buffer, 1, bufferMaxSize, file);
maxBitSize = cnt * 8;
pos = 0;
}
};
BitOutputStream.h
#pragma once
#include <string>
class BitOutputStream
{
private:
typedef unsigned char byte;
FILE* file;
byte* buffer;
int BUFFER_SIZE;
int bitsize;
public:
BitOutputStream(const std::string &outputFileName, int bufferSize) {
file = fopen(outputFileName.c_str(), "wb");
if (file == nullptr) {
throw std::exception("dst file not found");
}
buffer = new byte[bufferSize];
memset(buffer, 0, bufferSize);
this->BUFFER_SIZE = bufferSize;
bitsize = 0;
}
~BitOutputStream() {
if (file != nullptr) {
close();
}
}
void close() {
flush();
fclose(file);
file = nullptr;
delete[] buffer;
buffer = nullptr;
}
void writeBit(bool val) {
//static int cnt = 0;
//putchar(val ? '1' : '0');
//cnt += 1;
//if (cnt == 8) {
// putchar(' ');
// cnt = 0;
//}
if (val == true) {
int bytePos = bitsize / 8;
int bitPos = bitsize % 8;
buffer[bytePos] |= 1 << bitPos;
}
bitsize += 1;
if (bitsize == BUFFER_SIZE * 8) {
flush();
}
}
void writeByte(byte num) {
for (int i = 0; i < 8; i++) {
writeBit(num & 1);
num = num >> 1;
}
}
void writeInt32(int num) {
for (int i = 0; i < 32; i++) {
writeBit(num & 1);
num = num >> 1;
}
}
void writeInt64(__int64 num) {
for (int i = 0; i < 64; i++) {
writeBit(num & 1);
num = num >> 1;
}
}
private:
//将缓冲区中的数据写入到文件中
void flush() {
int byteSize = bitsize % 8 == 0 ? bitsize / 8 : bitsize / 8 + 1;
fwrite(buffer, 1, byteSize, file);
memset(buffer, 0, BUFFER_SIZE); //将buffer中所有元素重置为0
bitsize = 0;
}
};
Common.h
#pragma once
typedef unsigned char byte;
const int N = 256;
struct HufNode {
byte ch;
size_t freq;
HufNode *lchild, *rchild;
HufNode() :ch(0), freq(0), lchild(nullptr), rchild(nullptr) {}
HufNode(byte ch) :ch(ch), freq(0), lchild(nullptr), rchild(nullptr) {}
HufNode(byte ch, size_t freq) :ch(ch), freq(freq), lchild(nullptr), rchild(nullptr) {}
bool isLeaf() {
return lchild == nullptr && rchild == nullptr;
}
};
FileCompress.h
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <queue>
#include <vector>
#include <functional>
#include "BitOutputStream.h"
#include "Common.h"
using namespace std;
class FileCompress {
private:
size_t byteFrequence[N] = { 0 }; //统计每个字符出现的频次
vector<bool> codeTable[N]; //每个字符的编码表
BitOutputStream* bos;
void countFrequence(const char* src) {
FILE* f = fopen(src, "rb");
if (f == NULL) {
perror("open src file error");
exit(0);
}
byte data;
while (true) {
int size = fread(&data, sizeof(byte), 1, f);
if (size == 0) break;
byteFrequence[data] += 1;
}
fclose(f);
}
void writeContent(const char* src) {
FILE* f = fopen(src, "rb");
if (f == NULL) {
perror("open src file error");
exit(0);
}
byte data;
while (!feof(f)) {
int size = fread(&data, 1, 1, f);
if (size == 0) break;
for (bool b : codeTable[data]) {
bos->writeBit(b);
}
}
fclose(f);
}
struct cmp {
bool operator() (HufNode* n1, HufNode* n2) {
return n1->freq > n2->freq;
}
};
HufNode* createHufTree() {
priority_queue<HufNode*, vector<HufNode*>, cmp> pq;
for (int i = 0; i < N; i++) {
if (byteFrequence[i] == 0) continue;
HufNode* temp = new HufNode(i, byteFrequence[i]);
pq.push(temp);
}
while (pq.size() > 1) {
HufNode* node1 = pq.top(); pq.pop();
HufNode* node2 = pq.top(); pq.pop();
HufNode* partent = new HufNode(0, node1->freq + node2->freq);
partent->lchild = node1;
partent->rchild = node2;
pq.push(partent);
}
return pq.top();
}
void writeHufTree(HufNode* root) {
if (root->isLeaf()) {
bos->writeBit(true);
bos->writeByte(root->ch);
return;
}
bos->writeBit(false);
writeHufTree(root->lchild);
writeHufTree(root->rchild);
}
void dfs(HufNode* root, vector<bool> &path) {
if (root == NULL) return;
if (root->isLeaf()) {
codeTable[root->ch] = path;
}
else {
path.push_back(false);
dfs(root->lchild, path);
path.pop_back();
path.push_back(true);
dfs(root->rchild, path);
path.pop_back();
}
}
void generateCodeTable(HufNode* root) {
vector<bool> path;
dfs(root, path);
}
__int64 calcTotalBits() {
__int64 cnt = 0;
for (int i = 0; i < N; i++) {
cnt += (__int64)byteFrequence[i] * codeTable[i].size();
}
return cnt;
}
public:
void compress(const char* inputFilePath, const char* outputFilePath) {
countFrequence(inputFilePath);
HufNode* root = createHufTree();
generateCodeTable(root);
bos = new BitOutputStream(outputFilePath, 1024 * 1024 * 8);
writeHufTree(root);
__int64 cnt = calcTotalBits();
bos->writeInt64(cnt);
writeContent(inputFilePath);
bos->close();
}
};
FileDecompress.h
#pragma once
#include "BitInputStream.h"
#include "Common.h"
class FileDecompress {
private:
BitInputStream* bis;
HufNode* createHufTree() {
bool val = bis->readBit();
if (val) { // 1表示叶节点
byte ch = bis->readByte();
return new HufNode(ch);
}
else { // 0 表示非叶节点,且一定有两个孩子
HufNode* root = new HufNode();
root->lchild = createHufTree();
root->rchild = createHufTree();
return root;
}
}
public:
void decompress(const char* inputFilePath, const char* outputFilePath) {
bis = new BitInputStream(inputFilePath, 1024 * 1024 * 8);
HufNode* root = createHufTree();
__int64 totalBitCnt = bis->readInt64();
FILE* output = fopen(outputFilePath, "wb");
if (output == nullptr) {
perror("");
exit(0);
}
HufNode* p = root;
for (__int64 i = 0; i < totalBitCnt; i++) {
bool b = bis->readBit();
if (b == false) {
p = p->lchild;
}
else {
p = p->rchild;
}
if (p->isLeaf()) {
byte data = p->ch;
fwrite(&data, sizeof(byte), 1, output);
p = root;
}
}
fclose(output);
bis->close();
}
};
main.cpp
#include <iostream>
#include "FileCompress.h"
#include "FileDecompress.h"
using namespace std;
int main(int argc, char* argv[]) {
if (argc != 4) {
cout << "参数个数错误!" << endl;
cout << "使用方法。。。。" << endl;
exit(0);
}
if (strcmp(argv[1], "zip") == 0) {
FileCompress f;
f.compress(argv[2], argv[3]);
}
else if (strcmp(argv[1], "unzip") == 0) {
FileDecompress f;
f.decompress(argv[2], argv[3]);
}
else {
cout << "unknown command" << endl;
}
//FileCompress f;
//f.compress("D:/1.exe", "D:/temp.dat");
//FileDecompress f2;
//f2.decompress("D:/temp.dat", "D:/2.exe");
return 0;
}