BitInputStream.h

  1. #pragma once
  2. #include <vector>
  3. #include <string>
  4. #include "Common.h"
  5. class BitInputStream
  6. {
  7. private:
  8. typedef unsigned char byte;
  9. FILE* file;
  10. byte* buffer;
  11. int bufferMaxSize;
  12. int maxBitSize;
  13. int pos;
  14. public:
  15. BitInputStream(const std::string &inputFileName, int bufferSize) {
  16. file = fopen(inputFileName.c_str(), "rb");
  17. if (file == nullptr) {
  18. throw std::exception("input file not found");
  19. }
  20. buffer = new byte[bufferSize];
  21. this->bufferMaxSize = bufferSize;
  22. fill();
  23. pos = 0;
  24. }
  25. ~BitInputStream() {
  26. close();
  27. }
  28. public:
  29. bool readBit() {
  30. int bytePos = pos / 8;
  31. int bitPos = pos % 8;
  32. bool ret = (buffer[bytePos] >> bitPos) & 1;
  33. pos += 1;
  34. if (pos >= maxBitSize) fill();
  35. return ret;
  36. }
  37. byte readByte() {
  38. byte num = 0;
  39. for (int i = 0; i < 8; i++) {
  40. bool val = readBit();
  41. if (val) {
  42. num |= (1 << i);
  43. }
  44. }
  45. return num;
  46. }
  47. int readInt32() {
  48. int num = 0;
  49. for (int i = 0; i < 32; i++) {
  50. bool val = readBit();
  51. if (val) {
  52. num |= (1 << i);
  53. }
  54. }
  55. return num;
  56. }
  57. __int64 readInt64() {
  58. __int64 num = 0;
  59. for (int i = 0; i < 64; i++) {
  60. bool val = readBit();
  61. if (val) {
  62. num |= ((__int64)1 << i);
  63. }
  64. }
  65. return num;
  66. }
  67. void close() {
  68. delete buffer;
  69. fclose(file);
  70. }
  71. private:
  72. //从文件中读入数据到缓冲区
  73. void fill() {
  74. int cnt = fread(buffer, 1, bufferMaxSize, file);
  75. maxBitSize = cnt * 8;
  76. pos = 0;
  77. }
  78. };

BitOutputStream.h

  1. #pragma once
  2. #include <string>
  3. class BitOutputStream
  4. {
  5. private:
  6. typedef unsigned char byte;
  7. FILE* file;
  8. byte* buffer;
  9. int BUFFER_SIZE;
  10. int bitsize;
  11. public:
  12. BitOutputStream(const std::string &outputFileName, int bufferSize) {
  13. file = fopen(outputFileName.c_str(), "wb");
  14. if (file == nullptr) {
  15. throw std::exception("dst file not found");
  16. }
  17. buffer = new byte[bufferSize];
  18. memset(buffer, 0, bufferSize);
  19. this->BUFFER_SIZE = bufferSize;
  20. bitsize = 0;
  21. }
  22. ~BitOutputStream() {
  23. if (file != nullptr) {
  24. close();
  25. }
  26. }
  27. void close() {
  28. flush();
  29. fclose(file);
  30. file = nullptr;
  31. delete[] buffer;
  32. buffer = nullptr;
  33. }
  34. void writeBit(bool val) {
  35. //static int cnt = 0;
  36. //putchar(val ? '1' : '0');
  37. //cnt += 1;
  38. //if (cnt == 8) {
  39. // putchar(' ');
  40. // cnt = 0;
  41. //}
  42. if (val == true) {
  43. int bytePos = bitsize / 8;
  44. int bitPos = bitsize % 8;
  45. buffer[bytePos] |= 1 << bitPos;
  46. }
  47. bitsize += 1;
  48. if (bitsize == BUFFER_SIZE * 8) {
  49. flush();
  50. }
  51. }
  52. void writeByte(byte num) {
  53. for (int i = 0; i < 8; i++) {
  54. writeBit(num & 1);
  55. num = num >> 1;
  56. }
  57. }
  58. void writeInt32(int num) {
  59. for (int i = 0; i < 32; i++) {
  60. writeBit(num & 1);
  61. num = num >> 1;
  62. }
  63. }
  64. void writeInt64(__int64 num) {
  65. for (int i = 0; i < 64; i++) {
  66. writeBit(num & 1);
  67. num = num >> 1;
  68. }
  69. }
  70. private:
  71. //将缓冲区中的数据写入到文件中
  72. void flush() {
  73. int byteSize = bitsize % 8 == 0 ? bitsize / 8 : bitsize / 8 + 1;
  74. fwrite(buffer, 1, byteSize, file);
  75. memset(buffer, 0, BUFFER_SIZE); //将buffer中所有元素重置为0
  76. bitsize = 0;
  77. }
  78. };

Common.h

  1. #pragma once
  2. typedef unsigned char byte;
  3. const int N = 256;
  4. struct HufNode {
  5. byte ch;
  6. size_t freq;
  7. HufNode *lchild, *rchild;
  8. HufNode() :ch(0), freq(0), lchild(nullptr), rchild(nullptr) {}
  9. HufNode(byte ch) :ch(ch), freq(0), lchild(nullptr), rchild(nullptr) {}
  10. HufNode(byte ch, size_t freq) :ch(ch), freq(freq), lchild(nullptr), rchild(nullptr) {}
  11. bool isLeaf() {
  12. return lchild == nullptr && rchild == nullptr;
  13. }
  14. };

FileCompress.h

  1. #pragma once
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <queue>
  5. #include <vector>
  6. #include <functional>
  7. #include "BitOutputStream.h"
  8. #include "Common.h"
  9. using namespace std;
  10. class FileCompress {
  11. private:
  12. size_t byteFrequence[N] = { 0 }; //统计每个字符出现的频次
  13. vector<bool> codeTable[N]; //每个字符的编码表
  14. BitOutputStream* bos;
  15. void countFrequence(const char* src) {
  16. FILE* f = fopen(src, "rb");
  17. if (f == NULL) {
  18. perror("open src file error");
  19. exit(0);
  20. }
  21. byte data;
  22. while (true) {
  23. int size = fread(&data, sizeof(byte), 1, f);
  24. if (size == 0) break;
  25. byteFrequence[data] += 1;
  26. }
  27. fclose(f);
  28. }
  29. void writeContent(const char* src) {
  30. FILE* f = fopen(src, "rb");
  31. if (f == NULL) {
  32. perror("open src file error");
  33. exit(0);
  34. }
  35. byte data;
  36. while (!feof(f)) {
  37. int size = fread(&data, 1, 1, f);
  38. if (size == 0) break;
  39. for (bool b : codeTable[data]) {
  40. bos->writeBit(b);
  41. }
  42. }
  43. fclose(f);
  44. }
  45. struct cmp {
  46. bool operator() (HufNode* n1, HufNode* n2) {
  47. return n1->freq > n2->freq;
  48. }
  49. };
  50. HufNode* createHufTree() {
  51. priority_queue<HufNode*, vector<HufNode*>, cmp> pq;
  52. for (int i = 0; i < N; i++) {
  53. if (byteFrequence[i] == 0) continue;
  54. HufNode* temp = new HufNode(i, byteFrequence[i]);
  55. pq.push(temp);
  56. }
  57. while (pq.size() > 1) {
  58. HufNode* node1 = pq.top(); pq.pop();
  59. HufNode* node2 = pq.top(); pq.pop();
  60. HufNode* partent = new HufNode(0, node1->freq + node2->freq);
  61. partent->lchild = node1;
  62. partent->rchild = node2;
  63. pq.push(partent);
  64. }
  65. return pq.top();
  66. }
  67. void writeHufTree(HufNode* root) {
  68. if (root->isLeaf()) {
  69. bos->writeBit(true);
  70. bos->writeByte(root->ch);
  71. return;
  72. }
  73. bos->writeBit(false);
  74. writeHufTree(root->lchild);
  75. writeHufTree(root->rchild);
  76. }
  77. void dfs(HufNode* root, vector<bool> &path) {
  78. if (root == NULL) return;
  79. if (root->isLeaf()) {
  80. codeTable[root->ch] = path;
  81. }
  82. else {
  83. path.push_back(false);
  84. dfs(root->lchild, path);
  85. path.pop_back();
  86. path.push_back(true);
  87. dfs(root->rchild, path);
  88. path.pop_back();
  89. }
  90. }
  91. void generateCodeTable(HufNode* root) {
  92. vector<bool> path;
  93. dfs(root, path);
  94. }
  95. __int64 calcTotalBits() {
  96. __int64 cnt = 0;
  97. for (int i = 0; i < N; i++) {
  98. cnt += (__int64)byteFrequence[i] * codeTable[i].size();
  99. }
  100. return cnt;
  101. }
  102. public:
  103. void compress(const char* inputFilePath, const char* outputFilePath) {
  104. countFrequence(inputFilePath);
  105. HufNode* root = createHufTree();
  106. generateCodeTable(root);
  107. bos = new BitOutputStream(outputFilePath, 1024 * 1024 * 8);
  108. writeHufTree(root);
  109. __int64 cnt = calcTotalBits();
  110. bos->writeInt64(cnt);
  111. writeContent(inputFilePath);
  112. bos->close();
  113. }
  114. };

FileDecompress.h

  1. #pragma once
  2. #include "BitInputStream.h"
  3. #include "Common.h"
  4. class FileDecompress {
  5. private:
  6. BitInputStream* bis;
  7. HufNode* createHufTree() {
  8. bool val = bis->readBit();
  9. if (val) { // 1表示叶节点
  10. byte ch = bis->readByte();
  11. return new HufNode(ch);
  12. }
  13. else { // 0 表示非叶节点,且一定有两个孩子
  14. HufNode* root = new HufNode();
  15. root->lchild = createHufTree();
  16. root->rchild = createHufTree();
  17. return root;
  18. }
  19. }
  20. public:
  21. void decompress(const char* inputFilePath, const char* outputFilePath) {
  22. bis = new BitInputStream(inputFilePath, 1024 * 1024 * 8);
  23. HufNode* root = createHufTree();
  24. __int64 totalBitCnt = bis->readInt64();
  25. FILE* output = fopen(outputFilePath, "wb");
  26. if (output == nullptr) {
  27. perror("");
  28. exit(0);
  29. }
  30. HufNode* p = root;
  31. for (__int64 i = 0; i < totalBitCnt; i++) {
  32. bool b = bis->readBit();
  33. if (b == false) {
  34. p = p->lchild;
  35. }
  36. else {
  37. p = p->rchild;
  38. }
  39. if (p->isLeaf()) {
  40. byte data = p->ch;
  41. fwrite(&data, sizeof(byte), 1, output);
  42. p = root;
  43. }
  44. }
  45. fclose(output);
  46. bis->close();
  47. }
  48. };

main.cpp

  1. #include <iostream>
  2. #include "FileCompress.h"
  3. #include "FileDecompress.h"
  4. using namespace std;
  5. int main(int argc, char* argv[]) {
  6. if (argc != 4) {
  7. cout << "参数个数错误!" << endl;
  8. cout << "使用方法。。。。" << endl;
  9. exit(0);
  10. }
  11. if (strcmp(argv[1], "zip") == 0) {
  12. FileCompress f;
  13. f.compress(argv[2], argv[3]);
  14. }
  15. else if (strcmp(argv[1], "unzip") == 0) {
  16. FileDecompress f;
  17. f.decompress(argv[2], argv[3]);
  18. }
  19. else {
  20. cout << "unknown command" << endl;
  21. }
  22. //FileCompress f;
  23. //f.compress("D:/1.exe", "D:/temp.dat");
  24. //FileDecompress f2;
  25. //f2.decompress("D:/temp.dat", "D:/2.exe");
  26. return 0;
  27. }