问题描述

压缩算法,目的就是根据字母的出现频率来“按需分配”编码来优化编码方式。

比如:给出一串字母 Huffman Coding ,按照计算机处理形式,会根据ascll码将这串字符编码,具体形式(十进制)就是104 117 102 102 109 97 110 32 67 111 100 105 110 103,然后转换成二进制,最后会得到需要97个比特来存储。

算法描述

算法角度来讲对上述问题ascll编码方式是浪费空间的,优化方向是改变编码方式,根据字母出现的频率来“按需分配”进制位。

给出下面所给出的字母,以及出现的频率,来得到哈夫曼编码
image.png
先提出将频率小的依次加入。d和h组合权值为9(或者说A只是称呼方便),然后将这个9“替换d和h”代入整个序列,在进行插入树操作,

过程中,遵循数字大的在左数字小的在右原则(互换也没关系,只不过换的是二进制的0和1)

在进行到E的时候,此时的队列应该为120 107 42 37,所以此时需要重新调整队列,然后进行到结束。

image.png
最后的编码结果为:
image.png

编码实现

  1. #include <stdio.h>
  2. #include <vector>
  3. #include <algorithm>
  4. namespace NS_HuffmanCoding {
  5. using namespace std;
  6. void BuildHuffmanTree();
  7. void Initialization(vector<pair<char, int>> chars);
  8. void Finalization();
  9. struct HFMNode {
  10. char Ch; int Freq;
  11. HFMNode* Left, * Right;
  12. HFMNode(char pCh, int pFreq, HFMNode* pLeft, HFMNode* pRight)
  13. : Ch(pCh), Freq(pFreq), Left(pLeft), Right(pRight) {}
  14. HFMNode(char pCh, int pFreq)
  15. : HFMNode(pCh, pFreq, NULL, NULL) {}
  16. };
  17. void MinHeapify(vector<HFMNode*>& H);
  18. void InsertH(vector<HFMNode*>& H, HFMNode* node);
  19. void SiftDown(vector<HFMNode*>& H, int i);
  20. void SiftUp(vector<HFMNode*>& H, int i);
  21. HFMNode* ExtractMin(vector<HFMNode*>& H);
  22. void DeleteANode(HFMNode* node);
  23. void ShowInput(vector<pair<char, int>> chars);
  24. void Output();
  25. static vector<HFMNode*> Q;
  26. void HuffmanCodingCaller(vector<pair<char, int>> chars)
  27. {
  28. ShowInput(chars);
  29. Initialization(chars);
  30. BuildHuffmanTree();
  31. Output();
  32. Finalization();
  33. }
  34. void BuildHuffmanTree()
  35. {
  36. char C = 'A';
  37. while (Q.size() > 1)
  38. {
  39. HFMNode* x = ExtractMin(Q);
  40. HFMNode* y = ExtractMin(Q);
  41. HFMNode* z = new HFMNode(C++, x->Freq + y->Freq, x, y);
  42. InsertH(Q, z);
  43. }
  44. }
  45. HFMNode* ExtractMin(vector<HFMNode*>& H)
  46. {
  47. swap(H.front(), H.back());
  48. HFMNode* p = H.back();
  49. H.pop_back();
  50. if (!H.empty())
  51. SiftDown(H, 0);
  52. return p;
  53. }
  54. void SiftDown(vector<HFMNode*>& H, int i)
  55. {
  56. while ((i = (i << 1) + 1) < H.size()) {
  57. if ((i + 1 < H.size()) && (H[i + 1]->Freq < H[i]->Freq))
  58. i = i + 1;
  59. if (H[(i - 1) >> 1]->Freq > H[i]->Freq)
  60. swap(H[(i - 1) >> 1], H[i]);
  61. else break;
  62. }
  63. }
  64. void InsertH(vector<HFMNode*>& H, HFMNode* node)
  65. {
  66. H.push_back(node);
  67. SiftUp(H, H.size() - 1);
  68. }
  69. void SiftUp(vector<HFMNode*>& H, int i)
  70. {
  71. while (i > 0 && H[i]->Freq < H[(i - 1) >> 1]->Freq) {
  72. swap(H[i], H[(i - 1) >> 1]);
  73. i = (i - 1) >> 1;
  74. }
  75. }
  76. void MinHeapify(vector<HFMNode*>& H)
  77. {
  78. for (int i = (H.size() >> 1) - 1; i >= 0; i--) {
  79. SiftDown(H, i);
  80. }
  81. }
  82. void Initialization(vector<pair<char, int>> chars)
  83. {
  84. Q.clear();
  85. for (auto ch : chars)
  86. Q.push_back(new HFMNode(ch.first, ch.second));
  87. MinHeapify(Q);
  88. }
  89. void Finalization()
  90. {
  91. DeleteANode(Q[0]);
  92. }
  93. void DeleteANode(HFMNode* node)
  94. {
  95. if (node->Left)
  96. {
  97. DeleteANode(node->Left);
  98. DeleteANode(node->Right);
  99. }
  100. delete node;
  101. }
  102. void ShowInput(vector<pair<char, int>> chars)
  103. {
  104. printf("Huffman coding input: \n");
  105. for (auto c : chars)
  106. printf("%c,%d; ", c.first, c.second);
  107. printf("\n");
  108. }
  109. static vector<char> coding;
  110. static vector<pair<char, vector<char>>> codingList;
  111. void GetHuffmanCoding(HFMNode* node)
  112. {
  113. if (node->Left)
  114. {
  115. coding.push_back('0');
  116. GetHuffmanCoding(node->Left);
  117. coding.pop_back();
  118. coding.push_back('1');
  119. GetHuffmanCoding(node->Right);
  120. coding.pop_back();
  121. }
  122. else
  123. {
  124. codingList.push_back(pair<char,
  125. vector<char>>(node->Ch, coding));
  126. }
  127. }
  128. void Output()
  129. {
  130. printf("Huffman coding:\n");
  131. coding.clear();
  132. codingList.clear();
  133. GetHuffmanCoding(Q[0]);
  134. sort(codingList.begin(), codingList.end());
  135. for (auto c1 : codingList)
  136. {
  137. printf(" %c: ", c1.first);
  138. for (auto c2 : c1.second)
  139. printf("%c", c2);
  140. printf("\n");
  141. }
  142. printf("\n");
  143. }
  144. } //namespace NS_HuffmanCoding
  145. using namespace NS_HuffmanCoding;
  146. void TestHuffmanCoding()
  147. {
  148. vector<vector<pair<char, int>>> charLists = {
  149. //Introduction to Algorithms
  150. {
  151. { {'a',40}, {'b',13}, {'c',12},
  152. {'d',16}, {'e',9}, {'f',5} },
  153. },
  154. //ÑÏεÃô
  155. {
  156. { {'a',5}, {'b',29}, {'c',7}, {'d',8},
  157. {'e',14}, {'f',23}, {'g',3}, {'h',11} },
  158. },
  159. };
  160. int n = charLists.size();
  161. for (int i = 0; i < n; i++)
  162. {
  163. HuffmanCodingCaller(charLists[i]);
  164. }
  165. }