设计并实现TopKRecord结构,可以不断地网其中加入字符串,可以根据字符串chu’xian的情况随时打印加入次数最多前K个字符串。
class Node{
public:
string s;
int count;
Node(string ss, int c):s(ss),count(c){}
};
class TopKRecord{
private:
vector<Node*> heap;
int index;//队尾的后一个位置
unordered_map<string,Node*> strNodeMap;
unordered_map<Node*,int> nodeIndexMap;
public:
TopKRecord(int size){
heap.resize(size);
index=0;
}
void add(string str){
Node* curNode=nullptr;
int location=-1;
if(find(strNodeMap.begin(),strNodeMap.end(),str)==strNodeMap.end()){
curNode = new Node(str,1);
strNodeMap[str]= curNode;
nodeIndexMap[curNode]=-1;
}else{
curNode=strNodeMap[str];
curNode->count++;
location=nodeIndex[curNode];
}
if(location==-1){
if(index < heap.size()){
nodeIndexMap[curNode]=index;
heap[index]=curNode;
heapBuild(index);
index++;
}else{
if(curNode->count>heap[0]->count){
Node* root=heap[0];
nodeIndex[root]=-1;
heap[0]=curNode;
nodeIndex[curNode]=0;
heapKeep(0,index);
}
}
}else{
heapKeep(location,index);
}
}
};
void heapKeep(int start, int end){
int left=start*2+1;
int right=left+1;
int cur=start;
int smallest=cur;
while(left<end){
if(heap[cur]->val>heap[left]->val){
smallest=left;
}
if(right<=end&&heap[smallest]->val>heap[right]->val){
smallest=right;
}
if(smallest!=cur){
swap(smallest,cur);
}else{
break;
}
cur=smallest;
left=cur*2+1;
right=left+1;
}
}
void heapBuild(int start){
int cur=start;
int parent=(cur-1)/2;
int smallest=cur;
while(parent>=0){
if(heap[parent]->val>heap[cur]->val){
swap(cur,parent);
cur=parent;
parent=(cur-1)/2;
}else{
break;
}
}
}
TopKRecord::swap(i,j)是最为关键的地方之一,它要保证,交换节点的值时,要更新nodeIndexMap中其位置的更新,这十分关键。
void swap(int i, int j){
int t=heap[i]->val;
heap[i]->val=heap[j]->val;
heap[j]->val=t;
//更新节点的位置记录
nodeIndexMap[heap[i]]=j;
nodeIndexMap[heap[j]]=i;
}
关于void reduce(str)
不在堆中,只需更新哈希表;若还在堆中,同时要heapBuild(location);若减小为零,heapBuild操作把之交换到root节点后,删除此root节点: swap(0,index);—index; heapKeep(0,index);
打印topK,时间复杂度为O(k):
void printTopK(){
for(int i=0;i<index;++i){
cout << heap[i]->val << ",";
}
}