collections.Counter 类可以接收任意由可哈希(hashable)元素构成的可迭代对象(如列表、元组、字符串等)。在底层实现上,Counter 是 dict 的子类,它把每个元素映射到该元素出现的次数,因此十分适合用来统计序列中各元素的数量:
from collections import Counter

# Sample data: a flat list of words whose frequencies we want to tally.
words = [
    "look", "into", "my", "eyes", "look", "into", "my", "eyes",
    "the", "eyes", "the", "eyes", "the", "eyes", "not", "around", "the",
    "eyes", "don't", "look", "around", "the", "eyes", "look", "into",
    "my", "eyes", "you're", "under",
]

# Counter maps each distinct word to the number of times it occurs.
word_counts = Counter(words)

# The three most frequent words, as (word, count) pairs in descending order.
top_three = word_counts.most_common(3)
print(top_three)  # [('eyes', 8), ('the', 5), ('look', 4)]
print(word_counts["eyes"])  # 8

# update() adds the counts of the new words to the existing tallies in place.
more_words = ["eyes", "the"]
word_counts.update(more_words)
print(word_counts["eyes"])  # 9
print(word_counts["the"])  # 6
Counter 实例之间还可以进行加法、减法等数学运算:
from collections import Counter

# Two word lists to be tallied separately and then combined.
words = [
    "eyes", "look", "into", "eyes", "the",
    "eyes", "the", "eyes", "the", "eyes", "the",
]
more_words = ["eyes", "the"]

a = Counter(words)
b = Counter(more_words)

# Addition sums the counts of each word across both counters.
add_words = a + b
print(add_words)  # Counter({'eyes': 6, 'the': 5, 'look': 1, 'into': 1})

# Subtraction removes b's counts from a's; only positive results are kept.
sub_words = a - b
print(sub_words)  # Counter({'eyes': 4, 'the': 3, 'look': 1, 'into': 1})
