collections.Counter 类可以接收任意由可哈希(hashable)元素构成的序列对象。
在底层实现上,一个 Counter 对象就是一个字典,它将每个元素映射到该元素出现的次数,因此十分适合用来统计序列中各元素的数量:
from collections import Counter
# Sample text, already split into individual words.
words = [
    "look", "into", "my", "eyes",
    "look", "into", "my", "eyes",
    "the", "eyes", "the", "eyes", "the", "eyes",
    "not", "around", "the", "eyes",
    "don't", "look", "around", "the", "eyes",
    "look", "into", "my", "eyes",
    "you're", "under",
]

# Tally how many times each word occurs.
word_counts = Counter(words)

# The three most frequent words, as (word, count) pairs.
top_three = word_counts.most_common(n=3)
print(top_three)  # [('eyes', 8), ('the', 5), ('look', 4)]
print(word_counts["eyes"])  # 8

# Feed additional words into the existing tally with update().
more_words = ["eyes", "the"]
word_counts.update(more_words)
print(word_counts["eyes"])  # 9
print(word_counts["the"])  # 6
Counter 实例之间还可以进行数学运算(例如加法和减法):
from collections import Counter
# Two word lists that are tallied independently.
words = [
    "eyes", "look", "into", "eyes",
    "the", "eyes", "the", "eyes",
    "the", "eyes", "the",
]
more_words = ["eyes", "the"]

a, b = Counter(words), Counter(more_words)

# "+" adds the counts of both counters element by element.
add_words = a + b
print(add_words)  # Counter({'eyes': 6, 'the': 5, 'look': 1, 'into': 1})

# "-" subtracts counts; only elements with a positive result are kept.
sub_words = a - b
print(sub_words)  # Counter({'eyes': 4, 'the': 3, 'look': 1, 'into': 1})