To use this example program, you must first import the following .txt file, which contains the text of Romance of the Three Kingdoms.

    three.txt

    添加本地文件.gif

    # Words to exclude from the ranking.
    excludes = {"将军","却说","二人","后主","上马","不知","天子","大叫","众将","不可","主公","蜀兵","只见","如何","商议","都督","一人","汉中","不敢","人马","陛下","魏兵","天下","今日","左右","东吴","于是","荆州","不能","如此","大喜","引兵","次日","军士","军马"}

    # Tokens that are counted under a single canonical name.
    aliases = {
        '孔明曰': '孔明',
        '关公': '关羽',
        '云长': '关羽',
        '孟德': '曹操',
        '丞相': '曹操',
        '玄德': '刘备',
        '玄德曰': '刘备',
    }


    def count_words(words, excluded=None):
        """Return a dict mapping each word in *words* to its number of occurrences.

        Single-character tokens are skipped, tokens listed in ``aliases`` are
        counted under their canonical name, and every word in *excluded*
        (``excludes`` when omitted) is removed from the result.
        """
        if excluded is None:
            excluded = excludes
        counts = {}
        for word in words:
            if len(word) == 1:  # skip single-character tokenization results
                continue
            real_word = aliases.get(word, word)
            # Count under the canonical name so that aliases are merged.
            counts[real_word] = counts.get(real_word, 0) + 1
        for word in excluded:
            # pop() with a default: an excluded word may not occur in the text.
            counts.pop(word, None)
        return counts


    def top_words(counts, limit=10):
        """Return up to *limit* ``(word, count)`` pairs, most frequent first."""
        items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
        # Slicing copes with fewer than *limit* distinct words.
        return items[:limit]


    def main():
        """Tokenize three.txt with jieba and print the 10 most frequent words."""
        import jieba

        with open('three.txt', 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        words = jieba.lcut(content)  # tokenization
        for word, count in top_words(count_words(words)):
            print(word, count)


    if __name__ == '__main__':
        main()

    :::info Note: This example program was translated from the Chinese version. You can try writing an English version by using English APIs. :::