1. Using the nltk package

  from nltk.stem.porter import PorterStemmer
  from nltk.stem.lancaster import LancasterStemmer
  from nltk.stem import SnowballStemmer

  # jy: Porter stemming algorithm ========================
  porter_stemmer = PorterStemmer()
  word_stem = porter_stemmer.stem("multiply")
  # jy: "multipli"
  print(word_stem)

  # jy: Lancaster stemming algorithm =====================
  lancaster_stemmer = LancasterStemmer()
  word_stem = lancaster_stemmer.stem("multiply")
  # jy: "multiply"
  print(word_stem)

  # jy: Snowball stemming algorithm ======================
  snowball_stemmer = SnowballStemmer("english")
  word_stem = snowball_stemmer.stem("multiply")
  # jy: "multipli"
  print(word_stem)
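
The three stemmers apply different rule sets, so they can disagree on the same input, as the single-word example above already hints. The minimal sketch below runs all three over one word list to make the differences easy to compare; the word list itself is an illustrative assumption, not from the original:

  # jy: Side-by-side comparison of the three nltk stemmers.
  from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer

  stemmers = {
      "porter": PorterStemmer(),
      "lancaster": LancasterStemmer(),
      "snowball": SnowballStemmer("english"),
  }
  # jy: sample words chosen for illustration only
  words = ["multiply", "successfully", "others", "leaves"]
  for name, stemmer in stemmers.items():
      print(name, [stemmer.stem(w) for w in words])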

2. Using the spacy package (lemmatization, not stemming in the strict sense)

  import spacy

  # jy: requires the model: python -m spacy download en_core_web_sm
  nlp = spacy.load("en_core_web_sm")
  str_words = "multiply successfully others leaf leaves"
  doc = nlp(str_words)
  ls_lemma = [token.lemma_ for token in doc]
  # jy: ['multiply', 'successfully', 'other', 'leaf', 'leave']
  print(ls_lemma)
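
Because spacy lemmatizes rather than stems, the result depends on the part of speech assigned in context: the same surface form can map to different lemmas in different sentences. A minimal sketch of this behavior follows; the two example sentences are assumptions for illustration, and the exact tags and lemmas depend on the en_core_web_sm model version:

  # jy: Lemmas vary with the part of speech assigned in context.
  import spacy

  nlp = spacy.load("en_core_web_sm")
  texts = [
      "The tree has many leaves.",       # jy: "leaves" as a noun
      "She leaves the office at noon.",  # jy: "leaves" as a verb
  ]
  for text in texts:
      doc = nlp(text)
      print([(token.text, token.pos_, token.lemma_)
             for token in doc if token.text == "leaves"])
  # jy: typically [('leaves', 'NOUN', 'leaf')] then
  #     [('leaves', 'VERB', 'leave')] (model-dependent)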