# 1. Stemming with the nltk package
from nltk.stem.porter import PorterStemmer
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem import SnowballStemmer

# Demonstrate three classic stemming algorithms on the same word and
# print each result so the differences are visible side by side.

# Porter stemmer: the classic, fairly conservative algorithm.
print(PorterStemmer().stem("multiply"))        # -> "multipli"

# Lancaster stemmer: a more aggressive rule-based algorithm.
print(LancasterStemmer().stem("multiply"))     # -> "multiply"

# Snowball ("Porter2") stemmer: improved Porter; needs a language name.
print(SnowballStemmer("english").stem("multiply"))  # -> "multipli"
# 2. Lemmatization with the spacy package (not stemming in the strict sense)
import spacy

# spaCy performs lemmatization (dictionary base forms) rather than
# rule-based stemming; load the small English pipeline and collect the
# lemma of every token in the sample sentence.
nlp = spacy.load("en_core_web_sm")
tokens = nlp("multiply successfully others leaf leaves")
lemmas = [token.lemma_ for token in tokens]
# -> ['multiply', 'successfully', 'other', 'leaf', 'leave']
print(lemmas)