1. Stemming based on the nltk package
from nltk.stem.porter import PorterStemmer
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem import SnowballStemmer

# Apply three classic stemming algorithms to the same word and print
# each result in turn.  Expected output for "multiply":
#   Porter    -> "multipli"
#   Lancaster -> "multiply"
#   Snowball  -> "multipli"
for stemmer in (PorterStemmer(), LancasterStemmer(), SnowballStemmer("english")):
    print(stemmer.stem("multiply"))
2. Lemmatization based on the spacy package (not stemming in the strict sense)
import spacy

# Lemmatize a short sentence with spaCy's small English pipeline.
# Unlike stemming, lemmatization maps each token to its dictionary form.
nlp = spacy.load("en_core_web_sm")
doc = nlp("multiply successfully others leaf leaves")
# Expected: ['multiply', 'successfully', 'other', 'leaf', 'leave']
print([token.lemma_ for token in doc])