Word2Vec模块

    1. from gensim.models import Word2Vec,KeyedVectors
    2. #训练
    3. model=Word2Vec([a,b,c],size,window,min_count,workers)
    4. #a,b,c 是分词后的list
    5. #size是embedding size
    6. #min_count是最小词频
    7. #保存
    8. model.wv.save_word2vec_format('...bin',binary=True)
    9. #加载
    10. model=KeyedVectors.load_word2vec_format('...bin',binary=True,unicode_errors='ignore')
    11. #vocab查看
    12. model.vocab.keys()
    13. #embedding 查询
    14. model[word]
    15. #相似度查询
    16. model.similarity(word1,word2)
    17. #查询与positive中的词最相似、且与negative中的词最不相似的词(topn为返回个数)
    18. model.most_similar(positive,negative,topn)
    19. #找出列表中与其他word相似度最低的那个word
    20. model.doesnt_match([word1,word2,...])

    https://mp.weixin.qq.com/s/VGgnoyMrPOKP5wkxtAMpyw?utm_medium=hao.caibaojian.com&utm_source=hao.caibaojian.com