热更新
每次都是在es的扩展词典中,手动添加新词语,很坑
(1)每次添加完,都要重启es才能生效,非常麻烦
(2)es是分布式的,可能有数百个节点,你不能每次都一个一个节点上面去修改
es不停机,直接我们在外部某个地方添加新的词语,es中立即热加载到这些新词语
热更新的方案
(1)修改ik分词器源码,然后手动支持从mysql中每隔一定时间,自动加载新的词库
(2)基于ik分词器原生支持的热更新方案,部署一个web服务器,提供一个http接口,通过modified和tag两个http响应头,来提供词语的热更新
用第一种方案,第二种,ik git社区官方都不建议采用,觉得不太稳定

1、下载源码

https://github.com/medcl/elasticsearch-analysis-ik
ik分词器,是个标准的java maven工程,直接导入IDE

2、修改源码

pom.xml:

  1. <dependency>
  2. <groupId>mysql</groupId>
  3. <artifactId>mysql-connector-java</artifactId>
  4. <version>8.0.17</version>
  5. </dependency>

以下修改 Dictionary.java

/**
 * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
 * 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段
 * 
 * @return Dictionary
 */
public static synchronized void initial(Configuration cfg) {
    if (singleton == null) {
        synchronized (Dictionary.class) {
            if (singleton == null) {

                singleton = new Dictionary(cfg);
                singleton.loadMainDict();
                singleton.loadSurnameDict();
                singleton.loadQuantifierDict();
                singleton.loadSuffixDict();
                singleton.loadPrepDict();
                singleton.loadStopWordDict();

                // TODO 自定义的线程,并启动(新增)
                new Thread(new HotDictReloadThread()).start();

                if(cfg.isEnableRemoteDict()){
                    // 建立监控线程
                    for (String location : singleton.getRemoteExtDictionarys()) {
                        // 10 秒是初始延迟可以修改的 60是间隔时间 单位秒
                        pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
                    }
                    for (String location : singleton.getRemoteExtStopWordDictionarys()) {
                        pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
                    }
                }

            }
        }
    }
}


/**
 * 加载主词典及扩展词典
 */
private void loadMainDict() {
    // 建立一个主词典实例
    _MainDict = new DictSegment((char) 0);

    // 读取主词典文件
    Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
    loadDictFile(_MainDict, file, false, "Main Dict");
    // 加载扩展词典
    this.loadExtDict();
    // 加载远程自定义词库
    this.loadRemoteExtDict();
    // TODO 从mysql加载词典(新增方法)
    this.loadMySQLExtDict();
}

/**
 * 从 mysql 加载热更新词典(新增方法)
 */
private void loadMySQLExtDict() {
    Connection conn = null;
    Statement stmt = null;
    ResultSet rs = null;

    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        props.load(new FileInputStream(file.toFile()));

        logger.info("[==========]jdbc-reload.properties");
        for(Object key : props.keySet()) {
            logger.info("[==========]" + key + "=" + props.getProperty(String.valueOf(key)));
        }

        logger.info("[==========]query hot dict from mysql, " + props.getProperty("jdbc.reload.sql") + "......");

        conn = DriverManager.getConnection(
                props.getProperty("jdbc.url"),
                props.getProperty("jdbc.user"),
                props.getProperty("jdbc.password"));
        stmt = conn.createStatement();
        rs = stmt.executeQuery(props.getProperty("jdbc.reload.sql"));

        while(rs.next()) {
            String theWord = rs.getString("word");
            logger.info("[==========]hot word from mysql: " + theWord);
            _MainDict.fillSegment(theWord.trim().toCharArray());
        }

        Thread.sleep(Integer.valueOf(String.valueOf(props.get("jdbc.reload.interval"))));
    } catch (Exception e) {
        logger.error("erorr", e);
    } finally {
        if(rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if(stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if(conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
    }
}


/**
 * 加载用户扩展的停止词词典
 */
private void loadStopWordDict() {
    .........

    // TODO 新增方法
    this.loadMySQLStopwordDict();
}


/**
 * 从mysql加载停用词(新增方法)
 */
private void loadMySQLStopwordDict() {
    Connection conn = null;
    Statement stmt = null;
    ResultSet rs = null;

    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        props.load(new FileInputStream(file.toFile()));

        logger.info("[==========]jdbc-reload.properties");
        for(Object key : props.keySet()) {
            logger.info("[==========]" + key + "=" + props.getProperty(String.valueOf(key)));
        }

        logger.info("[==========]query hot stopword dict from mysql, " + props.getProperty("jdbc.reload.stopword.sql") + "......");

        conn = DriverManager.getConnection(
                props.getProperty("jdbc.url"),
                props.getProperty("jdbc.user"),
                props.getProperty("jdbc.password"));
        stmt = conn.createStatement();
        rs = stmt.executeQuery(props.getProperty("jdbc.reload.stopword.sql"));

        while(rs.next()) {
            String theWord = rs.getString("word");
            logger.info("[==========]hot stopword from mysql: " + theWord);
            _StopWords.fillSegment(theWord.trim().toCharArray());
        }

        Thread.sleep(Integer.valueOf(String.valueOf(props.get("jdbc.reload.interval"))));
    } catch (Exception e) {
        logger.error("erorr", e);
    } finally {
        if(rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if(stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if(conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
    }
}

死循环,不断调用Dictionary.getSingleton().reLoadMainDict(),去重新加载词典

public class HotDictReloadThread implements Runnable {

    private static final Logger logger = ESPluginLoggerFactory.getLogger(HotDictReloadThread.class.getName());

    @Override
    public void run() {
        while(true) {
            logger.info("[==========]reload hot dict from mysql......");
            Dictionary.getSingleton().reLoadMainDict();
        }
    }
}

plugin.xml

<dependencySet>
    <outputDirectory/>
    <useProjectArtifact>true</useProjectArtifact>
    <useTransitiveFiltering>true</useTransitiveFiltering>
    <includes>
        <include>mysql:mysql-connector-java</include>
    </includes>
</dependencySet>

jdbc相关配置 jdbc-reload.properties

jdbc.url=jdbc\:mysql\://localhost\:3306/ik-words?serverTimezone=UTC
jdbc.user=root
jdbc.password=123456
# 词库 sql
jdbc.reload.sql=select word from hot_words
# 停用词 sql
jdbc.reload.stopword.sql=select stop_word as word from stop_words
# 间隔时长
jdbc.reload.interval=1000

3、mvn package打包代码

target\releases\elasticsearch-analysis-ik-X.X.X.zip

6、重启es

https://blog.csdn.net/hqd1870299/article/details/106660017
https://blog.csdn.net/qq_35524586/article/details/88553670
https://blog.csdn.net/weixin_43315211/article/details/99650363