Sphinx(Coreseek)安装和使用指南

1.安装

1.1安装mmseg

./bootstrap # required: the installation fails if this step is skipped
./configure --prefix=/usr/local/mmseg-3.2.14 # set the installation directory
make
make install

1.2安装coreseek

# In csft-4.1/configure.ac
# find:       AM_INIT_AUTOMAKE([-Wall -Werror foreign])
# change to:  AM_INIT_AUTOMAKE([-Wall foreign])
sh buildconf.sh # required: the installation fails if this step is skipped
./configure --prefix=/usr/local/coreseek-4.1 --with-mysql --with-mysql-includes=/usr/local/mysql-5.6.21/include --with-mysql-libs=/usr/local/mysql-5.6.21/lib --with-mmseg --with-mmseg-includes=/usr/local/mmseg-3.2.14/include/mmseg --with-mmseg-libs=/usr/local/mmseg-3.2.14/lib
# In csft-4.1/src/sphinxexpr.cpp (apply this edit before running make)
# find:         T val = ExprEval ( this->m_pArg, tMatch );
# replace with: T val = this->ExprEval ( this->m_pArg, tMatch );
make
make install

2.配置

测试数据表结构

-- Helper table for the indexer: one row per `code`, whose `max` column stores
-- the highest document ID recorded by the index source queries.
-- The UNIQUE KEY on `code` is what lets REPLACE INTO overwrite the existing row.
CREATE TABLE `sph_counter` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `code` varchar(50) NOT NULL DEFAULT '',
  `max` int(10) unsigned NOT NULL,
  `add_time` datetime NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `code` (`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Sample data table that gets indexed: `movie_id` is used as the document ID
-- and `title` is the column fetched for full-text indexing.
CREATE TABLE `movie` (
  `movie_id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '影片ID',
  `old_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '旧ID',
  `sn` varchar(50) NOT NULL COMMENT '影片番号',
  `title` varchar(255) NOT NULL COMMENT '片名',
  `cover` varchar(255) NOT NULL DEFAULT '' COMMENT '影片封面图',
  `publisher_id` int(10) unsigned NOT NULL COMMENT '发行商ID',
  `publish_date` date NOT NULL COMMENT '发行日期',
  `add_time` datetime NOT NULL COMMENT '添加时间',
  PRIMARY KEY (`movie_id`),
  UNIQUE KEY `sn` (`sn`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

coreseek主索引配置

# Data source for the main index: fetches every row of `movie` up to the
# maximum ID that is recorded in sph_counter when indexing starts.
source main_javzoo_movie
{
    type            = mysql

    sql_host        = 127.0.0.1
    sql_user        = root
    sql_pass        =
    sql_db          = javzoo
    sql_port        = 3306    # optional, default is 3306

    # Pre-queries run in the order listed, before any rows are fetched.
    sql_query_pre   = SET NAMES utf8
    # Record the current MAX(movie_id) under code 'movie'; the range query
    # below reads it back as the upper bound.
    sql_query_pre   = REPLACE INTO sph_counter SET `code` = 'movie', `max` = (SELECT MAX(movie_id) FROM movie), add_time = NOW()

    # Ranged fetch: returns the (min, max) document IDs; sql_query is then run
    # once per step with $start/$end replaced by each sub-range's bounds.
    sql_query_range = SELECT MIN(movie_id), (SELECT `max` FROM `sph_counter` WHERE `code` = 'movie') FROM movie
    sql_range_step  = 10000

    # A value that spans several lines must end every line except the last
    # with a backslash; without it the directive value is empty.
    sql_query       = \
        SELECT movie_id, movie_id AS record_id, title \
        FROM movie WHERE movie_id >= $start AND movie_id <= $end ORDER BY movie_id ASC

    # record_id (a copy of movie_id) is stored as an unsigned integer attribute.
    sql_attr_uint   = record_id
}

# Main full-text index built from the main_javzoo_movie source.
index main_javzoo_movie_title
{
    # Where the documents come from and where the index files are written.
    source           = main_javzoo_movie
    path             = /usr/local/coreseek-4.1/var/data/main_javzoo_movie_title

    # Storage options.
    docinfo          = extern
    mlock            = 0

    # Text processing: no stemming, keep one-character words, no HTML stripping.
    morphology       = none
    min_word_len     = 1
    html_strip       = 0

    # Chinese word segmentation via mmseg; charset_dictpath is the directory
    # that holds the mmseg dictionary files. N-gram indexing is switched off.
    charset_type     = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg-3.2.14/etc
    ngram_len        = 0
}

3.增量配置

# Data source for the delta index: fetches only the rows of `movie` whose ID
# lies between the value stored in sph_counter and the current MAX(movie_id).
source delta_javzoo_movie
{
    type            = mysql

    sql_host        = 127.0.0.1
    sql_user        = root
    # Never publish a real password in documentation. Use the same credentials
    # as the main source, since both connect to the same database as the same user.
    sql_pass        =
    sql_db          = javzoo
    sql_port        = 3306

    sql_query_pre   = SET NAMES utf8

    # Lower bound = ID stored in sph_counter, upper bound = newest row.
    # NOTE: the fetch below uses ">= $start", so the row whose ID equals the
    # stored counter value is fetched again by the delta.
    sql_query_range = SELECT (SELECT `max` FROM `sph_counter` WHERE `code` = 'movie'), MAX(movie_id) FROM movie
    sql_range_step  = 10000

    # A value that spans several lines must end every line except the last
    # with a backslash; without it the directive value is empty.
    sql_query       = \
        SELECT movie_id, movie_id AS record_id, title \
        FROM movie WHERE movie_id >= $start AND movie_id <= $end ORDER BY movie_id ASC

    # Advance the counter only after indexing has completed successfully.
    # sql_query_post runs right after the fetch, when indexing can still fail,
    # so permanent bookkeeping belongs in sql_query_post_index instead.
    sql_query_post_index = REPLACE INTO sph_counter SET `code` = 'movie', `max` = (SELECT MAX(movie_id) FROM movie), add_time = NOW()

    # record_id (a copy of movie_id) is stored as an unsigned integer attribute.
    sql_attr_uint   = record_id
}

# Delta full-text index built from the delta_javzoo_movie source.
# Its settings mirror the main index so that the two can be merged.
index delta_javzoo_movie_title
{
    # Where the documents come from and where the index files are written.
    source           = delta_javzoo_movie
    path             = /usr/local/coreseek-4.1/var/data/delta_javzoo_movie_title

    # Storage options.
    docinfo          = extern
    mlock            = 0

    # Text processing: no stemming, keep one-character words, no HTML stripping.
    morphology       = none
    min_word_len     = 1
    html_strip       = 0

    # Chinese word segmentation via mmseg; charset_dictpath is the directory
    # that holds the mmseg dictionary files. N-gram indexing is switched off.
    charset_type     = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg-3.2.14/etc
    ngram_len        = 0
}

4.定时更新脚本

 1 #!/bin/sh -
 2 START_DATE=`date`
 3 echo start $1 index @ $START_DATE
 4 
 5 if [ "$1" = "all" ]
 6 then
 7 /usr/local/coreseek-4.1/bin/indexer --config /usr/local/coreseek-4.1/etc/sphinx.conf --all --rotate >> /dev/null
 8 elif [ "$1" = "main" ]
 9 then
10 /usr/local/coreseek-4.1/bin/indexer --config /usr/local/coreseek-4.1/etc/sphinx.conf main_javzoo_movie_title --rotate >> /dev/null
11 elif [ "$1" = "delta" ]
12 then
13 /usr/local/coreseek-4.1/bin/indexer --config /usr/local/coreseek-4.1/etc/sphinx.conf delta_javzoo_movie_title --rotate >> /dev/null
14 elif [ "$1" = "merge" ]
15 then
16 /usr/local/coreseek-4.1/bin/indexer --config /usr/local/coreseek-4.1/etc/sphinx.conf --merge main_javzoo_movie_title delta_javzoo_movie_title --rotate >> /dev/null
17 else
18 echo error action!
19 exit 1
20 fi
21 
22 END_DATE=`date`
23 if [ "$?" -eq "0" ]
24 then
25     echo complete @ $END_DATE
26 else
27     echo error @ $END_DATE
28 fi
29 echo 

用法

# Rebuild all indexes
sh sphinx_index.sh all
# Rebuild the main index
sh sphinx_index.sh main
# Rebuild the delta index
sh sphinx_index.sh delta
# Merge the delta index into the main index
sh sphinx_index.sh merge

备注

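配合crond实现定时增量更新和合并:例如每隔几分钟依次执行 delta 和 merge(增量索引每次重建后都会推进 sph_counter 中的计数,因此应在下一次 delta 之前先把增量合并进主索引),并在每天凌晨业务低峰期执行一次 main 重建主索引。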

原文地址:https://www.cnblogs.com/koboshi/p/4298565.html