Sphinx初探之安装

在 CentOS 或 Red Hat 上安装 Sphinx
1.首先安装依赖包
$ yum install postgresql-libs unixODBC
2.安装软件
$ rpm -Uhv sphinx-2.2.10-1.rhel6.x86_64.rpm
3.启动服务
$ service searchd start

[root@face sphinx-2.2.10]# find / -name sphinx
/var/run/sphinx
/var/log/sphinx
/var/lib/sphinx
/etc/logrotate.d/sphinx
/etc/sphinx
/usr/share/sphinx
/usr/share/sphinx/api/ruby/spec/sphinx
/usr/share/sphinx/api/ruby/lib/sphinx

mysql> #创建测试库
mysql> create database test;
Query OK, 1 row affected (0.00 sec)
#插入测试数据
mysql -uroot -ppassword test < /usr/share/doc/sphinx-2.2.10/example.sql

mysql> desc tags;
+-------+---------+------+-----+---------+-------+
| Field | Type    | Null | Key | Default | Extra |
+-------+---------+------+-----+---------+-------+
| docid | int(11) | NO   | PRI | NULL    |       |
| tagid | int(11) | NO   | PRI | NULL    |       |
+-------+---------+------+-----+---------+-------+
2 rows in set (0.00 sec)
#表结构
mysql> desc documents;
+------------+--------------+------+-----+---------+----------------+
| Field      | Type         | Null | Key | Default | Extra          |
+------------+--------------+------+-----+---------+----------------+
| id         | int(11)      | NO   | PRI | NULL    | auto_increment |
| group_id   | int(11)      | NO   |     | NULL    |                |
| group_id2  | int(11)      | NO   |     | NULL    |                |
| date_added | datetime     | NO   |     | NULL    |                |
| title      | varchar(255) | NO   |     | NULL    |                |
| content    | text         | NO   |     | NULL    |                |
+------------+--------------+------+-----+---------+----------------+
6 rows in set (0.00 sec)
#表数据
mysql> select * from documents;
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
| id | group_id | group_id2 | date_added          | title           | content                                                                   |
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
|  1 |        1 |         5 | 2016-01-15 14:24:23 | test one        | this is my test document number one. also checking search within phrases. |
|  2 |        1 |         6 | 2016-01-15 14:24:23 | test two        | this is my test document number two                                       |
|  3 |        2 |         7 | 2016-01-15 14:24:23 | another doc     | this is another group                                                     |
|  4 |        2 |         8 | 2016-01-15 14:24:23 | doc number four | this is to test groups                                                    |
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
4 rows in set (0.00 sec)

mysql> select * from tags;
+-------+-------+
| docid | tagid |
+-------+-------+
|     1 |     1 |
|     1 |     3 |
|     1 |     5 |
|     1 |     7 |
|     2 |     2 |
|     2 |     4 |
|     2 |     6 |
|     3 |    15 |
|     4 |     7 |
|     4 |    40 |
+-------+-------+
10 rows in set (0.00 sec)

#sphinx的配置文件
[root@face sphinx]# grep "^$\|^#" -v sphinx.conf
source src1
{
    #数据库类型 数据来源信息
    type = mysql
    #数据库的IP
    sql_host = localhost
    #数据库用户
    sql_user = root
    #数据库密码
    sql_pass = xxxxxx
    #数据库库名
    sql_db = test
    #默认的配置文件没有这一句 需要注意
    sql_sock = /tmp/mysql.sock
    sql_port = 3306 # optional, default is 3306
    #定义取数据的SQL,第一列ID列必须为唯一的正整数值
    sql_query = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents
    sql_attr_uint = group_id
    sql_attr_timestamp = date_added
}
index test1
{
    #定义索引的源
    source = src1
    #设置生成的索引存放路径
    path = /var/lib/sphinx/test1
}
index testrt
{
    #实时索引类型
    type = rt
    rt_mem_limit = 128M
    path = /var/lib/sphinx/testrt
    rt_field = title
    rt_field = content
    rt_attr_uint = gid
}
#定义indexer配置选项
indexer
{
    #定义生成索引过程使用内存的限制
    mem_limit = 128M
}
##定义searchd守护进程的相关选项
searchd
{
        #tcp        0      0 0.0.0.0:9312                0.0.0.0:*                   LISTEN      9289/searchd
        listen                  = 9312
        listen                  = 9306:mysql41
        #进程服务日志
        log                     = /var/log/sphinx/searchd.log
        #查询日志
        query_log               = /var/log/sphinx/query.log
        #网络客户端请求的读超时时间
        read_timeout            = 5
        #子进程数
        max_children            = 30
        #进程文件的路径
        pid_file                = /var/run/sphinx/searchd.pid

#启用无缝seamless轮转,防止searchd轮转在需要预取大量数据的索引时停止响应
#也就是说在任何时刻查询都可用,或者使用旧索引,或者使用新索引

        seamless_rotate         = 1
        preopen_indexes         = 1
        unlink_old              = 1
        workers                 = threads # for RT to work
        binlog_path             = /var/lib/sphinx/
}
[root@face sphinx]# service searchd start
Starting searchd: Sphinx 2.2.10-id64-release (2c212e0)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)

using config file '/etc/sphinx/sphinx.conf'...
listening on all interfaces, port=9312
listening on all interfaces, port=9306
precaching index 'test1'
WARNING: index 'test1': preload: /var/lib/sphinx/test1.sph is invalid header file (too old index version?); NOT SERVING
precaching index 'testrt'
precached 2 indexes in 0.004 sec
                                                           [  OK  ]
#如果启动有warning 注意这儿的权限
[root@face sphinx]# pwd
/var/lib/sphinx
[root@face sphinx]# chown sphinx.sphinx ./ -R
[root@face sphinx]# 
[root@face sphinx]# ll
total 36
-rw-------. 1 sphinx sphinx   8 Jan 15 15:28 binlog.001
-rw-------. 1 sphinx sphinx   0 Jan 15 15:28 binlog.lock
-rw-------. 1 sphinx sphinx  11 Jan 15 15:28 binlog.meta
-rw-r--r--. 1 sphinx sphinx 128 Jan 15 15:32 test1.spa
-rw-r--r--. 1 sphinx sphinx 148 Jan 15 15:32 test1.spd
-rw-r--r--. 1 sphinx sphinx   1 Jan 15 15:32 test1.spe
-rw-r--r--. 1 sphinx sphinx 371 Jan 15 15:32 test1.sph
-rw-r--r--. 1 sphinx sphinx 190 Jan 15 15:32 test1.spi
-rw-r--r--. 1 sphinx sphinx   0 Jan 15 15:32 test1.spk
-rw-r--r--. 1 sphinx sphinx   0 Jan 15 15:32 test1.spm
-rw-r--r--. 1 sphinx sphinx  37 Jan 15 15:32 test1.spp
-rw-r--r--. 1 sphinx sphinx   1 Jan 15 15:32 test1.sps
-rw-------. 1 sphinx sphinx   0 Jan 15 15:28 testrt.lock

#启动的时候不能有warning
[root@face sphinx]# service searchd restart
Stopping searchd:                                          [  OK  ]
Starting searchd: Sphinx 2.2.10-id64-release (2c212e0)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)

using config file '/etc/sphinx/sphinx.conf'...
listening on all interfaces, port=9312
listening on all interfaces, port=9306
precaching index 'test1'
precaching index 'testrt'                                   
precached 2 indexes in 0.001 sec
                                                           [  OK  ]
#searchd运行时重建索引,不加 --rotate 有可能起不来(索引文件被searchd锁定)
[root@face sphinx]# indexer --all --rotate
Sphinx 2.2.10-id64-release (2c212e0)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)

using config file '/etc/sphinx/sphinx.conf'...
indexing index 'test1'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 193 bytes
total 0.003 sec, 58949 bytes/sec, 1221.74 docs/sec
skipping non-plain index 'testrt'...
total 4 reads, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
total 12 writes, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
rotating indices: successfully sent SIGHUP to searchd (pid=9289).
#连接server 查看数据状态
[root@face sphinx]# mysql -h0 -P9306
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 2.2.10-id64-release (2c212e0)

Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> show tables;
+--------+-------+
| Index  | Type  |
+--------+-------+
| test1  | local |
| testrt | rt    |
+--------+-------+
2 rows in set (0.00 sec)

mysql> select * from test1;
+------+----------+------------+
| id   | group_id | date_added |
+------+----------+------------+
|    1 |        1 | 1452839063 |
|    2 |        1 | 1452839063 |
|    3 |        2 | 1452839063 |
|    4 |        2 | 1452839063 |
+------+----------+------------+
4 rows in set (0.00 sec)
mysql> select * from testrt;
Empty set (0.00 sec)

mysql> INSERT INTO testrt VALUES ( 1, 'first record', 'test one', 123 );

Query OK, 1 row affected (0.00 sec)

mysql> INSERT INTO testrt VALUES ( 2, 'second record', 'test two', 234 );
Query OK, 1 row affected (0.00 sec)

mysql> INSERT INTO testrt VALUES ( 3, 'three record', 'three', 334 );
Query OK, 1 row affected (0.00 sec)

mysql> SELECT * FROM testrt;
+------+------+
| id   | gid  |
+------+------+
|    1 |  123 |
|    2 |  234 |
|    3 |  334 |
+------+------+
3 rows in set (0.00 sec)

mysql> SELECT * FROM testrt WHERE MATCH('test');
+------+------+
| id   | gid  |
+------+------+
|    1 |  123 |
|    2 |  234 |
+------+------+
2 rows in set (0.00 sec)


#模糊匹配
mysql> SELECT * FROM test1 WHERE MATCH('my document');
+------+----------+------------+
| id   | group_id | date_added |
+------+----------+------------+
|    1 |        1 | 1452839063 |
|    2 |        1 | 1452839063 |
+------+----------+------------+
2 rows in set (0.00 sec)


mysql> SELECT *, WEIGHT() FROM test1 WHERE MATCH('"document one"/1');SHOW META;
+------+----------+------------+----------+
| id   | group_id | date_added | weight() |
+------+----------+------------+----------+
|    1 |        1 | 1452839063 |     2663 |
|    2 |        1 | 1452839063 |     1528 |
+------+----------+------------+----------+
2 rows in set (0.18 sec)
#相关的元数据信息
+---------------+----------+
| Variable_name | Value    |
+---------------+----------+
| total         | 2        |
| total_found   | 2        |
| time          | 0.175    |
| keyword[0]    | document |
| docs[0]       | 2        |
| hits[0]       | 2        |
| keyword[1]    | one      |
| docs[1]       | 1        |
| hits[1]       | 2        |
+---------------+----------+
9 rows in set (0.00 sec)

#相关的统计信息
mysql> SET profiling=1;SELECT * FROM test1 WHERE id IN (1,2,4);SHOW PROFILE;
Query OK, 0 rows affected (0.00 sec)

+------+----------+------------+
| id   | group_id | date_added |
+------+----------+------------+
|    1 |        1 | 1452839063 |
|    2 |        1 | 1452839063 |
|    4 |        2 | 1452839063 |
+------+----------+------------+
3 rows in set (0.00 sec)

+--------------+----------+----------+---------+
| Status       | Duration | Switches | Percent |
+--------------+----------+----------+---------+
| unknown      | 0.000206 | 4        | 65.61   |
| net_read     | 0.000004 | 1        | 1.27    |
| local_search | 0.000040 | 1        | 12.74   |
| sql_parse    | 0.000027 | 1        | 8.60    |
| fullscan     | 0.000002 | 1        | 0.64    |
| finalize     | 0.000015 | 1        | 4.78    |
| aggregate    | 0.000008 | 2        | 2.55    |
| net_write    | 0.000012 | 1        | 3.82    |
| eval_post    | 0.000000 | 1        | 0.00    |
| total        | 0.000314 | 13       | 0       |
+--------------+----------+----------+---------+
10 rows in set (0.00 sec)


mysql> SELECT id, id%3 idd FROM test1 WHERE MATCH('this is | nothing') GROUP BY idd;SHOW PROFILE;
+------+------+
| id   | idd  |
+------+------+
|    1 |    1 |
|    2 |    2 |
|    3 |    0 |
+------+------+
3 rows in set (0.18 sec)

+--------------+----------+----------+---------+
| Status       | Duration | Switches | Percent |
+--------------+----------+----------+---------+
| unknown      | 0.000430 | 6        | 0.24    |
| net_read     | 0.000014 | 1        | 0.01    |
| local_search | 0.000192 | 1        | 0.11    |
| sql_parse    | 0.000056 | 1        | 0.03    |
| dict_setup   | 0.000002 | 1        | 0.00    |
| parse        | 0.000040 | 1        | 0.02    |
| transforms   | 0.000003 | 1        | 0.00    |
| init         | 0.177525 | 3        | 99.51   |
| read_docs    | 0.000053 | 2        | 0.03    |
| get_docs     | 0.000008 | 5        | 0.00    |
| get_hits     | 0.000005 | 2        | 0.00    |
| filter       | 0.000002 | 1        | 0.00    |
| rank         | 0.000001 | 3        | 0.00    |
| sort         | 0.000014 | 2        | 0.01    |
| finalize     | 0.000004 | 1        | 0.00    |
| aggregate    | 0.000017 | 2        | 0.01    |
| net_write    | 0.000027 | 1        | 0.02    |
| eval_post    | 0.000001 | 1        | 0.00    |
| total        | 0.178394 | 35       | 0       |
+--------------+----------+----------+---------+
19 rows in set (0.00 sec)


mysql> CALL KEYWORDS ('one two three', 'test1');
+------+-----------+------------+
| qpos | tokenized | normalized |
+------+-----------+------------+
| 1    | one       | one        |
| 2    | two       | two        |
| 3    | three     | three      |
+------+-----------+------------+
3 rows in set (0.00 sec)
#第三个参数为1时输出统计信息:docs表示包含该关键词的文档数,hits表示该关键词出现的总次数
mysql> CALL KEYWORDS ('one two three', 'test1', 1);
+------+-----------+------------+------+------+
| qpos | tokenized | normalized | docs | hits |
+------+-----------+------------+------+------+
| 1    | one       | one        | 1    | 2    |
| 2    | two       | two        | 1    | 2    |
| 3    | three     | three      | 0    | 0    |
+------+-----------+------------+------+------+
3 rows in set (0.00 sec)

 试验到这儿吧

原文地址:https://www.cnblogs.com/similarface/p/5133515.html