转:PHP Sphinx 详细配置
1.下载PHP客户端安装:
pecl.php.net/package/sphinx
vim sphinxclient.c
void sock_close ( int sock );
改为
static void sock_close ( int sock );
在php.ini加扩展,客户端安装完毕
2.安装sphinx,前提是你已经安装mysql且安装了mysql-devel。从源码安装mysql的,
mysql-devel都已经安装;yum安装的运行:
yum -y install mysql-devel
下载sphinx2.0.1地址:
sphinxsearch.com/downloads/
tar -xvzf sphinx-2.0.1-beta.tar.gz
cd sphinx-2.0.1-beta
./configure --prefix=/usr/local/sphinx --with-mysql --with-iconv
备注:64位增加参数 --enable-id64
make && make install
cd /usr/local/sphinx/etc/
cp sphinx.conf.dist sphinx.conf
[php]
1. #
2. # Sphinx configuration file sample
3. #
4. # WARNING! While this sample file mentions all available options,
5. # it contains (very) short helper descriptions only. Please refer to
6. # doc/sphinx.html for details.
7. #
8.
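9. #############################################################################
10. ## data source definition
11. #############################################################################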
12.
13. source src1
14. {
15.    # data source type. mandatory, no default value
16.    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
17.    type            = mysql
18.
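19.    #####################################################################
20.    ## SQL settings (for 'mysql' and 'pgsql' types)
21.    #####################################################################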
22.
23.    # some straightforward parameters for SQL source types
24.    sql_host        = localhost
25.    sql_user        = root
26.    sql_pass        = ******
27.    sql_db            = ******
28.    sql_port        = 3306    # optional, default is 3306
29.
30.    # UNIX socket name
31.    # optional, default is empty (reuse client library defaults)
32.    # usually '/var/lib/mysql/mysql.sock' on Linux
33.    # usually '/tmp/mysql.sock' on FreeBSD
34.    #
35.    sql_sock        = /tmp/mysql.sock
36.
37.
38.    # MySQL specific client connection flags
39.    # optional, default is 0
40.    # 数据传输⽅式
41.    # mysql_connect_flags    = 32 # enable compression
42.
43.    # MySQL specific SSL certificate settings
44.    # optional, defaults are empty
45.    # SSL链接
46.    # mysql_ssl_cert        = /etc/ssl/client-cert.pem
47.    # mysql_ssl_key        = /etc/ssl/client-key.pem
48.    # mysql_ssl_ca        = /etc/ssl/cacert.pem
49.
50.    # MS SQL specific Windows authentication mode flag
51.    # MUST be in sync with charset_type index-level setting
52.    # optional, default is 0
53.    #
54.    # mssql_winauth        = 1 # use currently logged on user credentials
57.    # MS SQL specific Unicode indexing flag
58.    # optional, default is 0 (request SBCS data)
59.    #
60.    # mssql_unicode        = 1 # request Unicode data from server
61.
62.
63.    # ODBC specific DSN (data source name)
64.    # mandatory for odbc source type, no default value
65.    #
66.    # odbc_dsn        = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
67.    # sql_query        = SELECT id, data FROM documents.csv
68.
69.
70.    # ODBC and MS SQL specific, per-column buffer sizes
71.    # optional, default is auto-detect
72.    #
73.    # sql_column_buffers    = content=12M, comments=1M
74.
75.
76.    # pre-query, executed before the main fetch query
77.    # multi-value, optional, default is empty list of queries
78.    # 发送SQL语句前发送
79.    sql_query_pre        = SET NAMES utf8
80.    sql_query_pre        = SET SESSION query_cache_type=OFF
81.
82.
83.    # main document fetch query
84.    # mandatory, integer document ID field MUST be the first selected column
85.    # 需要查询的表构建查询
86.    sql_query        = \
87.        SELECT id,target_type,genre,stars,sub_title,sports_team,music_band,music_album \
88.        FROM ko_link
89. #如果多个数据源并要在⼀个索引,必须要保持字段的顺序数量跟数据都要⼀致,否则将出错
90.
91.
92.    # joined/payload field fetch query
93.    # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
94.    # payload fields let you attach custom per-keyword values (eg. for ranking)
95.    #
96.    # syntax is FIELD-NAME 'from'  ( 'query' | 'payload-query' ); QUERY
97.    # joined field QUERY should return 2 columns (docid, text)
98.    # payload field QUERY should return 3 columns (docid, keyword, weight)
99.    #
100.    # REQUIRES that query results are in ascending document ID order!
101.    # multi-value, optional, default is empty list of queries
102.    #  添加字段,来源于表的自动连接
103. # 字段结果集保持为
104. # (1,tags1)
105. # (1,tags2)
106. # (2,tags3)
107. # (2,tags4)
108. # 添加字段将⽤于搜索,结果如有第3个字段,第3个字段表⽰该记录的权重,权重为⼤于1的值
109.    # sql_joined_field    = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
110.    # sql_joined_field    = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
111.
112.
113.    # file based field declaration
114.    #
115.    # content of this field is treated as a file name
116.    # and the file gets loaded and indexed in place of a field
117.    #
118.    # max file size is limited by max_file_field_buffer indexer setting
119.    # file IO errors are non-fatal and get reported as warnings
120.    # 把字段声明放⼊⽂件
121.    # sql_file_field        = content_file_path
122.
123.
124.    # range query setup, query that must return min and max ID values
125.    # optional, default is empty
126.    #
127.    # sql_query will need to reference $start and $end boundaries
128.    # if using ranged query:
129.    # 分区查询,防⽌MYSQL死锁
130.    # sql_query        = \
131.    #    SELECT doc.id, doc.id AS group, doc.title, doc.data \
132.    #    FROM documents doc \
133.    #    WHERE id>=$start AND id<=$end
134.    #
135.    # sql_query_range        = SELECT MIN(id),MAX(id) FROM documents
136.
137.
138.    # range query step
139.    # optional, default is 1024
140.    # 分区查询跳步
141.    # sql_range_step        = 1000
142.
143.
144.
145.
146.    # unsigned integer attribute declaration
147.    # multi-value (an arbitrary number of attributes is allowed), optional
148.    # optional bit size can be specified, default is 32
149.    # 声明⽆符号数字段
150.    #sql_attr_uint        = target_type
151.    # sql_attr_uint        = forum_id:9 # 9 bits for forum_id
152.    #sql_attr_uint        = group_id
153.    #声明BOOL字段
154.    # boolean attribute declaration
156.    # equivalent to sql_attr_uint with 1-bit size
157.    #
158.    # sql_attr_bool        = is_deleted
159.
160.
161.    # bigint attribute declaration
162.    # multi-value (an arbitrary number of attributes is allowed), optional
163.    # declares a signed (unlike uint!) 64-bit attribute
164.    # 声明长整字段
165.    # sql_attr_bigint        = my_bigint_id
166.
167.
168.    # UNIX timestamp attribute declaration
169.    # multi-value (an arbitrary number of attributes is allowed), optional
170.    # similar to integer, but can also be used in date functions
171.    # 声明时间字段
172.    # sql_attr_timestamp    = posted_ts
173.    # sql_attr_timestamp    = last_edited_ts
174.    #sql_attr_timestamp    = date_added
175.
176.    # string ordinal attribute declaration
177.    # multi-value (an arbitrary number of attributes is allowed), optional
178.    # sorts strings (bytewise), and stores their indexes in the sorted list
179.    # sorting by this attr is equivalent to sorting by the original strings
180.    # 声明字符串字段⽤于排序等,但此字段不会被存储
181.    # sql_attr_str2ordinal    = author_name
182.
183.
184.    # floating point attribute declaration
185.    # multi-value (an arbitrary number of attributes is allowed), optional
186.    # values are stored in single precision, 32-bit IEEE 754 format
187.    # 声明浮点字段
188.    # sql_attr_float        = lat_radians
189.    # sql_attr_float        = long_radians
190.
191.
192.    # multi-valued attribute (MVA) attribute declaration
193.    # multi-value (an arbitrary number of attributes is allowed), optional
194.    # MVA values are variable length lists of unsigned 32-bit integers
195.    #
196.    # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
197.    # ATTR-TYPE is 'uint' or 'timestamp'
198.    # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
199.    # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
200.    # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
201.    # 声明复合字段
202.    # sql_attr_multi        = uint tag from query; SELECT docid, tagid FROM tags
203.    # sql_attr_multi        = uint tag from ranged-query; \
204.    #    SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
205.    #    SELECT MIN(docid), MAX(docid) FROM tags
206.
207.
208.    # string attribute declaration
209.    # multi-value (an arbitrary number of these is allowed), optional
210.    # lets you store and retrieve strings
211.    # 只是把数据存储,但不会索引该字段
212.    # sql_attr_string        = stitle
213.
214.
215.    # wordcount attribute declaration
216.    # multi-value (an arbitrary number of these is allowed), optional
217.    # lets you count the words at indexing time
218.    # 将转化成关键字的字段,⽤于提⾼匹配率
219.    # sql_attr_str2wordcount    = stitle
220.
221.
222.    # combined field plus attribute declaration (from a single column)
223.    # stores column as an attribute, but also indexes it as a full-text field
224.    # 跟sql_attr_string不同是该属性加⼊索引
225.    # sql_field_string    = author
226.    # sql_field_str2wordcount    = title
227.
228.
229.    # post-query, executed on sql_query completion
230.    # optional, default is empty
231.    # 取后查询
232.    # sql_query_post        =
233.
234.
235.    # post-index-query, executed on successful indexing completion
236.    # optional, default is empty
237.    # $maxid expands to max document ID actually fetched from DB
238.    # 索引后查询
239.    # sql_query_post_index    = REPLACE INTO counters ( id, val ) \
240.    #    VALUES ( 'max_indexed_id', $maxid )
241.
242.
243.    # ranged query throttling, in milliseconds
244.    # optional, default is 0 which means no delay
245.    # enforces given delay before each query step
246.    #分区查询的时间间隔
247.    sql_ranged_throttle    = 0
248.
249.    # document info query, ONLY for CLI search (ie. testing and debugging)
250.    # optional, default is empty
251.    # must contain $id macro and must fetch the document by that id
252.    #命令⾏调试查询结果⽤
253.    sql_query_info        = SELECT * FROM ko_link WHERE id=$id
255.    # kill-list query, fetches the document IDs for kill-list
256.    # k-list will suppress matches from preceding indexes in the same query
257.    # optional, default is empty
258.    ##清理指定查询ID列表,对于数据的更改
259.    # sql_query_killlist    = SELECT id FROM documents WHERE edited>=@last_reindex
260.
261.
262.    # columns to unpack on indexer side when indexing
263.    # multi-value, optional, default is empty list
264.    # 启⽤ZIP压缩可以降低系统负载但必须保证zlib库zlib-dev库可⽤
265.    # unpack_zlib        = zlib_column
266.    # unpack_mysqlcompress    = compressed_column
267.    # unpack_mysqlcompress    = compressed_column_2
268.
269.
270.    # maximum unpacked length allowed in MySQL COMPRESS() unpacker
271.    # optional, default is 16M
272.    # 压缩缓存区⼤⼩不能⼩于字段存储值
273.    # unpack_mysqlcompress_maxsize    = 16M
274.
275.
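276.    #####################################################################
277.    ## xmlpipe2 配置
278.    #####################################################################
279.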
280.    # type            = xmlpipe
281.
282.    # shell command to invoke xmlpipe stream producer
283.    # mandatory
284.    #
285.    # xmlpipe_command        = cat /usr/local/sphinx/var/test.xml
286.
287.    # xmlpipe2 field declaration
288.    # multi-value, optional, default is empty
289.    #
290.    # xmlpipe_field        = subject
291.    # xmlpipe_field        = content
292.
293.
294.    # xmlpipe2 attribute declaration
295.    # multi-value, optional, default is empty
296.    # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
297.    #
298.    # xmlpipe_attr_timestamp    = published
299.    # xmlpipe_attr_uint    = author_id
300.
301.
302.    # perform UTF-8 validation, and filter out incorrect codes
303.    # avoids XML parser choking on non-UTF-8 documents
304.    # optional, default is 0
305.    #
306.    # xmlpipe_fixup_utf8    = 1
307. }
308.
309.
310. # inherited source example
311. # 继承数据源
312. # all the parameters are copied from the parent source,
313. # and may then be overridden in this source definition
314. #source src1throttled : src1
315. #{
316. #    sql_ranged_throttle    = 100
317. #}
318.
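319. #############################################################################
320. ## index definition
321. #############################################################################
322.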
323. # local index example
324. #
325. # this is an index which is stored locally in the filesystem
326. #
327. # all indexing-time options (such as morphology and charsets)
328. # are configured per local index
329. index test1
330. {
331.    # index type
332.    # optional, default is 'plain'
333.    # known values are 'plain', 'distributed', and 'rt' (see samples below)
334.    #索引类型本地分布式
335.    # type            = plain
336.
337.    # document source(s) to index
338.    # multi-value, mandatory
339.    # document IDs must be globally unique across all sources
340.    #数据源,可以多个数据源
341.    source            = src1
342.
343.    # index files path and file name, without extension
344.    # mandatory, path must be writable, extensions will be auto-appended
345.    # 索引保存路径
346.    path            = /usr/local/sphinx/var/data/test1
347.
348.    # document attribute values (docinfo) storage mode
349.    # optional, default is 'extern'
350.    # known values are 'none', 'extern' and 'inline'
351.    #索引存储⽅式
352.    docinfo            = extern
354.    # memory locking for cached data (.spa and .spi), to prevent swapping
355.    # optional, default is 0 (do not mlock)
356.    # requires searchd to be run from root
357.    #内存锁定需要保证⾜够权限
358.    mlock            = 0
359.
360.    # a list of morphology preprocessors to apply
361.    # optional, default is empty
362.    #
363.    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
364.    # 'soundex', and 'metaphone'; additional preprocessors available from
365.    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
366.    # (see libstemmer_c/)
367.    # 词语提取器
368.    # morphology        = stem_en, stem_ru, soundex
369.    # morphology        = libstemmer_german
370.    # morphology        = libstemmer_sv
371.    morphology        = stem_en
372.
373.    # minimum word length at which to enable stemming
374.    # optional, default is 1 (stem everything)
375.    # 词⼲化的最⼩词长
376.    # min_stemming_len    = 1
377.
378.
379.    # stopword files list (space separated)
380.    # optional, default is empty
381.    # contents are plain text, charset_table and stemming are both applied
382.    # 停⽤搜索词
383.    # stopwords        = /usr/local/sphinx/var/data/stopwords.txt
384.
385.
386.    # wordforms file, in "mapfrom > mapto" plain text format
387.    # optional, default is empty
388.    # 词型字典可⽤spelldump⼯具⽣成
389.    # wordforms        = /usr/local/sphinx/var/data/wordforms.txt
390.
391.
392.    # tokenizing exceptions file
393.    # optional, default is empty
394.    #Token特例文件,就是有些词是完整词意,不能拆分索引,如 AT&T 跟 AT & T
395.    # plain text, case sensitive, space insensitive in map-from part
396.    # one "Map Several Words => ToASingleOne" entry per line
397.    #
398.    # exceptions        = /usr/local/sphinx/var/data/exceptions.txt
399.
400.
401.    # minimum indexed word length
402.    # default is 1 (index everything)
403.    #  最⼩索引长度,就是⼩于指定长度的词不被索引
404.    min_word_len        = 1
405.
406.    # charset encoding type
407.    # optional, default is 'sbcs'
408.    # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
409.    # 字符编码
410.    charset_type        = utf-8
411.
412.    # charset definition and case folding rules "table"
413.    # optional, default value depends on charset_type
414.    #
415.    # defaults are configured to include English and Russian characters only
416.    # you need to change the table to include additional ones
417.    # this behavior MAY change in future versions
418.    #
419.    # 'sbcs' default value is
420.    # charset_table        = 0..9, A..Z-&, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
421.    # 转换字符表
422.    # 'utf-8' default value is
423.    # charset_table        = 0..9, A..Z-&, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
424.
425.
426.    # ignored characters list
427.    # optional, default value is empty
428.    #  忽略字符表
429.    # ignore_chars        = U+00AD
430.
431.
432.    # minimum word prefix length to index
433.    # optional, default is 0 (do not index prefixes)
434.    #索引的最⼩前缀长度,⼩⼼使⽤,索引和搜索的时间皆会恶化
435.    # min_prefix_len        = 0
436.
437.
438.    # minimum word infix length to index
439.    # optional, default is 0 (do not index infixes)
440.    #索引的最⼩中缀长度⼩⼼使⽤,索引和搜索的时间皆会恶化
441.    # min_infix_len        = 0
442.
443.
444.    # list of fields to limit prefix/infix indexing to
445.    # optional, default value is empty (index all fields in prefix/infix mode)
446.    # 未知
447.    # prefix_fields        = filename
448.    # infix_fields        = url, domain
449.
450.
451.    # enable star-syntax (wildcards) when searching prefix/infix indexes

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。

发表评论