转:php sphinx 详细配置
1.下载PHP客户端安装:
http://pecl.php.net/package/sphinx
vim sphinxclient.c
找到
void sock_close ( int sock );
改为
static void sock_close ( int sock );
在php.ini加扩展,客户端安装完毕
2.安装sphinx,前提是你已经安装mysql且安装了mysql-devel。从源码安装mysql的,
mysql-devel都已经安装;yum安装的运行
yum -y install mysql-devel
下载sphinx2.0.1地址:
http://sphinxsearch.com/downloads/
tar -xvzf sphinx-2.0.1-beta.tar.gz
cd sphinx-2.0.1-beta
./configure --prefix=/usr/local/sphinx --with-mysql --with-iconv
备注:64位增加参数 --enable-id64
make && make install
cd /usr/local/sphinx/etc/
cp sphinx.conf.dist sphinx.conf
[php]
1. #
2. # Sphinx configuration file sample
3. #
4. # WARNING! While this sample file mentions all available options,
5. # it contains (very) short helper descriptions only. Please refer to
6. # doc/sphinx.html for details.
7. #
8.
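9. #############################################################################
10. ## data source definition
11. #############################################################################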
12.
13. source src1
14. {
15. # data source type. mandatory, no default value
16. # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
17. type = mysql
18.
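19. #############################################################################
20. ## SQL settings (for 'mysql' and 'pgsql' types)
21. #############################################################################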
22.
23. # some straightforward parameters for SQL source types
24. sql_host = localhost
25. sql_user = root
26. sql_pass = ******
27. sql_db = ******
28. sql_port = 3306 # optional, default is 3306
29.
30. # UNIX socket name
31. # optional, default is empty (reuse client library defaults)
32. # usually '/var/lib/mysql/mysql.sock' on Linux
33. # usually '/tmp/mysql.sock' on FreeBSD
34. #
35. sql_sock = /tmp/mysql.sock
36.
37.
38. # MySQL specific client connection flags
39. # optional, default is 0
40. # 数据传输⽅式
41. # mysql_connect_flags = 32 # enable compression
42.
43. # MySQL specific SSL certificate settings
44. # optional, defaults are empty
45. # SSL连接
46. # mysql_ssl_cert = /etc/ssl/client-cert.pem
47. # mysql_ssl_key = /etc/ssl/client-key.pem
48. # mysql_ssl_ca = /etc/ssl/cacert.pem
49.
50. # MS SQL specific Windows authentication mode flag
51. # MUST be in sync with charset_type index-level setting
52. # optional, default is 0
53. #
54. # mssql_winauth = 1 # use currently logged on user credentials
57. # MS SQL specific Unicode indexing flag
58. # optional, default is 0 (request SBCS data)
59. #
60. # mssql_unicode = 1 # request Unicode data from server
61.
62.
63. # ODBC specific DSN (data source name)
64. # mandatory for odbc source type, no default value
65. #
66. # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
67. # sql_query = SELECT id, data FROM documents.csv
68.
69.
70. # ODBC and MS SQL specific, per-column buffer sizes
71. # optional, default is auto-detect
72. #
73. # sql_column_buffers = content=12M, comments=1M
74.
75.
76. # pre-query, executed before the main fetch query
77. # multi-value, optional, default is empty list of queries
78. # 发送SQL语句前发送
79. sql_query_pre = SET NAMES utf8
80. sql_query_pre = SET SESSION query_cache_type=OFF
81.
82.
83. # main document fetch query
84. # mandatory, integer document ID field MUST be the first selected column
85. # 需要查询的表构建查询
86. sql_query = \
87. SELECT id,target_type,genre,stars,sub_title,sports_team,music_band,music_album \
88. FROM ko_link
89. # 如果多个数据源要放在一个索引中,必须保持字段的顺序、数量和数据类型都一致,否则将出错
90.
91.
92. # joined/payload field fetch query
93. # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
94. # payload fields let you attach custom per-keyword values (eg. for ranking)
95. #
96. # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
97. # joined field QUERY should return 2 columns (docid, text)
98. # payload field QUERY should return 3 columns (docid, keyword, weight)
99. #
100. # REQUIRES that query results are in ascending document ID order!
101. # multi-value, optional, default is empty list of queries
102. # 添加字段,来源于表,自动连接
103. # 字段结果集保持为
104. # (1,tags1)
105. # (1,tags2)
106. # (2,tags3)
107. # (2,tags4)
108. # 添加字段将用于搜索,结果如有第3个字段,第3个字段表示该记录的权重,权重为大于1的值
109. # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
110. # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
111.
112.
113. # file based field declaration
114. #
115. # content of this field is treated as a file name
116. # and the file gets loaded and indexed in place of a field
117. #
118. # max file size is limited by max_file_field_buffer indexer setting
119. # file IO errors are non-fatal and get reported as warnings
120. # 把字段声明放⼊⽂件
121. # sql_file_field = content_file_path
122.
123.
124. # range query setup, query that must return min and max ID values
125. # optional, default is empty
126. #
127. # sql_query will need to reference $start and $end boundaries
128. # if using ranged query:
129. # 分区查询,防⽌MYSQL死锁
130. # sql_query = \
131. # SELECT doc.id, doc.id AS group, doc.title, doc.data \
132. # FROM documents doc \
133. # WHERE id>=$start AND id<=$end
134. #
135. # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
136.
137.
138. # range query step
139. # optional, default is 1024
140. # 分区查询跳步
141. # sql_range_step = 1000
142.
143.
144.
145.
146. # unsigned integer attribute declaration
147. # multi-value (an arbitrary number of attributes is allowed), optional
148. # optional bit size can be specified, default is 32
149. # 声明⽆符号数字段
150. #sql_attr_uint = target_type
151. # sql_attr_uint = forum_id:9 # 9 bits for forum_id
152. #sql_attr_uint = group_id
153. #声明BOOL字段
154. # boolean attribute declaration
156. # equivalent to sql_attr_uint with 1-bit size
157. #
158. # sql_attr_bool = is_deleted
159.
160.
161. # bigint attribute declaration
162. # multi-value (an arbitrary number of attributes is allowed), optional
163. # declares a signed (unlike uint!) 64-bit attribute
164. # 声明长整字段
165. # sql_attr_bigint = my_bigint_id
166.
167.
168. # UNIX timestamp attribute declaration
169. # multi-value (an arbitrary number of attributes is allowed), optional
170. # similar to integer, but can also be used in date functions
171. # 声明时间字段
172. # sql_attr_timestamp = posted_ts
173. # sql_attr_timestamp = last_edited_ts
174. #sql_attr_timestamp = date_added
175.
176. # string ordinal attribute declaration
177. # multi-value (an arbitrary number of attributes is allowed), optional
178. # sorts strings (bytewise), and stores their indexes in the sorted list
179. # sorting by this attr is equivalent to sorting by the original strings
180. # 声明字符串字段⽤于排序等,但此字段不会被存储
181. # sql_attr_str2ordinal = author_name
182.
183.
184. # floating point attribute declaration
185. # multi-value (an arbitrary number of attributes is allowed), optional
186. # values are stored in single precision, 32-bit IEEE 754 format
187. # 声明浮点字段
188. # sql_attr_float = lat_radians
189. # sql_attr_float = long_radians
190.
191.
192. # multi-valued attribute (MVA) attribute declaration
193. # multi-value (an arbitrary number of attributes is allowed), optional
194. # MVA values are variable length lists of unsigned 32-bit integers
195. #
196. # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
197. # ATTR-TYPE is 'uint' or 'timestamp'
198. # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
199. # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
200. # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
201. # 声明复合字段
202. # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags
203. # sql_attr_multi = uint tag from ranged-query; \
204. # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
205. # SELECT MIN(docid), MAX(docid) FROM tags
206.
207.
208. # string attribute declaration
209. # multi-value (an arbitrary number of these is allowed), optional
210. # lets you store and retrieve strings
211. # 只是把数据存储,但不会索引该字段
212. # sql_attr_string = stitle
213.
214.
215. # wordcount attribute declaration
216. # multi-value (an arbitrary number of these is allowed), optional
217. # lets you count the words at indexing time
218. # 将转化成关键字的字段,⽤于提⾼匹配率
219. # sql_attr_str2wordcount = stitle
220.
221.
222. # combined field plus attribute declaration (from a single column)
223. # stores column as an attribute, but also indexes it as a full-text field
224. # 跟sql_attr_string不同是该属性加⼊索引
225. # sql_field_string = author
226. # sql_field_str2wordcount = title
227.
228.
229. # post-query, executed on sql_query completion
230. # optional, default is empty
231. # 取后查询
232. # sql_query_post =
233.
234.
235. # post-index-query, executed on successful indexing completion
236. # optional, default is empty
237. # $maxid expands to max document ID actually fetched from DB
238. # 索引后查询
239. # sql_query_post_index = REPLACE INTO counters ( id, val ) \
240. # VALUES ( 'max_indexed_id', $maxid )
241.
242.
243. # ranged query throttling, in milliseconds
244. # optional, default is 0 which means no delay
245. # enforces given delay before each query step
246. #分区查询的时间间隔
247. sql_ranged_throttle = 0
248.
249. # document info query, ONLY for CLI search (ie. testing and debugging)
250. # optional, default is empty
251. # must contain $id macro and must fetch the document by that id
252. #命令⾏调试查询结果⽤
253. sql_query_info = SELECT * FROM ko_link WHERE id=$id
255. # kill-list query, fetches the document IDs for kill-list
256. # k-list will suppress matches from preceding indexes in the same query
257. # optional, default is empty
258. ##清理指定查询ID列表,对于数据的更改
259. # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
260.
261.
262. # columns to unpack on indexer side when indexing
263. # multi-value, optional, default is empty list
264. # 启⽤ZIP压缩可以降低系统负载但必须保证zlib库zlib-dev库可⽤
265. # unpack_zlib = zlib_column
266. # unpack_mysqlcompress = compressed_column
267. # unpack_mysqlcompress = compressed_column_2
268.
269.
270. # maximum unpacked length allowed in MySQL COMPRESS() unpacker
271. # optional, default is 16M
272. # 压缩缓存区⼤⼩不能⼩于字段存储值
273. # unpack_mysqlcompress_maxsize = 16M
274.
275.
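276. #############################################################################
277. ## xmlpipe2 配置
278. #############################################################################
279.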
280. # type = xmlpipe
281.
282. # shell command to invoke xmlpipe stream producer
283. # mandatory
284. #
285. # xmlpipe_command = cat /usr/local/sphinx/var/test.xml
286.
287. # xmlpipe2 field declaration
288. # multi-value, optional, default is empty
289. #
290. # xmlpipe_field = subject
291. # xmlpipe_field = content
292.
293.
294. # xmlpipe2 attribute declaration
295. # multi-value, optional, default is empty
296. # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
297. #
298. # xmlpipe_attr_timestamp = published
299. # xmlpipe_attr_uint = author_id
300.
301.
302. # perform UTF-8 validation, and filter out incorrect codes
303. # avoids XML parser choking on non-UTF-8 documents
304. # optional, default is 0
305. #
306. # xmlpipe_fixup_utf8 = 1
307. }
308.
309.
310. # inherited source example
311. # 继承数据源
312. # all the parameters are copied from the parent source,
313. # and may then be overridden in this source definition
314. #source src1throttled : src1
315. #{
316. # sql_ranged_throttle = 100
317. #}
318.
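319. #############################################################################
320. ## index definition
321. #############################################################################
322.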
323. # local index example
324. #
325. # this is an index which is stored locally in the filesystem
326. #
327. # all indexing-time options (such as morphology and charsets)
328. # are configured per local index
329. index test1
330. {
331. # index type
332. # optional, default is 'plain'
333. # known values are 'plain', 'distributed', and 'rt' (see samples below)
334. #索引类型本地分布式
335. # type = plain
336.
337. # document source(s) to index
338. # multi-value, mandatory
339. # document IDs must be globally unique across all sources
340. #数据源,可以多个数据源
341. source = src1
342.
343. # index files path and file name, without extension
344. # mandatory, path must be writable, extensions will be auto-appended
345. # 索引保存路径
346. path = /usr/local/sphinx/var/data/test1
347.
348. # document attribute values (docinfo) storage mode
349. # optional, default is 'extern'
350. # known values are 'none', 'extern' and 'inline'
351. #索引存储⽅式
352. docinfo = extern
354. # memory locking for cached data (.spa and .spi), to prevent swapping
355. # optional, default is 0 (do not mlock)
356. # requires searchd to be run from root
357. #内存锁定需要保证⾜够权限
358. mlock = 0
359.
360. # a list of morphology preprocessors to apply
361. # optional, default is empty
362. #
363. # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
364. # 'soundex', and 'metaphone'; additional preprocessors available from
365. # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
366. # (see libstemmer_c/)
367. # 词语提取器
368. # morphology = stem_en, stem_ru, soundex
369. # morphology = libstemmer_german
370. # morphology = libstemmer_sv
371. morphology = stem_en
372.
373. # minimum word length at which to enable stemming
374. # optional, default is 1 (stem everything)
375. # 词⼲化的最⼩词长
376. # min_stemming_len = 1
377.
378.
379. # stopword files list (space separated)
380. # optional, default is empty
381. # contents are plain text, charset_table and stemming are both applied
382. # 停⽤搜索词
383. # stopwords = /usr/local/sphinx/var/data/stopwords.txt
384.
385.
386. # wordforms file, in "mapfrom > mapto" plain text format
387. # optional, default is empty
388. # 词型字典可⽤spelldump⼯具⽣成
389. # wordforms = /usr/local/sphinx/var/data/wordforms.txt
390.
391.
392. # tokenizing exceptions file
393. # optional, default is empty
394. #Token特例⽂件,就是有些词是完整词意,不能拆分索引如a&t 跟a & t
395. # plain text, case sensitive, space insensitive in map-from part
396. # one "Map Several Words => ToASingleOne" entry per line
397. #
398. # exceptions = /usr/local/sphinx/var/data/exceptions.txt
399.
400.
401. # minimum indexed word length
402. # default is 1 (index everything)
403. # 最⼩索引长度,就是⼩于指定长度的词不被索引
404. min_word_len = 1
405.
406. # charset encoding type
407. # optional, default is 'sbcs'
408. # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
409. # 字符编码
410. charset_type = utf-8
411.
412. # charset definition and case folding rules "table"
413. # optional, default value depends on charset_type
414. #
415. # defaults are configured to include English and Russian characters only
416. # you need to change the table to include additional ones
417. # this behavior MAY change in future versions
418. #
419. # 'sbcs' default value is
420. # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
421. # 转换字符表
422. # 'utf-8' default value is
423. # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
424.
425.
426. # ignored characters list
427. # optional, default value is empty
428. # 忽略字符表
429. # ignore_chars = U+00AD
430.
431.
432. # minimum word prefix length to index
433. # optional, default is 0 (do not index prefixes)
434. #索引的最⼩前缀长度,⼩⼼使⽤,索引和搜索的时间皆会恶化
435. # min_prefix_len = 0
436.
437.
438. # minimum word infix length to index
439. # optional, default is 0 (do not index infixes)
440. #索引的最⼩中缀长度⼩⼼使⽤,索引和搜索的时间皆会恶化
441. # min_infix_len = 0
442.
443.
444. # list of fields to limit prefix/infix indexing to
445. # optional, default value is empty (index all fields in prefix/infix mode)
446. # 未知
447. # prefix_fields = filename
448. # infix_fields = url, domain
449.
450.
451. # enable star-syntax (wildcards) when searching prefix/infix indexes
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
推荐文章
热门文章
-
随机森林算法的改进方法
2024-10-02 -
基于随机森林算法的风险预警模型研究
2024-10-02 -
Python中的随机森林算法详解
2024-10-02 -
随机森林发展历史
2024-10-02 -
如何使用随机森林进行时间序列数据模式识别(八)
2024-10-02 -
随机森林回归模型原理
2024-10-02 -
如何使用随机森林进行时间序列数据模式识别(六)
2024-10-02 -
如何使用随机森林进行时间序列数据预测(四)
2024-10-02 -
如何使用随机森林进行异常检测(六)
2024-10-02 -
随机森林算法和grandientboosting算法 -回复
2024-10-02 -
随机森林方法总结全面
2024-10-02 -
随机森林算法原理和步骤
2024-10-02 -
随机森林的原理
2024-10-02 -
随机森林 重要性
2024-10-02 -
随机森林算法
2024-10-02 -
机器学习中随机森林的原理
2024-10-02 -
随机森林算法原理
2024-10-02 -
使用计算机视觉技术进行动物识别的技巧
2024-10-02 -
基于crf命名实体识别实验总结
2024-10-02 -
transformer预测模型训练方法
2024-10-02
最新文章
-
随机森林算法介绍及R语言实现
2024-10-02 -
基于随机森林优化的神经网络算法在冬小麦产量预测中的应用研究_百度文 ...
2024-10-02 -
基于正则化贪心森林算法的情感分析方法研究
2024-10-02 -
随机森林算法和grandientboosting算法
2024-10-02 -
基于随机森林的图像分类算法研究
2024-10-02 -
随机森林结合直接正交信号校正的模型传递方法
2024-10-02
发表评论