这里我主要讲重点。首先是基于 Discuz 的索引配置文件,这个配置文件比较灵活,可以根据不同的需求来调整:
    # 
    # LinuxTone full index search configure file 
    # 
    # Data source for the main (full) index: reads forum posts from MySQL.
    # NOTE(review): connects as root with an empty password; a dedicated
    # read-only account would be safer outside a tutorial setup.
    source lt_posts 
    { 
    type = mysql 
    sql_host = 127.0.0.1 
    sql_user = root 
    sql_pass = 
    sql_db = lt_bbs 
    sql_port = 3306 
    # Executed before the main query so text is fetched as UTF-8.
    sql_query_pre = SET NAMES utf8 
    # The index is built from the posts table so that subject, message and
    # author can all be searched at once. "first=1" restricts the set to rows
    # flagged as first (presumably the opening post of each thread; confirm
    # against the Discuz schema). The first column (pid) is the document id.
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 
    # fid and dateline are stored as attributes (usable for filtering and
    # sorting) rather than as full-text fields.
    sql_attr_uint = fid 
    sql_attr_timestamp = dateline 
    # Looks up the original row for a document id ($id is substituted).
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id 
    } 
    # Main full-text index, built from the lt_posts source.
    index lt_posts 
    { 
    source = lt_posts 
    # Base path (without extension) of the index files.
    path = /data/sphinx/data/lt_posts 
    docinfo = extern 
    mlock = 0 
    morphology = none 
    # Words shorter than two characters are not indexed.
    min_word_len = 2 
    # Strip HTML markup from the text before indexing.
    html_strip = 1 
    # Chinese word segmentation: directory of the mmseg dictionary and the
    # matching charset (these two options come from the Coreseek/mmseg build,
    # judging by the install path; confirm against the installed version).
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/ 
    charset_type = zh_cn.utf-8 
    # N-gram indexing is switched off.
    ngram_len = 0 
    } 
    ########## Delta (incremental) index ################## 
    # Data source for the delta index: same connection and columns as the
    # lt_posts source, restricted to recently created posts.
    # NOTE(review): connects as root with an empty password; a dedicated
    # read-only account would be safer outside a tutorial setup.
    source delta 
    { 
    type = mysql 
    sql_host = 127.0.0.1 
    sql_user = root 
    sql_pass = 
    sql_db = lt_bbs 
    sql_port = 3306 # optional, default is 3306 
    # Executed before the main query so text is fetched as UTF-8.
    sql_query_pre = SET NAMES utf8 
    # The delta set is selected by time: only posts whose dateline is newer
    # than "now minus the chosen interval" (here 3600*10 seconds = 10 hours)
    # are indexed. The interval must cover the gap between full re-indexes.
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 and dateline > unix_timestamp()-3600*10 
  
    # fid and dateline are stored as attributes, as in the main source.
    sql_attr_uint = fid 
    sql_attr_timestamp = dateline 
    # Looks up the original row for a document id ($id is substituted).
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id 
    } 
    # Delta index, built from the delta source. Its settings mirror those of
    # the lt_posts index; only the source and the file path differ.
    index delta 
    { 
    source = delta 
    # Base path (without extension) of the delta index files.
    path = /data/sphinx/data/lt_delta 
    docinfo = extern 
    mlock = 0 
    morphology = none 
    # Words shorter than two characters are not indexed.
    min_word_len = 2 
    # Strip HTML markup from the text before indexing.
    html_strip = 1 
    # Chinese word segmentation dictionary and charset (Coreseek/mmseg
    # options, judging by the install path; confirm against the installed
    # version).
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/ 
    charset_type = zh_cn.utf-8 
    # N-gram indexing is switched off.
    ngram_len = 0 
  
    } 
    # Settings for the indexer program.
    indexer 
    { 
    # Upper bound on the memory the indexer uses while building an index.
    mem_limit = 32M 
    } 
    # Settings for the searchd daemon that the PHP client connects to.
    searchd 
    { 
    # TCP port searchd listens on; the PHP client must use the same port.
    port = 9312 
    log = /data/sphinx/var/log/searchd.log 
    query_log = /data/sphinx/var/log/query.log 
    # Network read timeout for client requests, in seconds.
    read_timeout = 5 
    # Maximum number of concurrent search children.
    max_children = 30 
    pid_file = /data/sphinx/var/log/searchd.pid 
    # Upper bound on the number of matches kept per query.
    max_matches = 10000 
    # Rotate rebuilt indexes (indexer --rotate) without interrupting searches.
    seamless_rotate = 1 
    preopen_indexes = 0 
    # Remove the old index files once a rotation has succeeded.
    unlink_old = 1 
    } 
 
 
Sphinx 最主要的就是这个配置文件。增量索引部分可以写成一个脚本,放到 crontab 里定时执行(脚本见文末)。
 
下面介绍 Sphinx 的 PHP 调用部分。Sphinx 的接口使用 PHP 扩展,可以通过 pecl 命令安装,或者从 http://pecl.php.net/package/sphinx 下载源码编译安装:
    <?php 
    /** 
    * LinuxTone全文搜索服务 
    */ 
    define('IN_DISCUZ', TRUE); 
    require_once './include/common.inc.php'; 
  
    $q = isset($_GET['q']) && !empty($_GET['q']) ? $_GET['q'] : ''; 
    $q = str_replace(array('<','>',' ','\'',','),array('','',' ','',''),strip_tags($q)); 
  
    $page = isset($_GET['page']) && intval($_GET['page'])>0 ? intval($_GET['page']) : 1; 
    $perNum = 20; 
    $offset = ($page - 1) * $perNum; 
  
    $search = new SphinxClient(); 
    $search->setServer('127.0.0.1',9312); 
    $search->setConnectTimeout(2); 
    $search->setArrayResult(true); 
    $search->setMatchMode(SPH_MATCH_ANY); 
    $search->setRankingMode(SPH_RANK_PROXIMITY_BM25); 
    $search->setSortMode(SPH_SORT_EXTENDED,'@relevance desc,@weight desc'); 
    $search->setLimits($offset,$perNum); 
    $search->setFieldWeights(array('subject'=>2000,'message'=>0)); 
  
    $rs = array(); 
    $query_totals = $query_time = 0; 
    if(!empty($q)){ 
             $rs = $search->Query($q,"*"); 
             $pages = ceil($rs['total']/$perNum); 
  
             $query_totals = $rs['total_found']; 
             $query_time = $rs['time']; 
    } 
  
    $data = $title = $content = array(); 
  
    if(!empty($rs) && $page <= $pages){ 
               $pids = array(); 
               foreach($rs['matches'] as $v){ 
                       $pids[] = $v['id']; 
             } 
             $pid = implode(',',$pids); 
             $sql = "select pid,tid,author,authorid,subject,message,dateline from cdb_posts where pid IN($pid) and status ='0' and invisible='0'"; 
  
             $query = $db->query($sql); 
             while($row = $db->fetch_array($query)){ 
                       $data[] = $row; 
                       $title[] = $row['subject']; 
                       $content[] = preg_replace('/\[[\/]?(b|img|url|color|s|hr|p|list|i|align|email|u|font|code|hide|table|tr|td|th|attach|list|indent|float).*\]/','',strip_tags($row['message'])); 
             } 
             //搜索词高亮 
             $opts = array(); 
             $opts['before_match'] = ''; 
             $opts['after_match'] = ''; 
             $title = $search->BuildExcerpts($title,'lt_posts',$q,$opts); 
             $content = $search->BuildExcerpts($content,'lt_posts',$q,$opts); 
  
             foreach($data as $k=>$v){ 
                       $data[$k]['subject'] = $title[$k]; 
                       $data[$k]['message'] = $content[$k]; 
             } 
  
             $url = "s.php?q=".urlencode($q); 
             $multipage = multi($rs['total'], $perNum, $page, $url); 
    } 
  
    include template("lt_search"); 
    ?> 
 
 
跑主索引的shell脚本search-index.sh 
#!/bin/bash
#
# Rebuild the full (main) BBS search index.
#
# Runs the indexer for the lt_posts index with --rotate and appends its
# standard output to a log file named after the current date and hour.

csft_home=/usr/local/csft-3.2.13
log_file="/data/sphinx/var/$(date "+%Y-%m-%d-%H").log"

"${csft_home}/bin/indexer" -c "${csft_home}/etc/lt_posts.conf" --rotate lt_posts >> "${log_file}"
 
跑增量索引的shell脚本search-delta.sh 
#!/bin/bash
#
# Rebuild the BBS search delta (incremental) index.
#

csft_home=/usr/local/csft-3.2.13
indexer_bin="${csft_home}/bin/indexer"
conf_file="${csft_home}/etc/lt_posts.conf"

# Rebuild the delta index.
"${indexer_bin}" -c "${conf_file}" --rotate delta

# Optional, disabled: merge the delta index into the main index.
#"${indexer_bin}" --config "${conf_file}" --rotate --merge lt_posts delta