锘??xml version="1.0" encoding="utf-8" standalone="yes"?>欧美一区二区三区免费大片,亚洲高清免费,国内精品99http://m.shnenglu.com/jrckkyy/category/12532.html閲戣瀺鏁板,InformationSearch,Compiler,OS,zh-cnThu, 10 Dec 2009 15:07:47 GMTThu, 10 Dec 2009 15:07:47 GMT60鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫6]鍊掓帓绱㈠紩鐨勫緩绔嬬殑紼嬪簭鍒嗘瀽(4)http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102949.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 15:03:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102949.htmlhttp://m.shnenglu.com/jrckkyy/comments/102949.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102949.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102949.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102949.html浠ヤ笅鏄牴鎹鍚戠儲寮曞緩绔嬪掓帓绱㈠紩鐨勬敞閲?/p>

 

// Builds the inverted index from a sorted forward index.
// Usage: ./CrtInvertedIdx moon.fidx.sort > sun.iidx
//
// Every input line is expected to be "<term>\t<docid>". Lines carrying the same
// term must be adjacent (the input is sorted); their doc ids are collected and
// written to stdout as one line: "<term>\t <docid> <docid> ...".
// Returns 0 on success, -1 when the input file is missing or cannot be opened.
int main(int argc, char* argv[])    //./CrtInvertedIdx moon.fidx.sort > sun.iidx
{
    if (argc < 2)
    {
        cerr << "Usage: CrtInvertedIdx <sorted forward index>\n";
        return -1;
    }

    ifstream ifsImgInfo(argv[1]);
    if (!ifsImgInfo)
    {
        cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    string strLine, strDocNum, tmp1;   // tmp1: term whose doc ids are being collected
    while (getline(ifsImgInfo, strLine))
    {
        const string::size_type idx = strLine.find('\t');
        if (idx == string::npos) continue;      // malformed line: no term/docid separator

        const string tmp = strLine.substr(0, idx);

        // Only terms of 2..8 bytes are indexed.
        if (tmp.size() < 2 || tmp.size() > 8) continue;

        // A new term starts: flush the posting list of the previous one.
        if (!tmp1.empty() && tmp != tmp1)
        {
            cout << tmp1 << "\t" << strDocNum << endl;
            strDocNum.clear();
        }

        tmp1 = tmp;
        strDocNum += " ";                        // every doc id is preceded by one space
        strDocNum += strLine.substr(idx + 1);
    }

    // Flush the last term; nothing is written when no valid line was read.
    if (!tmp1.empty())
        cout << tmp1 << "\t" << strDocNum << endl;

    return 0;
}

 

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫6]鍊掓帓绱㈠紩鐨勫緩绔嬬殑紼嬪簭鍒嗘瀽(2)http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102947.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 15:02:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102947.htmlhttp://m.shnenglu.com/jrckkyy/comments/102947.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102947.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102947.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102947.html鍓嶉潰鐨凞ocIndex紼嬪簭杈撳叆涓涓猅ianwang.raw.*****鏂囦歡錛屼細浜х敓涓涓嬩笁涓枃浠?Doc.idx, Url.idx, DocId2Url.idx錛屾垜浠繖閲屽DocSegment紼嬪簭榪涜鍒嗘瀽銆?/p>

榪欓噷杈撳叆 Tianwang.raw.*****錛孌oc.idx錛孶rl.idx.sort_uniq絳変笁涓枃浠訛紝杈撳嚭涓涓猅ianwang.raw.***.seg 鍒嗚瘝瀹屾瘯鐨勬枃浠?/p>

int main(int argc, char* argv[])
{
    string strLine, strFileName=argv[1];
    CUrl iUrl;
    vector<CUrl> vecCUrl;
    CDocument iDocument;
    vector<CDocument> vecCDocument;
    unsigned int docId = 0;

    //ifstream ifs("Tianwang.raw.2559638448");
    ifstream ifs(strFileName.c_str());  //DocSegment Tianwang.raw.****
    if (!ifs) 
    {
        cerr << "Cannot open tianwang.img.info for input\n";
        return -1;
    }

    ifstream ifsUrl("Url.idx.sort_uniq");   //鎺掑簭騫舵秷閲嶅悗鐨剈rl瀛楀吀
    if (!ifsUrl) 
    {
        cerr << "Cannot open Url.idx.sort_uniq for input\n";
        return -1;
    }
    ifstream ifsDoc("Doc.idx"); //瀛楀吀鏂囦歡
    if (!ifsDoc) 
    {
        cerr << "Cannot open Doc.idx for input\n";
        return -1;
    }

    while (getline(ifsUrl,strLine)) //鍋忕url瀛楀吀瀛樺叆涓涓悜閲忓唴瀛樹腑
    {
        char chksum[33];
        int  docid;

        memset(chksum, 0, 33);
        sscanf( strLine.c_str(), "%s%d", chksum, &docid );
        iUrl.m_sChecksum = chksum;
        iUrl.m_nDocId = docid;
        vecCUrl.push_back(iUrl);
    }

    while (getline(ifsDoc,strLine))     //鍋忕瀛楀吀鏂囦歡灝嗗叾鏀懼叆涓涓悜閲忓唴瀛樹腑
    {
        int docid,pos,length;
        char chksum[33];

        memset(chksum, 0, 33);
        sscanf( strLine.c_str(), "%d%d%d%s", &docid, &pos, &length,chksum );
        iDocument.m_nDocId = docid;
        iDocument.m_nPos = pos;
        iDocument.m_nLength = length;
        iDocument.m_sChecksum = chksum;
        vecCDocument.push_back(iDocument);
    }

 

    strFileName += ".seg";
    ofstream fout(strFileName.c_str(), ios::in|ios::out|ios::trunc|ios::binary);    //璁劇疆瀹屾垚鍒嗚瘝鍚庣殑鏁版嵁杈撳嚭鏂囦歡
    for ( docId=0; docId<MAX_DOC_ID; docId++ )
    {

        // find document according to docId
        int length = vecCDocument[docId+1].m_nPos - vecCDocument[docId].m_nPos -1;
        char *pContent = new char[length+1];
        memset(pContent, 0, length+1);
        ifs.seekg(vecCDocument[docId].m_nPos);
        ifs.read(pContent, length);

        char *s;
        s = pContent;

        // skip Head
        int bytesRead = 0,newlines = 0;
        while (newlines != 2 && bytesRead != HEADER_BUF_SIZE-1) 
        {
            if (*s == '\n')
                newlines++;
            else
                newlines = 0;
            s++;
            bytesRead++;
        }
        if (bytesRead == HEADER_BUF_SIZE-1) continue;


        // skip header
        bytesRead = 0,newlines = 0;
        while (newlines != 2 && bytesRead != HEADER_BUF_SIZE-1) 
        {
            if (*s == '\n')
                newlines++;
            else
                newlines = 0;
            s++;
            bytesRead++;
        }
        if (bytesRead == HEADER_BUF_SIZE-1) continue;

        //iDocument.m_sBody = s;
        iDocument.RemoveTags(s);    //鍘婚櫎<>
        iDocument.m_sBodyNoTags = s;

        delete[] pContent;
        string strLine = iDocument.m_sBodyNoTags;

        CStrFun::ReplaceStr(strLine, " ", " ");
        CStrFun::EmptyStr(strLine); // set " \t\r\n" to " "


        // segment the document 鍏蜂綋鍒嗚瘝澶勭悊
        CHzSeg iHzSeg;
        strLine = iHzSeg.SegmentSentenceMM(iDict,strLine);
        fout << docId << endl << strLine;
        fout << endl;
        
    }

    return(0);
}
榪欓噷鍙槸嫻厜鎺犲獎寮忕殑榪囦竴閬嶅ぇ姒傜殑浠g爜錛屽悗闈㈡垜浼氭湁涓撻璇︾粏璁茶В parse html 鍜?segment docment 絳夋妧鏈?/p>

 

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫6]鍊掓帓绱㈠紩鐨勫緩绔嬬殑紼嬪簭鍒嗘瀽(3) http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102948.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 15:02:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102948.htmlhttp://m.shnenglu.com/jrckkyy/comments/102948.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102948.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102948.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102948.html榪欓噷浠嬬粛姝e悜绱㈠紩鐨勫緩绔嬶紝濡傛灉鐩存帴寤虹珛鍊掓帓绱㈠紩鏁堢巼涓婂彲鑳戒細寰堜綆錛屾墍浠ュ彲浠ュ厛浜х敓姝e悜绱㈠紩涓哄悗闈㈢殑鍊掓帓绱㈠紩鎵撲笅鍩虹銆?/p>

 

璇︾粏鐨勬枃浠跺姛鑳藉拰浠嬬粛閮藉湪榪欓噷鏈変簡浠嬬粛鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫5]鍊掓帓绱㈠紩鐨勫緩绔嬪強鏂囦歡浠嬬粛

 

CrtForwardIdx.cpp文件

 

int main(int argc, char* argv[])    //./CrtForwardIdx Tianwang.raw.***.seg > moon.fidx
{
    ifstream ifsImgInfo(argv[1]);
    if (!ifsImgInfo) 
    {
        cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    string strLine,strDocNum;
    int cnt = 0;
    while (getline(ifsImgInfo, strLine)) 
    {
        string::size_type idx;

        cnt++;
        if (cnt%2 == 1) //濂囨暟琛屼負鏂囨。緙栧彿
        {
            strDocNum = strLine.substr(0,strLine.size());
            continue;
        }
        if (strLine[0]=='\0' || strLine[0]=='#' || strLine[0]=='\n')
        {
            continue;
        }

        while ( (idx = strLine.find(SEPARATOR)) != string::npos ) //鎸囧畾鏌ユ壘鍒嗙晫絎?
        {
            string tmp1 = strLine.substr(0,idx);
            cout << tmp1 << "\t" << strDocNum << endl;
            strLine = strLine.substr(idx + SEPARATOR.size());
        }

        //if (cnt==100) break;
    }

    return 0;
}

 

author:http://hi.baidu.com/jrckkyy

author:http://blog.csdn.net/jrckkyy

 

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫6]鍊掓帓绱㈠紩鐨勫緩绔嬬殑紼嬪簭鍒嗘瀽(1)http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102945.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 15:00:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102945.htmlhttp://m.shnenglu.com/jrckkyy/comments/102945.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102945.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102945.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102945.htmlauthor:http://hi.baidu.com/jrckkyy

author:http://blog.csdn.net/jrckkyy

涓婁竴綃囦富瑕佷粙緇嶄簡鍊掓帓绱㈠紩寤虹珛鐩稿叧鐨勬枃浠跺強涓棿鏂囦歡銆?br>TSE寤虹珛绱㈠紩鍦ㄨ繍琛岀▼搴忎笂鐨勫ぇ鑷存楠ゅ彲浠ョ畝鍖栧垎涓轟互涓嬪嚑姝ワ細

1銆佽繍琛屽懡浠?./DocIndex
浼氱敤鍒頒竴涓枃浠?tianwang.raw.520    //鐖彇鍥炴潵鐨勫師濮嬫枃浠訛紝鍖呭惈澶氫釜緗戦〉鐨勬墍鏈変俊鎭紝鎵浠ュ緢澶э紝榪欎篃鏄竴涓湁寰呰В鍐崇殑闂錛屽埌搴曞瓨鎴愬ぇ鏂囦歡錛堝鏋滆繃澶т細瓚呰繃2G鎴?G鐨勯檺鍒訛紝鑰屼笖鏂囦歡榪囧ぇ绱㈠紩鏁堢巼榪囦綆錛夎繕鏄皬鏂囦歡錛堟枃浠舵暟榪囧鐢ㄤ簬鎵撳紑鍏抽棴鏂囦歡鍙ユ焺鐨勬秷鑰楄繃澶э級榪樻湁寰呮濊冿紝榪樺氨鏄瓨鍌ㄦ柟妗堢殑瑙e喅鏈緇堣偗瀹氭槸瑕佸瓨涓哄垎甯冨紡鐨勶紝鏈緇堟繪枃浠墮噺鑲畾鏄細涓奣B鐨勶紝TSE鍙敮鎸佸皬鍨嬬殑鎼滅儲寮曟搸闇姹傘?nbsp;         
浼氫駭鐢熶竴涓嬩笁涓枃浠?Doc.idx, Url.idx, DocId2Url.idx    //Data鏂囦歡澶逛腑鐨凞oc.idx DocId2Url.idx鍜孌oc.idx

2銆佽繍琛屽懡浠?sort Url.idx|uniq > Url.idx.sort_uniq    //Data鏂囦歡澶逛腑鐨刄rl.idx.sort_uniq
浼氱敤鍒頒竴涓枃浠?Url.idx鏂囦歡 //md5 hash 涔嬪悗鐨剈rl瀹屾暣鍦板潃鍜宒ocument id鍊煎
浼氫駭鐢熶竴涓枃浠?Url.idx.sort_uniq //URL娑堥噸錛宮d5 hash鎺掑簭錛屾彁楂樻绱㈡晥鐜?/p>

3銆佽繍琛屽懡浠?./DocSegment Tianwang.raw.2559638448 
浼氱敤鍒頒竴涓枃浠?Tianwang.raw.2559638448  //Tianwang.raw.2559638448涓虹埇鍥炴潵鐨勬枃浠?錛屾瘡涓〉闈㈠寘鍚玥ttp澶達紝鍒嗚瘝涓哄悗闈㈠緩绔嬪埌鎺掔儲寮曞仛鍑嗗
浼氫駭鐢熶竴涓枃浠?Tianwang.raw.2559638448.seg //鍒嗚瘝鏂囦歡錛岀敱涓琛宒ocument id鍙峰拰涓琛屾枃妗e垎璇嶇粍錛堝彧瀵規瘡涓枃妗?lt;html></html>涓?lt;head></head><body></body>絳夋枃瀛楁爣璁頒腑鐨勬枃鏈繘琛屽垎緇勶級鏋勬垚

4銆佽繍琛屽懡浠?./CrtForwardIdx Tianwang.raw.2559638448.seg > moon.fidx //寤虹珛鐙珛鐨勬鍚戠儲寮?/p>

5銆佽繍琛屽懡浠?br>#set | grep "LANG"
#LANG=en; export LANG;
#sort moon.fidx > moon.fidx.sort

6銆佽繍琛屽懡浠?./CrtInvertedIdx moon.fidx.sort > sun.iidx //寤虹珛鍊掓帓绱㈠紩

鎴戜滑鍏堜粠寤虹珛绱㈠紩鐨勭涓涓▼搴廌ocIndex.cpp寮濮嬪垎鏋愩?娉ㄩ噴綰﹀畾錛歍ianwang.raw.2559638448鏄姄鍥炴潵鍚堝茍鎴愮殑澶ф枃浠訛紝鍚庨潰灝卞彨澶ф枃浠訛紝閲岄潰鍖呭惈浜嗗緢澶氱瘒html鏂囨。錛岄噷闈㈢殑鏂囨。鏈夎寰嬬殑鍒嗛殧灝卞彨鍋氫竴綃囦竴綃囩殑鏂囨。)


//DocIndex.h start-------------------------------------------------------------

 


#ifndef _COMM_H_040708_
#define _COMM_H_040708_

// NOTE(review): the header names of the original #include directives were lost
// in this listing (only bare "#include" lines remained, which do not compile).
// <string> is required by the constants below; the remaining headers are the
// ones the listings that include this file rely on - confirm against the
// original Comm.h.
#include <cstdio>
#include <cstring>

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>


using namespace std;

const unsigned HEADER_BUF_SIZE = 1024;
const unsigned RstPerPage = 20; // number of search results returned per page by the front end

//iceway
//const unsigned MAX_DOC_IDX_ID = 21312;  // used in DocSegment.cpp
const unsigned MAX_DOC_IDX_ID = 22104;


//const string IMG_INFO_NAME("./Data/s1.1");
const string INF_INFO_NAME("./Data/sun.iidx"); // inverted index file
// sample entries of that file (term followed by doc ids):
//   <term>  14383 16151 16151 16151 1683 207 6302 7889 8218 8218 8637
//   <term>  1085 1222

// more than 90,000 entries, including special symbols, punctuation and Chinese characters
const string DOC_IDX_NAME("./Data/Doc.idx"); // document index file
const string RAWPAGE_FILE_NAME("./Data/Tianwang.swu.iceway.1.0");

//iceway
const string DOC_FILE_NAME = "Tianwang.swu.iceway.1.0";  // used in Docindex.cpp
const string Data_DOC_FILE_NAME = "./Data/Tianwang.swu.iceway.1.0";  // used in Snapshot.cpp


//const string RM_THUMBNAIL_FILES("rm -f ~/public_html/ImgSE/timg/*");

//const string THUMBNAIL_DIR("/ImgSE/timg/");


#endif // _COMM_H_040708_
//DocIndex.h end--------------------------------------------------------------//DocIndex.cpp start-----------------------------------------------------------

#include
#include
#include "Md5.h"
#include "Url.h"
#include "Document.h"

//iceway(mnsc)
#include "Comm.h"
#include

using namespace std;

int main(int argc, char* argv[])
{
    //ifstream ifs("Tianwang.raw.2559638448");
 //ifstream ifs("Tianwang.raw.3023555472");
 //iceway(mnsc)
 ifstream ifs(DOC_FILE_NAME.c_str()); //鎵撳紑Tianwang.raw.3023555472鏂囦歡錛屾渶鍘熷鐨勬枃浠?br> if (!ifs)
 {
     cerr << "Cannot open " << "tianwang.img.info" << " for input\n";
     return -1;
    }
 ofstream ofsUrl("Url.idx", ios::in|ios::out|ios::trunc|ios::binary); //寤虹珛騫舵墦寮Url.idx鏂囦歡
 if( !ofsUrl )
 {
  cout << "error open file " << endl;
 }

 ofstream ofsDoc("Doc.idx", ios::in|ios::out|ios::trunc|ios::binary); //寤虹珛騫舵墦寮Doc.idx鏂囦歡
 if( !ofsDoc )
 {
  cout << "error open file " << endl;
 }

 ofstream ofsDocId2Url("DocId2Url.idx", ios::in|ios::out|ios::trunc|ios::binary); //寤虹珛騫舵墦寮DocId2Url.idx鏂囦歡
 if( !ofsDocId2Url )
 {
  cout << "error open file " << endl;
 }

 int cnt=0; //鏂囨。緙栧彿浠?寮濮嬭綆?br> string strLine,strPage;
 CUrl iUrl;
 CDocument iDocument;
 CMD5 iMD5;
 
 int nOffset = ifs.tellg();
 while (getline(ifs, strLine))
 {
  if (strLine[0]=='\0' || strLine[0]=='#' || strLine[0]=='\n')
  {
   nOffset = ifs.tellg();
   continue;
  }

  if (!strncmp(strLine.c_str(), "version: 1.0", 12)) //鍒ゆ柇絎竴琛屾槸鍚︽槸version: 1.0濡傛灉鏄氨瑙f瀽涓嬪幓
  { 
   if(!getline(ifs, strLine)) break;
   if (!strncmp(strLine.c_str(), "url: ", 4)) //鍒ゆ柇絎簩琛屾槸鍚︽槸url: 濡傛灉鏄垯瑙f瀽涓嬪幓
   {
    iUrl.m_sUrl = strLine.substr(5); //鎴彇url: 浜斾釜瀛楃涔嬪悗鐨剈rl鍐呭
    iMD5.GenerateMD5( (unsigned char*)iUrl.m_sUrl.c_str(), iUrl.m_sUrl.size() ); //瀵箄rl鐢╩d5 hash澶勭悊
    iUrl.m_sChecksum = iMD5.ToString(); //灝嗗瓧絎︽暟緇勭粍鍚堟垚瀛楃涓茶繖涓嚱鏁板湪Md5.h涓疄鐜?/p>

   } else
   {
    continue;
   }

   while (getline(ifs, strLine))
   {
    if (!strncmp(strLine.c_str(), "length: ", 8)) //涓鐩磋涓嬪幓鐩村埌鍒ゆ柇婢規竟(鐩稿絎簲琛?鎯烘瑺琚瘋帒ength: 鏄垯鎺ヤ笅涓嬪幓
    {
     sscanf(strLine.substr(8).c_str(), "%d", &(iDocument.m_nLength)); //灝嗚鍧楁墍浠h〃緗戦〉鐨勫疄闄呯綉欏靛唴瀹歸暱搴︽斁鍏Document鏁版嵁緇撴瀯涓?br>     break;
    }
   }

   getline(ifs, strLine); //璺寵繃鐩稿絎叚琛屾晠鎰忕暀鐨勪竴涓┖琛?/p>

   iDocument.m_nDocId = cnt; //灝嗘枃妗g紪鍙瘋祴鍊煎埌iDocument鏁版嵁緇撴瀯涓?br>   iDocument.m_nPos = nOffset; //鏂囨。緇撳熬鍦ㄥぇ鏂囦歡涓殑緇撴潫琛屽彿
   char *pContent = new char[iDocument.m_nLength+1]; //鏂板緩璇ユ枃妗i暱搴︾殑瀛楃涓叉寚閽?/p>

   memset(pContent, 0, iDocument.m_nLength+1); //姣忎竴浣嶅垵濮嬪寲涓?
   ifs.read(pContent, iDocument.m_nLength); //鏍規嵁鑾峰緱鐨勬枃妗i暱搴﹁鍙栨竟(鍏朵腑鍖呭惈鍗忚澶?璇誨彇鏂囨。鍐呭
   iMD5.GenerateMD5( (unsigned char*)pContent, iDocument.m_nLength );
   iDocument.m_sChecksum = iMD5.ToString(); //灝嗗瓧絎︽暟緇勭粍鍚堟垚瀛楃涓茶繖涓嚱鏁板湪Md5.h涓疄鐜?br>   
   delete[] pContent;
   
   ofsUrl << iUrl.m_sChecksum ; //灝唌d5hash鍚庣殑url鍐欏叆Url.idx鏂囦歡
   ofsUrl << "\t" << iDocument.m_nDocId << endl; //鍦ㄤ竴琛屼腑涓涓猼ab璺濈鍒嗛殧錛屽皢鏂囦歡緙栧彿鍐欏叆Url.idx鏂囦歡

   ofsDoc << iDocument.m_nDocId ; //灝嗘枃浠剁紪鍙峰啓鍏oc.idx鏂囦歡
   ofsDoc << "\t" << iDocument.m_nPos ; //鍦ㄤ竴琛屼腑涓涓猼ab璺濈鍒嗛殧錛屽皢璇ユ枃妗g粨鏉熻鍙鋒竟(鍚屾牱涔熸槸涓嬩竴鏂囨。寮濮嬭鍙?鍐欏叆Doc.idx鏂囦歡
   //ofsDoc << "\t" << iDocument.m_nLength ;
   ofsDoc << "\t" << iDocument.m_sChecksum << endl; //鍦ㄤ竴琛屼腑涓涓猼ab璺濈鍒嗛殧錛屽皢md5hash鍚庣殑url鍐欏叆Doc.idx鏂囦歡

   ofsDocId2Url << iDocument.m_nDocId ; //灝嗘枃浠剁紪鍙峰啓鍏ocId2Url.idx鏂囦歡
   ofsDocId2Url << "\t" << iUrl.m_sUrl << endl; //灝嗚鏂囨。鐨勫畬鏁磚rl鍐欏叆DocId2Url.idx鏂囦歡

   cnt++; //鏂囨。緙栧彿鍔犱竴璇存槑璇ヤ互鏂囨。鍒嗘瀽瀹屾瘯錛岀敓鎴愪笅涓鏂囨。鐨勭紪鍙?br>  }

  nOffset = ifs.tellg();

 }

 //鏈鍚庝竴琛屽彧鏈夋枃妗e彿鍜屼笂涓綃囨枃妗g粨鏉熷彿
 ofsDoc << cnt ;
 ofsDoc << "\t" << nOffset << endl;


 return(0);
}

//DocIndex.cpp end-----------------------------------------------------------author:http://hi.baidu.com/jrckkyy

author:http://blog.csdn.net/jrckkyy

 

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫5]鍊掓帓绱㈠紩鐨勫緩绔嬪強鏂囦歡浠嬬粛http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102943.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 14:55:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102943.htmlhttp://m.shnenglu.com/jrckkyy/comments/102943.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102943.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102943.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102943.html涓嶅ソ鎰忔濊澶у涔呯瓑浜嗭紝鍓嶄竴闃典竴鐩村湪蹇欒冭瘯錛岀粓浜庣粨鏉熶簡銆傚懙鍛碉紒搴熻瘽涓嶅璇翠簡涓嬮潰鎴戜滑寮濮嬪惂錛?/p>

TSE鐢ㄧ殑鏄皢鎶撳彇鍥炴潵鐨勭綉欏墊枃妗e叏閮ㄨ鍏ヤ竴涓ぇ鏂囨。錛岃鍚庡榪欎竴涓ぇ鏂囨。鍐呯殑鏁版嵁鏁翠綋緇熶竴鐨勫緩绱㈠紩錛屽叾涓寘鍚簡鍑犱釜姝ラ銆?/p>

view plaincopy to clipboardprint?
1.  The document index (Doc.idx) keeps information about each document.  
 
It is a fixed width ISAM (Index sequential access mode) index, ordered by docID.  
 
The information stored in each entry includes a pointer into the repository,  
 
a document length, a document checksum.  
 
 
 
//Doc.idx  鏂囨。緙栧彿 鏂囨。闀垮害    checksum hash鐮?nbsp; 
 
0   0   bc9ce846d7987c4534f53d423380ba70  
 
1   76760   4f47a3cad91f7d35f4bb6b2a638420e5  
 
2   141624  d019433008538f65329ae8e39b86026c  
 
3   142350  5705b8f58110f9ad61b1321c52605795  
 
//Doc.idx   end  
 
 
 
  The url index (url.idx) is used to convert URLs into docIDs.  
 
 
 
//url.idx  
 
5c36868a9c5117eadbda747cbdb0725f    0 
 
3272e136dd90263ee306a835c6c70d77    1 
 
6b8601bb3bb9ab80f868d549b5c5a5f3    2 
 
3f9eba99fa788954b5ff7f35a5db6e1f    3 
 
//url.idx   end  
 
 
 
It is a list of URL checksums with their corresponding docIDs and is sorted by  
 
checksum. In order to find the docID of a particular URL, the URL's checksum  
 
is computed and a binary search is performed on the checksums file to find its  
 
docID.  
 
 
 
    ./DocIndex  
 
        got Doc.idx, Url.idx, DocId2Url.idx //Data鏂囦歡澶逛腑鐨凞oc.idx DocId2Url.idx鍜孌oc.idx涓?nbsp; 
 
 
 
//DocId2Url.idx  
 
0   http://*.*.edu.cn/index.aspx  
 
1   http://*.*.edu.cn/showcontent1.jsp?NewsID=118  
 
2   http://*.*.edu.cn/0102.html  
 
3   http://*.*.edu.cn/0103.html  
 
//DocId2Url.idx end  
 
 
 
2.  sort Url.idx|uniq > Url.idx.sort_uniq    //Data鏂囦歡澶逛腑鐨刄rl.idx.sort_uniq  
 
 
 
//Url.idx.sort_uniq  
 
//瀵筯ash鍊艱繘琛屾帓搴?nbsp; 
 
000bfdfd8b2dedd926b58ba00d40986b    1111 
 
000c7e34b653b5135a2361c6818e48dc    1831 
 
0019d12f438eec910a06a606f570fde8    366 
 
0033f7c005ec776f67f496cd8bc4ae0d    2103 
 
 
 
3. Segment document to terms, (with finding document according to the url)  
 
    ./DocSegment Tianwang.raw.2559638448        //Tianwang.raw.2559638448涓虹埇鍥炴潵鐨勬枃浠?錛屾瘡涓〉闈㈠寘鍚玥ttp澶?nbsp; 
 
        got Tianwang.raw.2559638448.seg       
 
 
 
//Tianwang.raw.2559638448   鐖彇鐨勫師濮嬬綉欏墊枃浠跺湪鏂囨。鍐呴儴姣忎竴涓枃妗d箣闂村簲璇ユ槸閫氳繃version錛?lt;/html>鍜屽洖杞﹀仛鏍囧織浣嶅垎鍓茬殑  
 
version: 1.0 
 
url: http://***.105.138.175/Default2.asp?lang=gb  
 
origin: http://***.105.138.175/  
 
date: Fri, 23 May 2008 20:01:36 GMT  
 
ip: 162.105.138.175 
 
length: 38413 
 
 
 
HTTP/1.1 200 OK  
 
Server: Microsoft-IIS/5.0 
 
Date: Fri, 23 May 2008 11:17:49 GMT  
 
Connection: keep-alive  
 
Connection: Keep-Alive  
 
Content-Length: 38088 
 
Content-Type: text/html; Charset=gb2312  
 
Expires: Fri, 23 May 2008 11:17:49 GMT  
 
Set-Cookie: ASPSESSIONIDSSTRDCAB=IMEOMBIAIPDFCKPAEDJFHOIH; path=/  
 
Cache-control: private 
 
 
 
 
 
 
 
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 
 
" 
<html>  
 
<head>  
 
<title>Apabi鏁板瓧璧勬簮騫沖彴</title>  
 
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">  
 
<META NAME="ROBOTS" CONTENT="INDEX,NOFOLLOW">  
 
<META NAME="DESCRIPTION" CONTENT="鏁板瓧鍥句功棣?鏂規鏁板瓧鍥句功棣?鐢靛瓙鍥句功 鐢靛瓙涔?ebook e涔?Apabi 鏁板瓧璧勬簮騫沖彴">  
 
<link rel="stylesheet" type="text/css" href="css\common.css">  
 
 
 
<style type="text/css">  
 
<!--  
 
.style4 {color: #666666}  
 
-->  
 
</style>  
 
 
 
<script LANGUAGE="vbscript">  
 
...  
 
</script>  
 
 
 
<Script Language="javascript">  
 
...  
 
</Script>  
 
</head>  
 
<body leftmargin="0" topmargin="0">  
 
</body>  
 
</html>  
 
//Tianwang.raw.2559638448   end  
 
 
 
//Tianwang.raw.2559638448.seg   灝嗘瘡涓〉闈㈠垎鎴愪竴琛屽涓?娉ㄦ剰涓棿娌℃湁鍥炶濺浣滀負鍒嗛殧)  
 

 
...  
 
...  
 
...  
 

 
...  
 
...  
 
...  
 
//Tianwang.raw.2559638448.seg   end  
 
 
 
//涓嬫槸 Tiny search 闈炲繀欏誨洜绱?nbsp; 
 
4. Create forward index (docid-->termid)     //建立正向索引  
 
    ./CrtForwardIdx Tianwang.raw.2559638448.seg > moon.fidx  
 
 
 
//Tianwang.raw.2559638448.seg 灝嗘瘡涓〉闈㈠垎鎴愪竴琛屽涓?lt;BR>//鍒嗚瘝   DocID<BR>1<BR>涓夋槦/  s/  鎵嬫満/  璁哄潧/  ,/  鎵嬫満/  閾冨0/  涓嬭澆/  ,/  鎵嬫満/  鍥劇墖/  涓嬭澆/  ,/  鎵嬫満/<BR>2<BR>...<BR>...<BR>... 

1.  The document index (Doc.idx) keeps information about each document.

It is a fixed width ISAM (Index sequential access mode) index, ordered by docID.

The information stored in each entry includes a pointer into the repository,

a document length, a document checksum.

 

//Doc.idx  鏂囨。緙栧彿 鏂囨。闀垮害 checksum hash鐮?/p>

0 0 bc9ce846d7987c4534f53d423380ba70

1 76760 4f47a3cad91f7d35f4bb6b2a638420e5

2 141624 d019433008538f65329ae8e39b86026c

3 142350 5705b8f58110f9ad61b1321c52605795

//Doc.idx end

 

  The url index (url.idx) is used to convert URLs into docIDs.

 

//url.idx

5c36868a9c5117eadbda747cbdb0725f 0

3272e136dd90263ee306a835c6c70d77 1

6b8601bb3bb9ab80f868d549b5c5a5f3 2

3f9eba99fa788954b5ff7f35a5db6e1f 3

//url.idx end

 

It is a list of URL checksums with their corresponding docIDs and is sorted by

checksum. In order to find the docID of a particular URL, the URL's checksum

is computed and a binary search is performed on the checksums file to find its

docID.

 

 ./DocIndex

  got Doc.idx, Url.idx, DocId2Url.idx //Data鏂囦歡澶逛腑鐨凞oc.idx DocId2Url.idx鍜孌oc.idx涓?/p>

 

//DocId2Url.idx

http://*.*.edu.cn/index.aspx

http://*.*.edu.cn/showcontent1.jsp?NewsID=118

http://*.*.edu.cn/0102.html

http://*.*.edu.cn/0103.html

//DocId2Url.idx end

 

2.  sort Url.idx|uniq > Url.idx.sort_uniq //Data鏂囦歡澶逛腑鐨刄rl.idx.sort_uniq

 

//Url.idx.sort_uniq

//瀵筯ash鍊艱繘琛屾帓搴?/p>

000bfdfd8b2dedd926b58ba00d40986b 1111

000c7e34b653b5135a2361c6818e48dc 1831

0019d12f438eec910a06a606f570fde8 366

0033f7c005ec776f67f496cd8bc4ae0d 2103

 

3. Segment document to terms, (with finding document according to the url)

 ./DocSegment Tianwang.raw.2559638448  //Tianwang.raw.2559638448涓虹埇鍥炴潵鐨勬枃浠?錛屾瘡涓〉闈㈠寘鍚玥ttp澶?/p>

  got Tianwang.raw.2559638448.seg  

 

//Tianwang.raw.2559638448 鐖彇鐨勫師濮嬬綉欏墊枃浠跺湪鏂囨。鍐呴儴姣忎竴涓枃妗d箣闂村簲璇ユ槸閫氳繃version錛?lt;/html>鍜屽洖杞﹀仛鏍囧織浣嶅垎鍓茬殑

version: 1.0

url: http://***.105.138.175/Default2.asp?lang=gb

origin: http://***.105.138.175/

date: Fri, 23 May 2008 20:01:36 GMT

ip: 162.105.138.175

length: 38413

 

HTTP/1.1 200 OK

Server: Microsoft-IIS/5.0

Date: Fri, 23 May 2008 11:17:49 GMT

Connection: keep-alive

Connection: Keep-Alive

Content-Length: 38088

Content-Type: text/html; Charset=gb2312

Expires: Fri, 23 May 2008 11:17:49 GMT

Set-Cookie: ASPSESSIONIDSSTRDCAB=IMEOMBIAIPDFCKPAEDJFHOIH; path=/

Cache-control: private

 

 

 

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"

"

<html>

<head>

<title>Apabi鏁板瓧璧勬簮騫沖彴</title>

<meta http-equiv="Content-Type" content="text/html; charset=gb2312">

<META NAME="ROBOTS" CONTENT="INDEX,NOFOLLOW">

<META NAME="DESCRIPTION" CONTENT="鏁板瓧鍥句功棣?鏂規鏁板瓧鍥句功棣?鐢靛瓙鍥句功 鐢靛瓙涔?ebook e涔?Apabi 鏁板瓧璧勬簮騫沖彴">

<link rel="stylesheet" type="text/css" href="css\common.css">

 

<style type="text/css">

<!--

.style4 {color: #666666}

-->

</style>

 

<script LANGUAGE="vbscript">

...

</script>

 

<Script Language="javascript">

...

</Script>

</head>

<body leftmargin="0" topmargin="0">

</body>

</html>

//Tianwang.raw.2559638448 end

 

//Tianwang.raw.2559638448.seg 灝嗘瘡涓〉闈㈠垎鎴愪竴琛屽涓?娉ㄦ剰涓棿娌℃湁鍥炶濺浣滀負鍒嗛殧)

1

...

...

...

2

...

...

...

//Tianwang.raw.2559638448.seg end

 

//涓嬫槸 Tiny search 闈炲繀欏誨洜绱?/p>

4. Create forward index (docid-->termid)  //建立正向索引

 ./CrtForwardIdx Tianwang.raw.2559638448.seg > moon.fidx

 

//Tianwang.raw.2559638448.seg 灝嗘瘡涓〉闈㈠垎鎴愪竴琛屽涓?/鍒嗚瘝   DocID1涓夋槦/  s/  鎵嬫満/  璁哄潧/  ,/  鎵嬫満/  閾冨0/  涓嬭澆/  ,/  鎵嬫満/  鍥劇墖/  涓嬭澆/  ,/  鎵嬫満/2.........view plaincopy to clipboardprint?
//Tianwang.raw.2559638448.seg end  
 
 
//moon.fidx  
 
//姣忕瘒鏂囨。鍙峰搴旀枃妗e唴鍒嗗嚭鏉ョ殑    鍒嗚瘝  DocID  
 
閮戒細  2391 
 
浣?nbsp;  2391 
 
閭d簺  2391 
 
鎷ユ湁  2391 
 
瀹?nbsp;  2391 
 
鐨?nbsp;  2391 
 
浜?nbsp;  2391 
 
鐨?nbsp;  2391 
 
瑙嗛噹  2391 
 
鍙?nbsp;  2391 
 
紿?nbsp;  2391 
 
鍦?nbsp;  2180 
 
鐮旂┒鐢熼儴    2180 
 
涓婚〉  2180 
 
鍩瑰吇  2180 
 
綆$悊  2180 
 
鏍忕洰  2180 
 
涓嬭澆  2180 
 
錛?nbsp;  2180 
 
銆?nbsp;  2180 
 
鍏充簬  2180 
 
鍋氬ソ  2180 
 
騫?nbsp;  2180 
 
鍥藉  2180 
 
鍏淳  2180 
 
鐮旂┒鐢?2180 
 
欏圭洰  2180 
 
//moon.fidx end  
 
 
 
5.# set | grep "LANG" 
 
LANG=en; export LANG;  
 
sort moon.fidx > moon.fidx.sort  
 
 
 
6. Create inverted index (termid-->docid)    //建立倒排索引  
 
    ./CrtInvertedIdx moon.fidx.sort > sun.iidx  
 
 
 
//sun.iidx  //鏂囦歡瑙勬ā澶ф鍑忓皯1/2  
 
鑺卞伐   236 
 
鑺辨搗   2103 
 
鑺卞崏   1018 1061 1061 1061 1730 1730 1730 1730 1730 1852 949 949 
 
鑺辮暰   447 447 
 
鑺辨湪   1061 
 
鑺卞憿   1430 
 
鑺辨湡   447 447 447 447 447 525 
 
鑺遍挶   174 236 
 
鑺辮壊   1730 1730 
 
鑺辮壊鍝佺     1660 
 
鑺辯敓   450 526 
 
鑺卞紡   1428 1430 1430 1430 
 
鑺辯汗   1430 1430 
 
鑺卞簭   447 447 447 447 447 450 
 
鑺辯誕   136 137 
 
鑺辮娊   450 450 
 
//sun.iidx  end  
 
 
 
TSESearch   CGI program for query  
 
Snapshot    CGI program for page snapshot  
 
 
<P>  
author:http://hi.baidu.com/jrckkyy  
 
author:http://blog.csdn.net/jrckkyy  
</P> 

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫4]灝忕粨http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102942.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 14:54:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102942.htmlhttp://m.shnenglu.com/jrckkyy/comments/102942.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102942.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102942.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102942.html閫氳繃鍓嶉潰鐨勪笁綃囨枃绔犵浉淇′綘宸茬粡瀵圭縐樼殑鎼滅儲寮曟搸鏈変簡涓涓劅鎬х殑璁よ瘑錛屽拰鏅氱殑php綾諱技鐨勮剼鏈璦鏈嶅姟鍣ㄧ被浼鹼紝閫氳繃鑾峰彇鍓嶅彴鍏抽敭瀛楋紝閫氳繃瀛楀吀鍒嗚瘝錛屽拰浜嬪厛寤虹珛寤虹珛濂界殑鍊掓帓绱㈠紩榪涜鐩稿叧鎬у垎鏋愶紝寰楀嚭鏌ヨ緇撴瀯鏍煎紡鍖栬緭鍑虹粨鏋溿傝岃繖閲岀殑鎶鏈毦鐐瑰湪浜?/p>

1銆佸瓧鍏哥殑閫夊彇錛堜簨瀹炰笂鏍規嵁涓嶅悓鏃朵唬涓嶅悓鍦版柟浜轟滑鐨勮璦涔犳儻鏄笉涓鏍風殑鎵浠ヨ瀛楀吀鐨勬渶灝忓厓鐨勫彇鍊兼槸涓嶅悓鐨勶級

2銆佸掓帓绱㈠紩鐨勫緩绔嬶紙榪欓噷灝辮娑夊強鍒扮埇铏殑鎶撳彇鍜岀儲寮曠殑寤虹珛鍚庨潰灝嗛噸鐐逛粙緇嶈繖2鐐癸紝鎼滅儲寮曟搸鐨勬晥鐜囧拰鏈嶅姟璐ㄩ噺瀹炴晥鎬х摱棰堝湪榪欓噷錛?/p>

3銆佺浉鍏蟲у垎鏋愶紙瀵規姄鍥炴潵鐨勬枃妗e垎璇嶅緩绱㈠紩鍜岀敤鎴峰叧閿瓧鍒嗚瘝綆楁硶涓婅瀵瑰簲錛?/p>

鍚庨潰鏂囩珷浼氶噸鐐逛粙緇嶇埇铏殑鎶撳彇鍜岀儲寮曠殑寤虹珛銆?/p>

]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫3]鏉ュ埌鍏抽敭瀛楀垎璇嶅強鐩稿叧鎬у垎鏋愮▼搴?http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102941.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 14:53:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102941.htmlhttp://m.shnenglu.com/jrckkyy/comments/102941.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102941.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102941.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102941.html鏈夊墠闈㈡敞閲婃垜浠彲浠ョ煡閬撴煡璇㈠叧閿瓧鍜屽瓧鍏告枃浠跺噯澶囧ソ濂藉悗錛屽皢榪涘叆鐢ㄦ埛鍏抽敭瀛楀垎璇嶉樁孌?/p>

//TSESearch.cpp涓細

view plaincopy to clipboardprint?
CHzSeg iHzSeg;      //include ChSeg/HzSeg.h  
 
//  
iQuery.m_sSegQuery = iHzSeg.SegmentSentenceMM(iDict, iQuery.m_sQuery);  //灝唃et鍒扮殑鏌ヨ鍙橀噺鍒嗚瘝鍒嗘垚 "鎴?        鐖?      浣犱滑/ 鐨?      鏍煎紡"  
 
vector<STRING></STRING> vecTerm;  
iQuery.ParseQuery(vecTerm);     //灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑  
 
set<STRING></STRING> setRelevantRst;   
iQuery.GetRelevantRst(vecTerm, mapBuckets, setRelevantRst);   
 
gettimeofday(&end_tv,&tz);  
// search end  
//鎼滅儲瀹屾瘯 

 CHzSeg iHzSeg;  //include ChSeg/HzSeg.h

 //
 iQuery.m_sSegQuery = iHzSeg.SegmentSentenceMM(iDict, iQuery.m_sQuery); //灝唃et鍒扮殑鏌ヨ鍙橀噺鍒嗚瘝鍒嗘垚 "鎴?  鐖?  浣犱滑/ 鐨?  鏍煎紡"
 
 vector vecTerm;
 iQuery.ParseQuery(vecTerm);  //灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑
 
 set setRelevantRst;
 iQuery.GetRelevantRst(vecTerm, mapBuckets, setRelevantRst);
 
 gettimeofday(&end_tv,&tz);
 // search end
 //鎼滅儲瀹屾瘯view plaincopy to clipboardprint?
鐪婥HzSeg 涓殑榪欎釜鏂規硶 

鐪婥HzSeg 涓殑榪欎釜鏂規硶view plaincopy to clipboardprint?
//ChSeg/HzSeg.h 

//ChSeg/HzSeg.hview plaincopy to clipboardprint?
/**  
 * 紼嬪簭緲昏瘧璇存槑  
 * 榪涗竴姝ュ噣鍖栨暟鎹紝杞崲姹夊瓧  
 * @access  public  
 * @param   CDict, string 鍙傛暟鐨勬眽瀛楄鏄?瀛楀吀錛屾煡璇㈠瓧絎︿覆  
 * @return  string 0  
 */  
// process a sentence before segmentation  
//鍦ㄥ垎璇嶅墠澶勭悊鍙ュ瓙  
string CHzSeg::SegmentSentenceMM (CDict &dict, string s1) const  
{  
    string s2="";  
    unsigned int i,len;  
 
    while (!s1.empty())   
    {  
        unsigned char ch=(unsigned char) s1[0];  
        if(ch<128)   
        { // deal with ASCII  
            i=1;  
            len = s1.size();  
            while (i<LEN len="s1.length();" i="0;" 涓枃鏍囩偣絳夐潪姹夊瓧瀛楃="" if="" else="" yhf="" s1="s1.substr(i);" by="" added="" ch="=13)" s2="" cr=""></LEN>=161)  
              && (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=162 && (unsigned char)s1[i+1]<=168)))  
              && (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=171 && (unsigned char)s1[i+1]<=191)))  
              && (!((unsigned char)s1[i]==163 && ((unsigned char)s1[i+1]==172 || (unsigned char)s1[i+1]==161)   
              || (unsigned char)s1[i+1]==168 || (unsigned char)s1[i+1]==169 || (unsigned char)s1[i+1]==186  
              || (unsigned char)s1[i+1]==187 || (unsigned char)s1[i+1]==191)))   
                {   
                    ii=i+2; // 鍋囧畾娌℃湁鍗婁釜姹夊瓧  
                }  
 
                if (i==0) ii=i+2;  
 
                // 涓嶅鐞嗕腑鏂囩┖鏍?nbsp; 
                if (!(ch==161 && (unsigned char)s1[1]==161))   
                {   
                    if (i <= s1.size())  // yhf  
                        // 鍏朵粬鐨勯潪姹夊瓧鍙屽瓧鑺傚瓧絎﹀彲鑳借繛緇緭鍑?nbsp; 
                        s2 += s1.substr(0, i) + SEPARATOR;   
                    else break; // yhf  
                }  
 
                if (i <= s1.size())  // yhf  
                    s1s1=s1.substr(i);  
                else break;     //yhf  
 
                continue;  
            }  
        }  
      
 
    // 浠ヤ笅澶勭悊姹夊瓧涓?nbsp; 
 
        i = 2;  
        len = s1.length();  
 
        while(i<LEN></LEN>=176)   
//    while(i<LEN></LEN>=128 && (unsigned char)s1[i]!=161)  
            i+=2;  
 
        s2+=SegmentHzStrMM(dict, s1.substr(0,i));  
 
        if (i <= len)    // yhf  
            s1s1=s1.substr(i);  
        else break; // yhf  
    }  
 
    return s2;  

/**
 * 紼嬪簭緲昏瘧璇存槑
 * 榪涗竴姝ュ噣鍖栨暟鎹紝杞崲姹夊瓧
 * @access  public
 * @param   CDict, string 鍙傛暟鐨勬眽瀛楄鏄?瀛楀吀錛屾煡璇㈠瓧絎︿覆
 * @return  string 0
 */
// process a sentence before segmentation
//鍦ㄥ垎璇嶅墠澶勭悊鍙ュ瓙
string CHzSeg::SegmentSentenceMM (CDict &dict, string s1) const
{
 string s2="";
 unsigned int i,len;

 while (!s1.empty())
 {
  unsigned char ch=(unsigned char) s1[0];
  if(ch<128)
  { // deal with ASCII
   i=1;
   len = s1.size();
   while (i=161)
              && (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=162 && (unsigned char)s1[i+1]<=168)))
              && (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=171 && (unsigned char)s1[i+1]<=191)))
              && (!((unsigned char)s1[i]==163 && ((unsigned char)s1[i+1]==172 || (unsigned char)s1[i+1]==161)
              || (unsigned char)s1[i+1]==168 || (unsigned char)s1[i+1]==169 || (unsigned char)s1[i+1]==186
              || (unsigned char)s1[i+1]==187 || (unsigned char)s1[i+1]==191)))
    {
     i=i+2; // 鍋囧畾娌℃湁鍗婁釜姹夊瓧
    }

    if (i==0) i=i+2;

    // 涓嶅鐞嗕腑鏂囩┖鏍?br>    if (!(ch==161 && (unsigned char)s1[1]==161))
    {
     if (i <= s1.size()) // yhf
      // 鍏朵粬鐨勯潪姹夊瓧鍙屽瓧鑺傚瓧絎﹀彲鑳借繛緇緭鍑?br>      s2 += s1.substr(0, i) + SEPARATOR;
     else break; // yhf
    }

    if (i <= s1.size()) // yhf
     s1=s1.substr(i);
    else break;  //yhf

    continue;
   }
  }
   

    // 浠ヤ笅澶勭悊姹夊瓧涓?/p>

  i = 2;
  len = s1.length();

  while(i=176)
//    while(i=128 && (unsigned char)s1[i]!=161)
   i+=2;

  s2+=SegmentHzStrMM(dict, s1.substr(0,i));

  if (i <= len) // yhf
   s1=s1.substr(i);
  else break; // yhf
 }

 return s2;
}view plaincopy to clipboardprint?
  

 view plaincopy to clipboardprint?
//Query.cpp 

//Query.cppview plaincopy to clipboardprint?
<PRE class=csharp name="code">/**  
 * 紼嬪簭緲昏瘧璇存槑  
 * 灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑  
 *  
 * @access  public  
 * @param   vector<STRING></STRING> 鍙傛暟鐨勬眽瀛楄鏄庯細鍚戦噺瀹瑰櫒  
 * @return  void  
 */  
void CQuery::ParseQuery(vector<STRING></STRING> &vecTerm)  
{  
    string::size_type idx;   
    while ( (idx = m_sSegQuery.find("/  ")) != string::npos ) {   
        vecTerm.push_back(m_sSegQuery.substr(0,idx));   
        m_sSegQuerym_sSegQuery = m_sSegQuery.substr(idx+3);   
    }  
}  
</PRE> 
<PRE class=csharp name="code"> </PRE> 
<PRE class=csharp name="code"><PRE class=csharp name="code">/**  
 * 紼嬪簭緲昏瘧璇存槑  
 * 鐩稿叧鎬у垎鏋愭煡璇紝鏋勯犵粨鏋滈泦鍚坰etRelevantRst //鐡墮鎵鍦?nbsp; 
 *  
 * @access  public  
 * @param   vector<STRING></STRING> map set<STRING></STRING> 鍙傛暟鐨勬眽瀛楄鏄庯細 鐢ㄦ埛鎻愪氦鍏抽敭瀛楃殑鍒嗚瘝緇勶紝鍊掓帓绱㈠紩鏄犲皠錛岀浉鍏蟲х粨鏋滈泦鍚?nbsp; 
 * @return  string 0  
 */  
bool CQuery::GetRelevantRst  
(  
    vector<STRING></STRING> &vecTerm,   
    map &mapBuckets,   
    set<STRING></STRING> &setRelevantRst  
) const  
{  
    set<STRING></STRING> setSRst;  
 
    bool bFirst=true;  
    vector<STRING></STRING>::iterator itTerm = vecTerm.begin();  
 
    for ( ; itTerm != vecTerm.end(); ++itTerm )  
    {  
 
        setSRst.clear();  
        copy(setRelevantRst.begin(), setRelevantRst.end(), inserter(setSRst,setSRst.begin()));  
 
        map mapRstDoc;  
        string docid;  
        int doccnt;  
 
        map::iterator itBuckets = mapBuckets.find(*itTerm);  
        if (itBuckets != mapBuckets.end())  
        {  
            string strBucket = (*itBuckets).second;  
            string::size_type idx;  
            idx = strBucket.find_first_not_of(" ");  
            strBucketstrBucket = strBucket.substr(idx);  
 
            while ( (idx = strBucket.find(" ")) != string::npos )   
            {  
                docid = strBucket.substr(0,idx);  
                doccnt = 0;  
 
                if (docid.empty()) continue;  
 
                map::iterator it = mapRstDoc.find(docid);  
                if ( it != mapRstDoc.end() )  
                {  
                    doccnt = (*it).second + 1;  
                    mapRstDoc.erase(it);  
                }  
                mapRstDoc.insert( pair(docid,doccnt) );  
 
                strBucketstrBucket = strBucket.substr(idx+1);  
            }  
 
            // remember the last one  
            docid = strBucket;  
            doccnt = 0;  
            map::iterator it = mapRstDoc.find(docid);  
            if ( it != mapRstDoc.end() )  
            {  
                doccnt = (*it).second + 1;  
                mapRstDoc.erase(it);  
            }  
            mapRstDoc.insert( pair(docid,doccnt) );  
        }  
 
        // sort by term frequencty  
        multimap > newRstDoc;  
        map::iterator it0 = mapRstDoc.begin();  
        for ( ; it0 != mapRstDoc.end(); ++it0 ){  
            newRstDoc.insert( pair((*it0).second,(*it0).first) );  
        }  
 
        multimap::iterator itNewRstDoc = newRstDoc.begin();  
        setRelevantRst.clear();  
        for ( ; itNewRstDoc != newRstDoc.end(); ++itNewRstDoc ){  
            string docid = (*itNewRstDoc).second;  
 
            if (bFirst==true) {  
                setRelevantRst.insert(docid);  
                continue;  
            }  
 
            if ( setSRst.find(docid) != setSRst.end() ){      
                setRelevantRst.insert(docid);  
            }  
        }  
 
        //cout << "setRelevantRst.size(): " << setRelevantRst.size() << "<BR>";  
        bFirst = false;  
    }  
    return true;  
}</PRE> 
</PRE> 
接下来就是显示了:前面都只是处理数据,得到 setRelevantRst 这个查询结果集合,这里就不多说了。下面就和 PHP 之类的脚本语言差不多,格式化结果集合并显示出来。

view plaincopy to clipboardprint?/**   * 紼嬪簭緲昏瘧璇存槑   * 灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑   *   * @access  public   * @param   vector<STRING></STRING> 鍙傛暟鐨勬眽瀛楄鏄庯細鍚戦噺瀹瑰櫒   * @return  void   */  void CQuery::ParseQuery(vector<STRING></STRING> &vecTerm)   {       string::size_type idx;        while ( (idx = m_sSegQuery.find("/  ")) != string::npos ) {            vecTerm.push_back(m_sSegQuery.substr(0,idx));            m_sSegQuery = m_sSegQuery.substr(idx+3);        }   }  /**
 * 紼嬪簭緲昏瘧璇存槑
 * 灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑
 *
 * @access  public
 * @param   vector 鍙傛暟鐨勬眽瀛楄鏄庯細鍚戦噺瀹瑰櫒
 * @return  void
 */
// Splits the segmented query held in m_sSegQuery into its terms.
//
// Terms are delimited by "/  " (a slash followed by two spaces). Every
// delimited term is appended to vecTerm in order of appearance. The consumed
// prefix is then removed from m_sSegQuery, so whatever follows the last
// delimiter is what remains in the member afterwards.
//
// @param vecTerm  receives the terms; existing elements are kept.
void CQuery::ParseQuery(vector<string> &vecTerm)
{
 const string delim("/  ");
 string::size_type start = 0;
 string::size_type pos;

 // Scan by position so the remainder is not re-copied for every term.
 while ( (pos = m_sSegQuery.find(delim, start)) != string::npos ) {
  vecTerm.push_back(m_sSegQuery.substr(start, pos - start));
  start = pos + delim.size();
 }

 // Drop everything up to and including the last delimiter.
 m_sSegQuery.erase(0, start);
}

view plaincopy to clipboardprint?   
view plaincopy to clipboardprint?<PRE class=csharp name="code">/**   * 紼嬪簭緲昏瘧璇存槑   * 鐩稿叧鎬у垎鏋愭煡璇紝鏋勯犵粨鏋滈泦鍚坰etRelevantRst //鐡墮鎵鍦?nbsp;  *   * @access  public   * @param   vector<STRING></STRING> map set<STRING></STRING> 鍙傛暟鐨勬眽瀛楄鏄庯細 鐢ㄦ埛鎻愪氦鍏抽敭瀛楃殑鍒嗚瘝緇勶紝鍊掓帓绱㈠紩鏄犲皠錛岀浉鍏蟲х粨鏋滈泦鍚?nbsp;  * @return  string 0   */  bool CQuery::GetRelevantRst   (       vector<STRING></STRING> &vecTerm,        map &mapBuckets,        set<STRING></STRING> &setRelevantRst   ) const  {       set<STRING></STRING> setSRst;         bool bFirst=true;       vector<STRING></STRING>::iterator itTerm = vecTerm.begin();         for ( ; itTerm != vecTerm.end(); ++itTerm )       {             setSRst.clear();           copy(setRelevantRst.begin(), setRelevantRst.end(), inserter(setSRst,setSRst.begin()));             map mapRstDoc;           string docid;           int doccnt;             map::iterator itBuckets = mapBuckets.find(*itTerm);           if (itBuckets != mapBuckets.end())           {               string strBucket = (*itBuckets).second;               string::size_type idx;               idx = strBucket.find_first_not_of(" ");               strBucket = strBucket.substr(idx);                 while ( (idx = strBucket.find(" ")) != string::npos )                {                   docid = strBucket.substr(0,idx);                   doccnt = 0;                     if (docid.empty()) continue;                     map::iterator it = mapRstDoc.find(docid);                   if ( it != mapRstDoc.end() )                   {                       doccnt = (*it).second + 1;                       mapRstDoc.erase(it);                   }                   mapRstDoc.insert( pair(docid,doccnt) );                     strBucket = strBucket.substr(idx+1);               }                 // remember the last one               docid = strBucket;               doccnt = 0;               map::iterator it = mapRstDoc.find(docid);               if ( it != mapRstDoc.end() )               {                   doccnt = 
(*it).second + 1;                   mapRstDoc.erase(it);               }               mapRstDoc.insert( pair(docid,doccnt) );           }             // sort by term frequencty           multimap > newRstDoc;           map::iterator it0 = mapRstDoc.begin();           for ( ; it0 != mapRstDoc.end(); ++it0 ){               newRstDoc.insert( pair((*it0).second,(*it0).first) );           }             multimap::iterator itNewRstDoc = newRstDoc.begin();           setRelevantRst.clear();           for ( ; itNewRstDoc != newRstDoc.end(); ++itNewRstDoc ){               string docid = (*itNewRstDoc).second;                 if (bFirst==true) {                   setRelevantRst.insert(docid);                   continue;               }                 if ( setSRst.find(docid) != setSRst.end() ){                       setRelevantRst.insert(docid);               }           }             //cout << "setRelevantRst.size(): " << setRelevantRst.size() << "<BR>";           bFirst = false;       }       return true;   }</PRE>  view plaincopy to clipboardprint?/**   * 紼嬪簭緲昏瘧璇存槑   * 鐩稿叧鎬у垎鏋愭煡璇紝鏋勯犵粨鏋滈泦鍚坰etRelevantRst //鐡墮鎵鍦?nbsp;  *   * @access  public   * @param   vector<STRING></STRING> map set<STRING></STRING> 鍙傛暟鐨勬眽瀛楄鏄庯細 鐢ㄦ埛鎻愪氦鍏抽敭瀛楃殑鍒嗚瘝緇勶紝鍊掓帓绱㈠紩鏄犲皠錛岀浉鍏蟲х粨鏋滈泦鍚?nbsp;  * @return  string 0   */  bool CQuery::GetRelevantRst   (       vector<STRING></STRING> &vecTerm,        map &mapBuckets,        set<STRING></STRING> &setRelevantRst   ) const  {       set<STRING></STRING> setSRst;         bool bFirst=true;       vector<STRING></STRING>::iterator itTerm = vecTerm.begin();         for ( ; itTerm != vecTerm.end(); ++itTerm )       {             setSRst.clear();           copy(setRelevantRst.begin(), setRelevantRst.end(), inserter(setSRst,setSRst.begin()));             map mapRstDoc;           string docid;           int doccnt;             map::iterator itBuckets = mapBuckets.find(*itTerm);           if (itBuckets != mapBuckets.end())           {               string strBucket = 
(*itBuckets).second;               string::size_type idx;               idx = strBucket.find_first_not_of(" ");               strBucket = strBucket.substr(idx);                 while ( (idx = strBucket.find(" ")) != string::npos )                {                   docid = strBucket.substr(0,idx);                   doccnt = 0;                     if (docid.empty()) continue;                     map::iterator it = mapRstDoc.find(docid);                   if ( it != mapRstDoc.end() )                   {                       doccnt = (*it).second + 1;                       mapRstDoc.erase(it);                   }                   mapRstDoc.insert( pair(docid,doccnt) );                     strBucket = strBucket.substr(idx+1);               }                 // remember the last one               docid = strBucket;               doccnt = 0;               map::iterator it = mapRstDoc.find(docid);               if ( it != mapRstDoc.end() )               {                   doccnt = (*it).second + 1;                   mapRstDoc.erase(it);               }               mapRstDoc.insert( pair(docid,doccnt) );           }             // sort by term frequencty           multimap > newRstDoc;           map::iterator it0 = mapRstDoc.begin();           for ( ; it0 != mapRstDoc.end(); ++it0 ){               newRstDoc.insert( pair((*it0).second,(*it0).first) );           }             multimap::iterator itNewRstDoc = newRstDoc.begin();           setRelevantRst.clear();           for ( ; itNewRstDoc != newRstDoc.end(); ++itNewRstDoc ){               string docid = (*itNewRstDoc).second;                 if (bFirst==true) {                   setRelevantRst.insert(docid);                   continue;               }                 if ( setSRst.find(docid) != setSRst.end() ){                       setRelevantRst.insert(docid);               }           }             //cout << "setRelevantRst.size(): " << setRelevantRst.size() << "<BR>";           bFirst = false;       }       return 
true;   }  /**
 * 紼嬪簭緲昏瘧璇存槑
 * 鐩稿叧鎬у垎鏋愭煡璇紝鏋勯犵粨鏋滈泦鍚坰etRelevantRst //鐡墮鎵鍦?br> *
 * @access  public
 * @param   vector map set 鍙傛暟鐨勬眽瀛楄鏄庯細 鐢ㄦ埛鎻愪氦鍏抽敭瀛楃殑鍒嗚瘝緇勶紝鍊掓帓绱㈠紩鏄犲皠錛岀浉鍏蟲х粨鏋滈泦鍚?br> * @return  string 0
 */
bool CQuery::GetRelevantRst
(
 vector &vecTerm,
 map &mapBuckets,
 set &setRelevantRst
) const
{
 set setSRst;

 bool bFirst=true;
 vector::iterator itTerm = vecTerm.begin();

 for ( ; itTerm != vecTerm.end(); ++itTerm )
 {

  setSRst.clear();
  copy(setRelevantRst.begin(), setRelevantRst.end(), inserter(setSRst,setSRst.begin()));

  map mapRstDoc;
  string docid;
  int doccnt;

  map::iterator itBuckets = mapBuckets.find(*itTerm);
  if (itBuckets != mapBuckets.end())
  {
   string strBucket = (*itBuckets).second;
   string::size_type idx;
   idx = strBucket.find_first_not_of(" ");
   strBucket = strBucket.substr(idx);

   while ( (idx = strBucket.find(" ")) != string::npos )
   {
    docid = strBucket.substr(0,idx);
    doccnt = 0;

    if (docid.empty()) continue;

    map::iterator it = mapRstDoc.find(docid);
    if ( it != mapRstDoc.end() )
    {
     doccnt = (*it).second + 1;
     mapRstDoc.erase(it);
    }
    mapRstDoc.insert( pair(docid,doccnt) );

    strBucket = strBucket.substr(idx+1);
   }

   // remember the last one
   docid = strBucket;
   doccnt = 0;
   map::iterator it = mapRstDoc.find(docid);
   if ( it != mapRstDoc.end() )
   {
    doccnt = (*it).second + 1;
    mapRstDoc.erase(it);
   }
   mapRstDoc.insert( pair(docid,doccnt) );
  }

  // sort by term frequencty
  multimap > newRstDoc;
  map::iterator it0 = mapRstDoc.begin();
  for ( ; it0 != mapRstDoc.end(); ++it0 ){
   newRstDoc.insert( pair((*it0).second,(*it0).first) );
  }

  multimap::iterator itNewRstDoc = newRstDoc.begin();
  setRelevantRst.clear();
  for ( ; itNewRstDoc != newRstDoc.end(); ++itNewRstDoc ){
   string docid = (*itNewRstDoc).second;

   if (bFirst==true) {
    setRelevantRst.insert(docid);
    continue;
   }

   if ( setSRst.find(docid) != setSRst.end() ){ 
    setRelevantRst.insert(docid);
   }
  }

  //cout << "setRelevantRst.size(): " << setRelevantRst.size() << "";
  bFirst = false;
 }
 return true;
}

接下来就是显示了:前面都只是处理数据,得到 setRelevantRst 这个查询结果集合,这里就不多说了。下面就和 PHP 之类的脚本语言差不多,格式化结果集合并显示出来。
//TSESearch.cpp

view plaincopy to clipboardprint?
//涓嬮潰寮濮嬫樉紺?nbsp; 
    // Excerpt from main(): renders the result page once the search is done.
    // begin_tv/end_tv, iQuery, vecTerm, setRelevantRst and vecDocIdx are
    // declared earlier in main().
    CDisplayRst iDisplayRst;   
    iDisplayRst.ShowTop();   
 
    // Elapsed search time in milliseconds: whole seconds scaled by 1000 plus
    // the microsecond difference divided by 1000.
    float used_msec = (end_tv.tv_sec-begin_tv.tv_sec)*1000   
        +((float)(end_tv.tv_usec-begin_tv.tv_usec))/(float)1000;   
 
    // Receives the query text, the elapsed time, the number of matching
    // documents and the start position taken from the request.
    iDisplayRst.ShowMiddle(iQuery.m_sQuery,used_msec,   
            setRelevantRst.size(), iQuery.m_iStart);  
 
    // Receives the terms, the matching doc ids and the document index;
    // presumably prints the hit list itself -- confirm in DisplayRst.
    iDisplayRst.ShowBelow(vecTerm,setRelevantRst,vecDocIdx,iQuery.m_iStart);

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫2]璺繃鏌ヨ澶勭悊紼嬪簭http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102940.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 14:52:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102940.htmlhttp://m.shnenglu.com/jrckkyy/comments/102940.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102940.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102940.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102940.html鐢變笂涓綃囨枃绔燵鍘焆鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫1]瀵繪壘鎼滅儲寮曟搸鍏ュ彛 鎴戜滑鍙互鐭ラ亾鏁翠釜紼嬪簭鏄粠TSESearch.cpp 涓殑main鍑芥暟寮濮嬬殑鎴戜滑閲嶇偣涓涓嬭繖孌典唬鐮?/p>

//TSESearch.cpp CQuery iQuery;
 iQuery.GetInputs();  //鍏蜂綋紼嬪簭寮濮嬫墽琛?br> // current query & result page number
 iQuery.SetQuery();
 iQuery.SetStart();

 // begin to search
 //寮濮嬪叿浣撴悳绱㈢▼搴?br> gettimeofday(&begin_tv,&tz); //寮濮嬭鏃惰幏鍙栫▼搴忚繍琛屾椂闂村樊

 iQuery.GetInvLists(mapBuckets);  //灝嗘墍鏈夊瓧絎﹂泦瀛樺叆鏄犲皠鍙橀噺涓?nbsp;鐡墮鎵鍦?br> iQuery.GetDocIdx(vecDocIdx);  //灝嗗掓帓绱㈠紩瀛樺叆鍚戦噺涓?nbsp; 鐡墮鎵鍦?br> 
 CHzSeg iHzSeg;  //include ChSeg/HzSeg.h
 iQuery.m_sSegQuery = iHzSeg.SegmentSentenceMM(iDict, iQuery.m_sQuery); //灝唃et鍒扮殑鏌ヨ鍙橀噺鍒嗚瘝鍒嗘垚 "鎴?  鐖?  浣犱滑/ 鐨?  鏍煎紡"
 
 vector vecTerm;
 iQuery.ParseQuery(vecTerm);  //灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑
 
 set setRelevantRst;
 iQuery.GetRelevantRst(vecTerm, mapBuckets, setRelevantRst);
 
 gettimeofday(&end_tv,&tz);
 // search end
 //鎼滅儲瀹屾瘯鎸夌収欏哄簭鎴戜滑棣栧厛娣卞叆榪沬Query瀵硅薄鐨勭被CQuery  

//Query.cpp

1、GetInputs

这个方法的功能是将前台 GET 过来的变量转换到 HtmlInputs 结构体数组中,例子和代码如下:

//鍋囪鍓嶅彴鏌ヨ鐨勫叧閿瓧鏄?1"鐫HtmlInputs涓唴瀹硅緭鍑哄涓?nbsp; //HtmlInputs[0].Name word  //HtmlInputs[0].Value 1  //HtmlInputs[1].Name www  //HtmlInputs[1].Value 鎼滅儲  //HtmlInputs[2].Name cdtype  //HtmlInputs[2].Value GB

 
/*
 * Get form information throught environment varible.
 * return 0 if succeed, otherwise exit.
 */
/**
 * 紼嬪簭緲昏瘧璇存槑
 * 澶勭悊GET榪囨潵鐨勮〃鍗?br> *
 * @access  public
 * @return  string 0
 */
int CQuery::GetInputs()
{
    int i,j;
 char *mode = getenv("REQUEST_METHOD"); //榪斿洖鐜鍙橀噺鐨勫?榪欓噷鐜鍙橀噺 REQUEST_METHOD 涓?get 鏂規硶
    char *tempstr; //GET鍙橀噺瀛楃涓叉垨POST瀛楃涓插唴瀹?br> char *in_line; 
 int length;  //GET鍙橀噺涓查暱搴︽垨POST鍐呭闀垮害

 cout << "Content-type: text/html\n\n";
 //cout << "Cache-Control: no-cache\n";
 //cout << "Expires: Tue, 08 Apr 1997 17:20:00 GMT\n";
 //cout << "Expires: 0\n";
 //cout << "Pragma: no-cache\n\n";

 cout << "\n";
 cout << "\n";
 //cout << "\n";
 //cout << "\n";
 //cout << "\n";
 cout << "\n";
 cout.flush(); //閲婃斁杈撳嚭緙撳啿鍖?杈撳嚭澶撮儴head鍜屼箣鍓嶇殑html鏍囩鍐呭
 //cout <<"" << endl;

 if (mode==NULL) return 1;

 if (strcmp(mode, "POST") == 0)
 {
  length = atoi(getenv("CONTENT_LENGTH")); //濡傛灉鏄疨OST鏂規硶鐫鑾峰緱鐜鍙橀噺CONTENT_LENGTH鐨勬暣鍨嬪?br>  if (length==0 || length>=256)
   return 1;
  in_line = (char*)malloc(length + 1);
  read(STDIN_FILENO, in_line, length);
  in_line[length]='\0';
 }
 else if (strcmp(mode, "GET") == 0)
 {
  char* inputstr = getenv("QUERY_STRING"); //濡傛灉鏄疓ET鏂規硶鐫鑾峰緱鐜鍙橀噺QUERY_STRING鐨勫瓧絎︿覆鍊?br>  length = strlen(inputstr);
  if (inputstr==0 || length>=256)
   return 1;

  //鑾峰彇get鍐呭闀垮害騫舵妸get 錛熷悗闈㈢殑鍙傛暟璧嬪肩粰鍙橀噺in_line
  in_line = (char*)malloc(length + 1);
  strcpy(in_line, inputstr); //灝忓績婧㈠嚭鏀誨嚮
 }


 tempstr = (char*)malloc(length + 1); //鑾峰彇post鍐呭鎴杇et鍐呭闀垮害
 if(tempstr == NULL)
 {
  printf("\n");
  printf("\n");
  printf("Major failure #1;please notify the webmaster\n");
  printf("\n");
  fflush(stdout); //杈撳嚭緙撳啿鍖?br>  exit(2); //閿欒榪斿洖
 }

 j=0;
 for (i=0; i char
   strcpy(HtmlInputs[HtmlInputCount].Name,tempstr);
   if (i == length - 1)
   {
    strcpy(HtmlInputs[HtmlInputCount].Value,"");
    HtmlInputCount++;
   }
   j=0;
  }
  else if ((in_line[i] == '&') || (i==length-1))
  {
   if (i==length-1)
   {
    if(in_line[i] == '+')tempstr[j]=' ';
    else tempstr[j] = in_line[i];
    j++;
   }
   tempstr[j]='\0';
   CStrFun::Translate(tempstr); //灝哢RL緙栫爜褰㈠紡鐨勫弬鏁拌漿鎹㈡垚瀛楃鍨?%** -> char
   strcpy(HtmlInputs[HtmlInputCount].Value,tempstr);
   HtmlInputCount++;
   j=0;
  }
  else if (in_line[i] == '+')
  {
   tempstr[j]=' ';
   j++;
  }
  else
  {
   tempstr[j]=in_line[i]; //緇勫悎get涓殑鍙橀噺濡倃ord www cdtype
   j++;
  }
  //cout<";
  //cout<";
  //cout.flush();
 }

 /*
 for (int kk = 0; kk < HtmlInputCount ; ++kk )
 {
  cout<<"Name="<";
  cout<<"Value="<";
 }
 //鍋囪鍓嶅彴鏌ヨ鐨勫叧閿瓧鏄?1"杈撳嚭濡備笅
 //Name=word
 //Value=1
 //Name=www
 //Value= 鎼滅儲
 //Name=cdtype
 //Value=GB
 */

 if(in_line) free(in_line);
 if(tempstr) free(tempstr);

 return 0;
}
 
2、SetQuery
 
//Query.cpp
void CQuery::SetQuery()
{
 string q = HtmlInputs[0].Value;
 CStrFun::Str2Lower(q,q.size()); //澶у啓鍙樺皬鍐?br> m_sQuery = q;  //鍑嗗鏌ヨ鍏抽敭瀛?br>}
3、SetStart
void CQuery::SetQuery()
{
 string q = HtmlInputs[0].Value;
 CStrFun::Str2Lower(q,q.size()); //澶у啓鍙樺皬鍐檞ord鍙橀噺閲岀殑鍊?br> m_sQuery = q;  //璁劇疆鏌ヨ鍏抽敭瀛?br>}

4、GetInvLists
 bool CQuery::GetInvLists(map<string, string> &mapBuckets) const
{
 ifstream ifsInvInfo(INF_INFO_NAME.c_str(), ios::binary); //浠ヤ簩榪涘埗褰㈠紡鎵撳紑涓涓枃浠剁殑杈撳叆嫻佺紦鍐詫紝INF_INFO_NAME鍦ㄥご鏂囦歡Comm.h涓畾涔変簡鐨勶紝 const string INF_INFO_NAME("./Data/sun.iidx");
 //鍊掓帓绱㈠紩鏂囦歡绱㈠紩瀛楀拰鏂囨。濂戒箣闂存湁涓涓埗琛ㄧ"\t"
 //鏈卞痙  14383 16151 16151 16151 1683 207 6302 7889 8218 8218 8637
 //鏈卞彜鍔?nbsp; 1085 1222
 
 if (!ifsInvInfo) {
  cerr << "Cannot open " << INF_INFO_NAME << " for input\n";
  return false;
 }
 string strLine, strWord, strDocNum;
 //浠ヨ璇誨彇杈撳叆嫻佺紦鍐插埌瀛楃涓插璞trLine涓茍鍋氬鐞?br> while (getline(ifsInvInfo, strLine)) {
  string::size_type idx;
  string tmp;
  idx = strLine.find("\t");
  strWord = strLine.substr(0,idx);
  strDocNum = strLine.substr(idx+1);
  mapBuckets.insert(map<string,string>::value_type (strWord, strDocNum)); //鍊掓帓琛ㄤ簩欏逛簩緇磋〃瀛樺叆鏄犲皠涓?br> 
  /*
  map<string, string>::iterator iter;
  int kkk = 0;
  for (iter = mapBuckets.begin(); kkk != 10; ++iter)
  {
   cout<<iter->first<<"  "<<iter->second<<"<br>";
   ++kkk;
  }
  cout.flush();
  */
 }
 return true;
}
 
5、GetDocIdx
 
bool CQuery::GetDocIdx(vector &vecDocIdx) const
{
 ifstream ifs(DOC_IDX_NAME.c_str(), ios::binary); 
 //0  0  bc9ce846d7987c4534f53d423380ba70
 //1  76760 4f47a3cad91f7d35f4bb6b2a638420e5
 //2  141624 d019433008538f65329ae8e39b86026c

 if (!ifs) {
  cerr << "Cannot open " << DOC_IDX_NAME << " for input\n"; //浠ヤ簩榪涘埗褰㈠紡鎵撳紑涓涓枃浠剁殑杈撳叆嫻佺紦鍐詫紝DOC_IDX_NAME鍦ㄥご鏂囦歡Comm.h涓畾涔変簡鐨勶紝 const string INF_INFO_NAME("./Data/Doc.idx"); 
  return false;
 }

 string strLine, strDocid, strUrl;
 while (getline(ifs,strLine)){
  DocIdx di;

  sscanf( strLine.c_str(), "%d%d", &di.docid, &di.offset ); //鍙繚鐣欎簡鍓嶉潰涓ら」鏂囨。鍙峰拰鍋忕Щ閲?br>  vecDocIdx.push_back(di); //瀵煎叆緇撴瀯浣撳悜閲忎腑
 }

 return true;
}

 



]]>
鑷《鍚戜笅瀛︽悳绱㈠紩鎿庘斺斿寳澶уぉ緗戞悳绱㈠紩鎿嶵SE鍒嗘瀽鍙婂畬鍏ㄦ敞閲奫1]瀵繪壘鎼滅儲寮曟搸鍏ュ彛http://m.shnenglu.com/jrckkyy/archive/2009/12/10/102939.html瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/dc:creator>瀛﹁呯珯鍦ㄥ法浜虹殑鑲╄唨涓?/author>Thu, 10 Dec 2009 14:51:00 GMThttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102939.htmlhttp://m.shnenglu.com/jrckkyy/comments/102939.htmlhttp://m.shnenglu.com/jrckkyy/archive/2009/12/10/102939.html#Feedback0http://m.shnenglu.com/jrckkyy/comments/commentRss/102939.htmlhttp://m.shnenglu.com/jrckkyy/services/trackbacks/102939.html鐢變簬鐧懼害鍗氬http://hi.baidu.com/jrckkyy鍙戣〃鏂囩珷瀛楁暟鏈夐檺錛屼互鍚庡師鍒涙枃绔犲叏閮ㄩ兘鍏堝彂琛ㄥ埌csdn鍜宑u涓婏紝鍐嶅彂琛ㄥ埌鐧懼害鍗氬涓婏紝鐧懼害鍗氬闄や簡鏀懼師鍒涚殑鏂囩珷榪樹富瑕佹斁緗戜笂瀵繪壘鍒扮殑浼樼鏂囩珷銆?/p>

鏈潃榛戝綺劇鎴戝皢闄嗙畫鎶婃渶榪戝垎鏋愭敞閲奣SE鎼滅儲寮曟搸鐨勫績寰楀彂甯冨嚭鏉ワ紝鑰侀笩錛屽ぇ铏撅紝澶х墰錛岄珮鎵嬮榪囧氨鏄簡錛岃嫢鎰挎剰嫻垂鎸囩偣涓嬪皬寮熺殑鍦ㄤ笅涓嶇敋鎰熸縺錛屾湁闂鐨勬湅鍙嬬洿鎺ョ暀璦璁ㄨ銆傜敱浜庢湰浜烘按騫蟲湁闄愶紝鍒嗘瀽鍜岀炕璇戦毦鍏嶆湁閿欏ぇ瀹惰絎戜簡銆?/p>

涓婂鏈熸嫓璇諱簡James F.Kurose钁楃殑銆婅綆楁満緗戠粶-鑷《鍚戜笅鏂規硶涓巌nternet鐗硅壊(絎笁鐗堥槾褰?銆嬶紝瑙夊緱鍐欏緱紜疄涓嶉敊(甯屾湜娌$湅鐨勬湅鍙嬩竴瀹氳涔版潵鐪嬬湅)錛岃嚜宸變篃鏉ユ悶涓珮鑷《鍚戜笅鐨勫涔犳柟娉曪紝鍏堜粠鐢ㄦ埛鐪嬪緱鍒扮殑涓滆タ鍑哄彂鍒嗘瀽鐮旂┒鎼滅儲寮曟搸錛屼笅闈㈡垜浠氨鏉ョ湅鐪嬪悇澶ф悳绱㈠紩鎿庢悳绱㈢晫闈㈢殑浠g爜錛屼綘鎵闇瑕佺壒鍒敞鎰忕殑鏄痜orm琛ㄥ崟涓殑action

闆呰檸http://www.yohoo.com/錛?/p>

<form name=s1 style="margin-bottom:0" action="<table cellpadding=0 cellspacing=0 border=0><tr><td>
<input type=text size=30 name=p title="enter search terms here">&nbsp;
<input type=submit value=Search>&nbsp;&nbsp;</td><td><font face=arial size=-2>·&nbsp;
<a href="
search</a><br>·&nbsp;
<a href="
popular</a></font></td></tr></table></form>
璋鋒瓕
http://www.g.cn錛?/p>

<form method=GET action=/search><tr><td nowrap>
<font size=-1><input type=text name=q size=41 maxlength=2048 value="jrckkyy" title="Google 鎼滅儲"> <input type=submit name=btnG value="Google 鎼滅儲"><input type=hidden name=complete value=1><input type=hidden name=hl value="zh-CN"><input type=hidden name=newwindow value=1><input type=hidden name=sa value="2"></font></td></tr></form>
鐧懼害http://www.baidu.com錛?/p>

<form name=f2 action="/s">
<tr valign="middle">
<td nowrap>
<input type=hidden name=ct value="0">
<input type=hidden name=ie value="gb2312">
<input type=hidden name=bs value="jrckkyy">
<input type=hidden name=sr>
<input type=hidden name=z value="">
<input type=hidden name=cl value=3>
<input type=hidden name=f value=8>
<input name=wd size="35" class=i value="jrckkyy" maxlength=100>
<input type=submit value=鐧懼害涓涓?gt; <input type=button value=緇撴灉涓壘 onclick="return bq(f2,1,0);">&nbsp;&nbsp;&nbsp;</td>
<td nowrap><a href="</tr>
</form>
澶╃綉
http://www.tianwang.com/錛?/p>

<form name=f action="/cgi-bin/tw" method=get>
                <td valign=center width=634 background=images/index_image_02.gif>
                    <table height=46 cellspacing=0 cellpadding=0 width=600 align=right  border=0>
                        <tbody>
                            <tr>
                                <td height=50>
                                    <table cellspacing=0 cellpadding=0 width=600 border=0>
                                        <tbody>
                                            <tr>
                                  <td width="524" height="30" valign="bottom">
                                        <div align="center">                                  <input name="word" type="text" size="40" maxlength="255" onClick="this.focus();checkWord(this,1)" onblutesr='checkWord(this,0)' value='璇瘋緭鍏ヨ祫婧愬悕縐?>
                                            <font color=#ffffff> &nbsp;
                                            <select onChange=reRange(this.selectedIndex) name=range>
                                                <script language=javascript>...
                           <!--
                           for(var i = 0; i < rescode.length; i++) ...{
                               if(i == 0) ...{
                                   document.write('<option value="0" selected>' + rescode[i][0] + '</option>');
                               } else ...{
                                   document.write('<option value="' + i + '">' + rescode[i][0] + '</option>');
                               }
                           }
                           document.f.range.selectedIndex = 0;
                           -->
                         </script>
                                            </select>
                                            </font>-<font color=#ffffff>
                                            <select name=cd>
                                                <script language=javascript>...
                           <!--
                           var ind = document.f.range.selectedIndex;
                           var len = (rescode[ind].length - 1) / 2;
                           var sel = 0;
                           for(var i = 0; i < len; i++) ...{
                               document.write('<option value="' + rescode[ind][2*i+1] + '">' + rescode[ind][2*i+2] + '</option>');
                               if(rescode[ind][2*i+1] == 0)
                                   sel = i;
                           }
                           document.f.cd.selectedIndex = sel;
                           -->
                 </script>
                                            </select>
                                            </font></div>
                                    </td>
                <td width="71" valign="bottom"><input id=submit2 type=image height=22 width=40 src="images/so2.gif" align=absMiddle name=submit></td>
              </tr>
                                            <tr>
                                                <td colspan=3 height=25 class=style16>
                                                    <div align=center></div>
                                                </td>
                                            </tr>
                                        </tbody>
                                    </table>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </td>
            </form>
嫻嬭瘯鏈嶅姟鍣═SE錛?/p>

<form method="get" action="/cgi-bin/index/TSESearch" name="tw">
        <td width="100%" height="25" align="center">                          
        <input type="text" name="word" size="55">
        <input type="submit" value=" 鎼滅儲" name="www">
        </td>                          
        <input type="hidden" name="cdtype" value="GB">                        
        </form>   
鐢變互涓婂嚑涓猣orm鐨勫睘鎬у彲浠ョ湅鍑哄叏閮ㄩ噰鐢ㄧ殑鏄痝et鏂規硶錛孋GI鍋氫負澶勭悊紼嬪簭錛屼篃灝辨槸C/C++錛孋GI鍏ㄧО鏄?#8220;鍏叡緗戝叧鐣岄潰”(Common Gateway Interface)錛孒TTP鏈嶅姟鍣ㄤ笌浣犵殑鎴栧叾瀹冩満鍣ㄤ笂鐨勭▼搴忚繘琛?#8220;浜よ皥”鐨勪竴縐嶅伐鍏鳳紝鍏剁▼搴忛』榪愯鍦ㄧ綉緇滄湇鍔″櫒涓娿侰GI閫愭笎琚繎鍑犲勾鏉ョ殑PHP錛孞AVA錛孉SP錛孭ERL錛孭ython錛孯uby絳夊姩鎬佽璦鎵鍙栦唬銆備絾鏄叾鍦ㄩ熷害鍜岃繍琛屾晥鐜囦笂鐨勪紭鍔挎槸鏃犳硶鍙栦唬鐨勩?/p>

浠ヤ笅鏄疶SE CGI鍏ュ彛紼嬪簭娉ㄩ噴錛屽叾浠栨悳绱㈠紩鎿庣殑鍏ュ彛涔熷簲璇ョ被浼?/p>

 

/**//**
 * 紼嬪簭緲昏瘧璇存槑
 * @Copyright (c) 2008, 鐮斿彂閮?br> * All rights reserved.
 *
 * @filesource  TSESearch.cpp
 * @author  jrckkyy <jrckkyy@163.com>
 *
 * Let's start
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include <unistd.h>

#include <iostream>
#include <fstream>
#include <list>

#include "Comm.h"    // names of the two index files and the one data file
#include "Query.h"    // query processing
#include "Document.h"    // HTML document handling
#include "StrFun.h"        // string helpers
#include "ChSeg/Dict.h"    // dictionary handling
#include "ChSeg/HzSeg.h"
#include "DisplayRst.h"    // result page rendering: top, middle and bottom sections

using namespace std;

/**//*
 * A inverted file(INF) includes a term-index file & a inverted-lists file.
 * A inverted-lists consists of many bucks(posting lists).
 * The term-index file is stored at vecTerm, and
 * the inverted-lists is sored at mapBuckets.
 */

/**//**
 * 紼嬪簭緲昏瘧璇存槑
 * 鎼滅儲紼嬪簭鍏ュ彛鍓嶅彴鍏抽敭瀛楁彁浜ゅ埌璇gi紼嬪簭 渚嬪錛?/cgi-bin/index/TSESearch?word=123&start=1
 * 鍊掓帓鏂囦歡鍖呮嫭涓涓褰曟绱㈣瘝鏂囦歡鍜屼竴涓掓帓鍒楄〃鏂囦歡銆?br> * 鍊掓帓鍒楄〃鍖呭惈寰堝鏍囧織錛堟彁浜ゅ悕鍗曪級銆?br> * 璁板綍媯绱㈣瘝鏂囦歡浣跨敤vecTerm鏉ユ帓搴忥紝鍜屽掓帓鍒楄〃鏄敤mapBuckets鏉ユ帓搴忋?br> *
 * @access  public
 * @param   int char 鍙傛暟鐨勬眽瀛楄鏄?鐢ㄤ簬鎺ユ敹鍓嶅彴get浼犻掔殑鍙傛暟
 * @return  string 0
 */
int main(int argc, char* argv[])
...{
    struct timeval begin_tv, end_tv;
    struct timezone tz;

    CDict iDict;
    map<string, string> dictMap, mapBuckets;
    vector<DocIdx> vecDocIdx;    //Document銆俬

    CQuery iQuery;
    iQuery.GetInputs();        //鍏蜂綋紼嬪簭寮濮嬫墽琛?br>    // current query & result page number
    iQuery.SetQuery();
    iQuery.SetStart();

    // begin to search
    //寮濮嬪叿浣撴悳绱㈢▼搴?br>    gettimeofday(&begin_tv,&tz);    //寮濮嬭鏃惰幏鍙栫▼搴忚繍琛屾椂闂村樊

    iQuery.GetInvLists(mapBuckets);        //灝嗘墍鏈夊瓧絎﹂泦瀛樺叆鏄犲皠鍙橀噺涓?nbsp;   鐡墮鎵鍦?br>    iQuery.GetDocIdx(vecDocIdx);        //灝嗗掓帓绱㈠紩瀛樺叆鍚戦噺涓?nbsp;       鐡墮鎵鍦?br>   
    CHzSeg iHzSeg;        //include ChSeg/HzSeg.h
    iQuery.m_sSegQuery = iHzSeg.SegmentSentenceMM(iDict, iQuery.m_sQuery);    //灝唃et鍒扮殑鏌ヨ鍙橀噺鍒嗚瘝鍒嗘垚 "鎴?        鐖?        浣犱滑/    鐨?        鏍煎紡"
   
    vector<string> vecTerm;
    iQuery.ParseQuery(vecTerm);        //灝嗕互"/"鍒掑垎寮鐨勫叧閿瓧涓涓欏哄簭鏀懼叆涓涓悜閲忓鍣ㄤ腑
   
    set<string> setRelevantRst;
    iQuery.GetRelevantRst(vecTerm, mapBuckets, setRelevantRst);
   
    gettimeofday(&end_tv,&tz);
    // search end
    //鎼滅儲瀹屾瘯

    //涓嬮潰寮濮嬫樉紺?br>    CDisplayRst iDisplayRst;
    iDisplayRst.ShowTop();

    float used_msec = (end_tv.tv_sec-begin_tv.tv_sec)*1000
        +((float)(end_tv.tv_usec-begin_tv.tv_usec))/(float)1000;

    iDisplayRst.ShowMiddle(iQuery.m_sQuery,used_msec,
            setRelevantRst.size(), iQuery.m_iStart);

    iDisplayRst.ShowBelow(vecTerm,setRelevantRst,vecDocIdx,iQuery.m_iStart);

    return 0;

}

 

 



]]>
青青草原综合久久大伊人导航_色综合久久天天综合_日日噜噜夜夜狠狠久久丁香五月_热久久这里只有精品
  • <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>
            亚洲亚洲精品三区日韩精品在线视频 | 久久精品99国产精品| 日韩午夜在线观看视频| 欧美1级日本1级| 99v久久综合狠狠综合久久| 亚洲精选91| 国产精品免费在线| 久久久久久久久久久久久久一区| 久久精品国产免费| 亚洲精品一区二区三区樱花| 亚洲免费黄色| 国产亚洲人成a一在线v站| 美女精品一区| 欧美涩涩网站| 久久免费视频在线观看| 欧美国产亚洲精品久久久8v| 亚洲午夜一区| 久久精品91久久久久久再现| 亚洲精品国产欧美| 亚洲综合三区| 亚洲黄一区二区三区| 亚洲一区二区三区在线看| 在线成人激情黄色| 一区二区电影免费观看| 激情六月婷婷综合| 亚洲人成在线免费观看| 国产情侣久久| 亚洲毛片在线看| 黄色成人片子| 一区二区三区久久| 亚洲人成在线播放网站岛国| 亚洲欧美国产日韩天堂区| 亚洲精品乱码久久久久久黑人| 亚洲一区二区三区中文字幕| 亚洲精品视频免费在线观看| 午夜精品区一区二区三| 夜夜嗨av一区二区三区中文字幕 | 你懂的国产精品永久在线| 亚洲欧美在线免费观看| 欧美国产日韩一区二区在线观看 | 亚洲一区bb| 免费观看成人www动漫视频| 亚洲影院一区| 欧美va天堂va视频va在线| 久久久午夜电影| 国产精品国产三级国产aⅴ浪潮| 欧美大片免费久久精品三p| 国产亚洲观看| 亚洲天堂av高清| 99国产麻豆精品| 噜噜噜躁狠狠躁狠狠精品视频| 欧美中文字幕第一页| 国产精品久久久久久久久久三级 | 老司机午夜精品| 国产精品一区二区三区四区五区| 99国产精品99久久久久久粉嫩| 91久久黄色| 你懂的网址国产 欧美| 欧美顶级大胆免费视频| 依依成人综合视频| 久久精品一区二区三区不卡| 久久人人97超碰人人澡爱香蕉| 国产欧美日韩在线播放| 欧美一区二区播放| 久久精品一区蜜桃臀影院| 国产精品毛片一区二区三区| 亚洲视频欧美视频| 亚洲欧美日韩一区二区在线| 国产精品人成在线观看免费| 亚洲欧美第一页| 久久精品官网| 激情校园亚洲| 欧美gay视频激情| 亚洲精品护士| 午夜一区二区三区不卡视频| 国产精品久久一区二区三区| 亚洲欧美日韩国产成人精品影院| 欧美中文在线视频| 亚洲国产高清一区二区三区| 欧美国产精品一区| 99视频超级精品| 久久精品国产第一区二区三区最新章节 | 夜夜狂射影院欧美极品| 欧美一区二区免费视频| 国内精品国产成人| 麻豆av一区二区三区| 亚洲精品一级| 午夜精品影院| 在线看无码的免费网站| 欧美日韩国产成人精品| 亚洲欧美另类在线| 欧美电影电视剧在线观看| 国产精品99久久久久久久久久久久| 欧美性做爰毛片| 久久久国产精品亚洲一区| 亚洲高清av| 欧美一区二区三区播放老司机| 伊人久久成人| 国产精品超碰97尤物18| 久久精品一区二区三区四区| 亚洲伦理中文字幕| 久久成人在线| 日韩一级精品视频在线观看| 国产日本欧美视频| 欧美黄色aaaa| 久久激情视频| 亚洲视频免费看| 亚洲高清视频的网址| 久久精品国产精品 | 夜夜嗨av一区二区三区四区| 国产一区二区三区久久久久久久久 | 久久婷婷国产综合精品青草| 99av国产精品欲麻豆| 免费黄网站欧美| 亚洲欧美视频一区| 99精品久久久| 在线免费观看日本欧美| 国产精品尤物| 欧美特黄视频| 欧美aa国产视频| 久久精品国产99国产精品澳门| 一本色道久久综合亚洲精品婷婷 | 麻豆精品视频在线观看| 午夜在线播放视频欧美| 一区二区精品在线| 亚洲精品欧美极品| 91久久中文| 欧美福利电影在线观看| 久久亚洲精品一区| 久久精品国产视频| 午夜精品久久久久久久久久久久久| 日韩午夜电影| 亚洲精品网站在线播放gif| 在线观看中文字幕不卡| 激情国产一区二区| 合欧美一区二区三区| 国产亚洲综合性久久久影院| 国产精品视频内| 国产精品私拍pans大尺度在线| 欧美午夜一区二区三区免费大片 | 免费在线亚洲欧美| 久久综合九色| 免费在线亚洲| 欧美精品日韩一区| 欧美日韩和欧美的一区二区| 欧美久久视频| 欧美日精品一区视频| 欧美视频在线观看一区| 国产精品a久久久久| 国产精品久久午夜夜伦鲁鲁| 国产精品网站一区| 国产日韩欧美三区| 狠狠狠色丁香婷婷综合激情| 怡红院av一区二区三区| 亚洲人体偷拍| 亚洲永久免费av| 久久国产99| 美女在线一区二区| 亚洲国产精品一区二区久 | 欧美mv日韩mv亚洲| 亚洲精品小视频在线观看| 亚洲天堂成人在线视频| 欧美伊人久久| 欧美国产日韩一区二区在线观看 | 国产乱人伦精品一区二区| 国产日产精品一区二区三区四区的观看方式| 国产欧美日韩亚洲一区二区三区| 狠狠色狠狠色综合日日小说| 
亚洲国产小视频| 亚洲一区二区三区精品动漫| 久久久www免费人成黑人精品| 猛男gaygay欧美视频| 亚洲日韩第九十九页| 亚洲免费在线电影| 欧美mv日韩mv国产网站| 国产精品欧美激情| 亚洲丰满少妇videoshd| 亚洲一区二区三区在线视频| 狂野欧美激情性xxxx欧美| 亚洲国产另类精品专区| 亚洲自拍另类| 欧美女人交a| 黄色亚洲大片免费在线观看| 一级日韩一区在线观看| 久久不射网站| 亚洲美女精品成人在线视频| 久久精品亚洲一区| 国产精品高潮呻吟久久av无限 | 亚洲午夜精品国产| 嫩草影视亚洲| 亚洲免费一区二区| 欧美日本亚洲韩国国产| 亚洲电影网站| 久久国产精品一区二区三区四区| 91久久夜色精品国产九色| 欧美在线视频播放| 国产精品乱码一区二三区小蝌蚪| 亚洲精品中文字幕女同| 可以看av的网站久久看| 香蕉乱码成人久久天堂爱免费|