首頁新隨筆新文章聯系聚合

posts - 21,comments - 59,trackbacks - 0

2010年2月

>

日

一

二

三

四

五

六

31

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

1

2

3

4

5

6

7

8

9

10

11

12

13

常用鏈接

留言簿(14)

隨筆分類

隨筆檔案

相冊

album

搜索

閱讀排行榜

評論排行榜

數據結構隨筆3(哈希表)

寫了一個哈希表模板類，用於統計一篇文章中不同單詞出現的次數。哈希表使用char*作為key，使用桶式鏈表指針數組(指向結點鏈表的指針數組)來索引，字符串哈希函數是在網上搜來的。模板參數有兩個：一個是值類型，另一個NBARREL是指針數組的大小；通常NBARREL越大，哈希值衝突就越少，結點鏈表長度也就越短，查找也就越快。

為了方便統計，為哈希表增加了一個IncValue函數：只需查找一次，單詞已存在就把計數加一，不存在就插入並把計數設為1，以提高效率。
輸出統計結果的時候使用了快速排序；為了提高效率，把上次的快排模板稍微修改了一下，直接對指針數組進行排序，就不用進行值的交換了。

/********************************************************************
    created:    2007/12/30
    filename:   hashtable.h
    author:     dj
    purpose:    哈希表模板類
*********************************************************************/

   #ifndef __HASHTABLE_H__
#define __HASHTABLE_H__

// Frees an array that was allocated with new[] and resets the pointer to NULL.
// Only valid for array allocations: the macro always uses delete[].
// No null check is needed because delete[] on a null pointer is a no-op.
// The do/while(0) wrapper makes the expansion a single statement, so
// `if (cond) SAFE_DELETE(p); else ...` parses as expected.
#define SAFE_DELETE(p) do { delete [] (p); (p) = NULL; } while (0)

/**
 * Exchanges the elements at indices i and j of the array v.
 *
 * Defined ahead of quicksort() so that the call inside that template is
 * resolved by ordinary lookup for every element type, including plain
 * pointers such as int*, for which argument-dependent lookup finds nothing.
 *
 * @param v  array holding the elements
 * @param i  index of the first element
 * @param j  index of the second element (may equal i)
 */
template<typename T>
void swap(T* v, int i, int j)
{
    T tmp = v[i];
    v[i] = v[j];
    v[j] = tmp;
}

/**
 * Sorts an array of pointer-like elements in DESCENDING order of the objects
 * they point to. Only the elements (pointers) are moved, never the pointees.
 *
 * Elements are compared as (*a) > (*b), so the pointee type must provide
 * operator>. The relative order of elements that compare equal is unspecified
 * (the pivot is chosen with rand()).
 *
 * A three-way partition groups everything equal to the pivot in one pass, so
 * inputs with many duplicates (e.g. word counts where most words occur once)
 * do not degrade to quadratic time. Recursion is done on the smaller side
 * only, which keeps the stack depth logarithmic in n.
 *
 * @param v  array of n dereferenceable elements
 * @param n  number of elements; values <= 1 are a no-op
 */
template<typename T>
void quicksort(T* v, int n)
{
    while (n > 1)
    {
        swap(v, 0, rand()%n);
        // Keep a copy: v[0] is overwritten as soon as a larger element is found.
        T pivot = v[0];

        // Invariant: v[0, lo) > pivot, v[lo, i) == pivot, v(hi, n) < pivot.
        int lo = 0;
        int i = 1;
        int hi = n - 1;
        while (i <= hi)
        {
            if ((*v[i]) > (*pivot))
            {
                swap(v, lo, i);
                ++lo;
                ++i;
            }
            else if ((*pivot) > (*v[i]))
            {
                swap(v, i, hi);
                --hi;
            }
            else
            {
                ++i;
            }
        }

        T* right = v + hi + 1;
        const int leftCount = lo;
        const int rightCount = n - hi - 1;
        if (leftCount < rightCount)
        {
            quicksort(v, leftCount);
            v = right;
            n = rightCount;
        }
        else
        {
            quicksort(right, rightCount);
            n = leftCount;
        }
    }
}

/**
 * Chained hash table that maps NUL-terminated C-string keys to values of type T.
 *
 * Each bucket is a singly linked list of heap-allocated nodes, and every node
 * owns a private copy of its key. A NULL key is treated like the empty string
 * (Hash() already maps both to bucket 0).
 *
 * The table owns raw memory, so copying is disabled.
 *
 * @tparam T        Value type; must be copy-constructible, assignable and
 *                  default-constructible (GetValue() returns T() on a miss).
 *                  IncValue() additionally needs postfix ++ and conversion
 *                  from the int 1; Dump() needs operator> and operator<<.
 * @tparam NBARREL  Number of buckets; must be positive.
 */
template<typename T, int NBARREL = 100>
class HashTable
{
    // Compile-time guard: a non-positive bucket count would make the bucket
    // array ill-formed and the modulo in Hash() a division by zero.
    typedef char NBarrelMustBePositive[(NBARREL > 0) ? 1 : -1];
public:
    /** Creates an empty table. */
    HashTable():m_nCount(0)
    {
        for (int i = 0; i < NBARREL; i++)
            m_pNodes[i] = NULL;
    }
    /** Releases every node together with its key copy. */
    ~HashTable()
    {
        FreeTable();
    }
    /** @return true if sName is present in the table. */
    bool Exists(const char* sName) const
    {
        return FindNode(sName) != NULL;
    }
    /**
     * Inserts a new key/value pair.
     * @return false (leaving the table unchanged) if the key already exists,
     *         true if the pair was inserted.
     */
    bool AddNode(const char* sName, const T& tValue)
    {
        const char* key = Key(sName);
        const int h = Hash(key);
        if (FindInBucket(h, key))
            return false;
        PushFront(h, key, tValue);
        return true;
    }
    /**
     * @return a copy of the value stored under sName, or a value-initialized
     *         T (0 for arithmetic types, null for pointers) if it is absent.
     */
    T GetValue(const char* sName) const
    {
        const HashNode* p = FindNode(sName);
        return p ? p->value : T();
    }
    /**
     * Overwrites the value of an existing key.
     * @return true if the key was found and updated, false otherwise
     *         (no node is created for a missing key).
     */
    bool SetValue(const char* sName, const T& tValue)
    {
        HashNode* p = FindNode(sName);
        if (!p)
            return false;
        p->value = tValue;
        return true;
    }
    /**
     * Increments the value stored under sName, or inserts the key with the
     * value 1 if it is not present yet. Needs a single bucket lookup.
     */
    void IncValue(const char* sName)
    {
        const char* key = Key(sName);
        const int h = Hash(key);
        HashNode* p = FindInBucket(h, key);
        if (p)
        {
            p->value++;
            return;
        }
        PushFront(h, key, 1);
    }
    /**
     * Writes one "<value>  <name>" line per entry to the file sFile, ordered
     * by quicksort() on the node pointers (which compares nodes by value via
     * HashNode::operator>).
     *
     * Asserts that the file could be opened; when assertions are compiled
     * out, an unopenable file makes the call a no-op.
     */
    void Dump(const char* sFile) const
    {
        std::ofstream file(sFile);
        assert(file.is_open());
        if (!file.is_open())
            return;
        // Flatten all bucket chains into one array so the nodes can be sorted
        // by moving pointers only.
        HashNode** pNodes = new HashNode*[m_nCount];
        int i, counter = 0;
        for(i = 0; i < NBARREL; i++)
        {
            for (HashNode* p = m_pNodes[i]; p; p = p->next)
                pNodes[counter++] = p;
        }
        quicksort(pNodes, m_nCount);
        for (i = 0; i < m_nCount; i++)
        {
            file<<pNodes[i]->value<<"  "<<pNodes[i]->name<<'\n';
        }

        delete [] pNodes;
        file.close();
    }
private:
    // Copying would make two tables free the same nodes; declared but
    // intentionally left undefined.
    HashTable(const HashTable&);
    HashTable& operator=(const HashTable&);

    /** Maps a NULL key to the empty string so it can be compared and copied. */
    static const char* Key(const char* s)
    {
        return s ? s : "";
    }
    /**
     * String hash reduced to a bucket index in [0, NBARREL).
     *
     * The per-character mixing (rotate left by r, then xor with v*v) appears
     * to follow OpenSSL's lh_strhash. All arithmetic is unsigned and masked
     * to 32 bits, so long keys and bytes >= 0x80 cannot overflow or produce
     * a negative index. NULL and "" both hash to 0.
     */
    static int Hash(const char* c)
    {
        unsigned long ret = 0;
        if ((c == NULL) || (*c == '\0'))
            return 0;
        unsigned long n = 0x100;
        while (*c)
        {
            const unsigned long v = n | static_cast<unsigned char>(*c);
            n += 0x100;
            const unsigned int r = static_cast<unsigned int>((v>>2)^v)&0x0f;
            // A rotation by 0 is skipped: it would need a shift by 32 bits.
            if (r != 0)
                ret = ((ret<<r)|(ret>>(32-r)))&0xFFFFFFFFUL;
            ret = (ret^(v*v))&0xFFFFFFFFUL;
            c++;
        }
        return static_cast<int>(((ret>>16)^ret)%static_cast<unsigned long>(NBARREL));
    }
    /** Deletes every node and leaves the table empty. */
    void FreeTable()
    {
        for(int i = 0; i < NBARREL; i++)
        {
            HashNode* p = m_pNodes[i];
            while(p)
            {
                HashNode* pnext = p->next;
                delete p;
                p = pnext;
            }
            m_pNodes[i] = NULL;
        }
        m_nCount = 0;
    }
private:
    /** One key/value entry of a bucket chain; owns its copy of the key. */
    struct HashNode
    {
        HashNode(const char* c, const T& v) : name(NULL), value(v), next(NULL)
        {
            name = new char[strlen(c)+1];
            strcpy(name, c);
        }
        ~HashNode()
        {
            delete [] name;
        }
        // Nodes are ordered by value only; the key does not take part.
        bool operator > (const HashNode& node) const
        {
            return (this->value > node.value);
        }
        bool operator < (const HashNode& node) const
        {
            return (this->value < node.value);
        }
        bool operator == (const HashNode& node) const
        {
            return (this->value == node.value);
        }
        char* name;      // heap copy of the key, released in the destructor
        T value;
        HashNode* next;  // next node in the same bucket, or NULL
    private:
        // A copied node would free `name` twice; declared, not defined.
        HashNode(const HashNode&);
        HashNode& operator=(const HashNode&);
    };
    /** @return the node in bucket h whose key equals `key`, or NULL. */
    HashNode* FindInBucket(int h, const char* key) const
    {
        for (HashNode* p = m_pNodes[h]; p; p = p->next)
        {
            if (strcmp(p->name, key)==0)
                return p;
        }
        return NULL;
    }
    /** @return the node stored under sName, or NULL if there is none. */
    HashNode* FindNode(const char* sName) const
    {
        const char* key = Key(sName);
        return FindInBucket(Hash(key), key);
    }
    /** Links a new node for key/value at the head of bucket h. */
    void PushFront(int h, const char* key, const T& tValue)
    {
        HashNode* node = new HashNode(key, tValue);
        node->next = m_pNodes[h];
        m_pNodes[h] = node;
        m_nCount++;
    }
    HashNode* m_pNodes[NBARREL];  // bucket heads
    int m_nCount;                 // total number of nodes in all buckets
};

#endif //__HASHTABLE_H__

測試程序如下

/**
 * Word-frequency demo: reads whitespace-separated words from a text file,
 * counts them in a HashTable and dumps the statistics to a second file.
 *
 * @param argc  number of command-line arguments
 * @param argv  optional overrides: argv[1] is the input path (default
 *              "c:\\test.txt"), argv[2] the output path (default
 *              "c:\\stat.txt")
 * @return 0 on success, 1 if the input file cannot be opened (in which case
 *         no statistics file is written)
 */
int main(int argc, char* argv[])
{
    const char* inputPath = (argc > 1) ? argv[1] : "c:\\test.txt";
    const char* outputPath = (argc > 2) ? argv[2] : "c:\\stat.txt";

    std::ifstream f(inputPath);
    if (!f)
        return 1;

    HashTable<int, 500> h;
    std::string s;
    while (f >> s)
    {
        // Equivalent to "if Exists() then SetValue(GetValue() + 1) else
        // AddNode(word, 1)", but with a single lookup per word.
        h.IncValue(s.c_str());
    }

    h.Dump(outputPath);
    return 0;
}

隨便在google上找了幾個英文網頁來統計，
發現排第一位的單詞是"2007",第二位的居然是"die"

程序設計實踐上說，用素數作為數組的大小是明智的，因為這樣能保證在數組大小、散列的乘數和可能的數據值之間不存在公因子，我覺得他特指java的散列函數

enum{MULTIPLIER = 37}

unsigned int hash(char* str)

{

unsigned int h = 0;

unsigned char* p;

for(p=str;*p!=0;p++)

h = MULTIPLIER*h+*p;

return h%NHASH;

}

最後附上幾個經典的字符串哈希函數，來自：
http://www.oioj.net/blog/user3/28679/archives/2005/166870.shtml

posted on 2007-12-30 15:13 小四閱讀(563) 評論(0) 編輯收藏引用所屬分類: 算法與數據結構

只有注冊用戶登錄后才能發表評論。
【推薦】100%開源！大型工業跨平臺軟件C++源碼提供，建模，組態！

相關文章: 關于mp3轉ogg DLL窗體中PreTranslateMessage的解決方案數據結構隨筆6(表達式求值) 數據結構隨筆5(二叉排序樹) 數據結構隨筆4(折半查找) 數據結構隨筆3(哈希表) 數據結構隨筆2(快速排序) 數據結構隨筆1(堆棧)

網站導航: 博客園 IT新聞 BlogJava 博問 Chat2DB 管理

青青草原综合久久大伊人导航_色综合久久天天综合_日日噜噜夜夜狠狠久久丁香五月_热久久这里只有精品