C++二叉树实现词频分析功能

通过二叉树存单词,并且对总共的单词数量进行计数,二叉树自适应的将出现频率高的单词往上移动以减少二叉树的搜索时间。 代码如下

/***********************genSplay.h***********************/#ifndef _GENSPLAY_H_#define _GENSPLAY_H_#include <iostream>using namespace std;//树节点template<class T>class SplayingNode{public:  T info;  SplayingNode *left, *right, *parent;  //节点指针  SplayingNode(){    left = right = parent = 0;  }  SplayingNode(const T &el, SplayingNode *l = 0,         SplayingNode *r = 0, SplayingNode *p = 0)         :info(el), left(l), right(r), parent(p){ }};//二叉树template<class T>class SplayTree{protected:  SplayingNode<T> *root;  void rotateR(SplayingNode<T> *);  //向右旋转  void rotateL(SplayingNode<T> *);  //向左旋转  void continueRotation(SplayingNode<T> *gr, SplayingNode<T> *par,             SplayingNode<T> *ch, SplayingNode<T> *desc); //重新定义父节点指针  void semisplay(SplayingNode<T> *); //更改树结构,将权值大的向上移动  void inorder(SplayingNode<T> *);  //中序遍历  void virtual visit(SplayingNode<T> *){ } //虚函数public:  SplayTree(){    root = 0;  }  void inorder(){    inorder(root);  }  T *search(const T &);  void insert(const T &);};template<class T>void SplayTree<T>::continueRotation(SplayingNode<T> *gr, SplayingNode<T> *par,    SplayingNode<T> *ch, SplayingNode<T> *desc){  if(gr != 0){    if(gr->right == ch->parent)      gr->right = ch;    else      gr->left = ch;  }  else    root = ch;  if(desc != 0)    desc->parent = par;  par->parent = ch;  ch->parent = gr;}template<class T>void SplayTree<T>::rotateR(SplayingNode<T> *p){  p->parent->left = p->right;  p->right = p->parent;  continueRotation(p->parent->parent, p->right, p, p->right->left);}template<class T>void SplayTree<T>::rotateL(SplayingNode<T> *p){  p->parent->right = p->left;  p->left = p->parent;  continueRotation(p->parent->parent, p->left, p, p->left->right);}template<class T>void SplayTree<T>::semisplay(SplayingNode<T> *p){  while(p != root){    if(p->parent->parent == NULL){      if(p->parent->left == p)        rotateR(p);      else        rotateL(p);    }    else if(p->parent->left == p){      if(p->parent->parent->left == p->parent){        rotateR(p->parent);        p = p->parent;      }      else{        rotateR(p);        rotateL(p);      }    }    else{      if(p->parent->parent->right == p->parent){        rotateL(p->parent);        p = p->parent;      }      else{        rotateL(p);        rotateR(p);      }    }    if(root == NULL)      root = p;  }}template<class T>T *SplayTree<T>::search(const T &el){  SplayingNode<T> *p = root;  while(p != NULL){    if(p->info == el){      semisplay(p);      return &p->info;    }    else if(el < p->info)      p = p->left;    else      p = p->right;  }  return 0;}template<class T>void SplayTree<T>::insert(const T &el){  SplayingNode<T> *p = root, *prev = NULL, *newNode;  while(p != 0){    prev = p;    if(el < p->info)      p = p->left;    else      p = p->right;  }  if((newNode = new SplayingNode<T>(el, 0, 0, prev)) == 0){    cerr << "no room for new node.\n";    exit(1);  }  if(root == 0)    root = newNode;  else if(el < prev->info)    prev->left = newNode;  else    prev->right = newNode;}template<class T>void SplayTree<T>::inorder(SplayingNode<T> *p){  if(p != 0){    inorder(p->left);    visit(p);    inorder(p->right);  }}#endif // _GENSPLAY_H_
/***********************Splay.cpp***********************/#include <iostream>#include <fstream>#include <cctype>#include <cstring>#include <cstdlib> //exit(0)#include "genSplay.h"using namespace std;//用作计数对象的类class Word{private:  char *word;  int freq;  friend class WordSplay;  //friend ostream & operator<<(ostream &out, const Word &wd);public:  Word(){    freq = 1;  }  int operator==(const Word &ir) const{    return strcmp(word, ir.word) == 0;  }  int operator<(const Word &ir) const{    return strcmp(word, ir.word) < 0;  }};class WordSplay : public SplayTree<Word>{private:  int differentWords, wordCnt;  void visit(SplayingNode<Word> *);public:  WordSplay(){    differentWords = wordCnt = 0;  }  void run(ifstream &, char *);};void WordSplay::visit(SplayingNode<Word> *p){  differentWords++;  wordCnt += p->info.freq;}void WordSplay::run(ifstream &fin, char *filename){  char ch = ' ', i;  char s[100];  Word rec;  while(!fin.eof()){    while(1){      if(!fin.eof() && !isalpha(ch))        fin.get(ch);      else        break;    }    if(fin.eof())      break;    for(i = 0; !fin.eof() && isalpha(ch); i++){      s[i] = toupper(ch);      fin.get(ch);    }    s[i] = '\0';    if(!(rec.word = new char[strlen(s) + 1])){      cerr << "no room for new words.\n";      exit(1);    }    strcpy(rec.word, s);    Word *p = search(rec);    if(p == 0)      insert(rec);    else      p->freq++;  }  inorder();  cout << "\n\nFile " << filename     << " contains " << wordCnt << " words among which "     << differentWords << " are different.\n";}int main(){  char Filename[80];  WordSplay SplayTree;  cout << "enter a filename: ";  cin >> Filename;  ifstream fin(Filename);  if(fin.fail()){    cerr << "cannot open " << Filename << endl;    return 1;  }  SplayTree.run(fin, Filename);  fin.close();  return 0;}

有空回来补充相应的其他功能:

将对应的单词和文件写到文件里面去,先序遍历优化性能

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。

多对自己说“我能行,我一定可以”,

C++二叉树实现词频分析功能

相关文章:

你感兴趣的文章:

标签云: