性能全部损耗在排序上:由于 Hashtable 本身不能排序,只能借助其他数据结构来排序。不过还好,一本 60000 字的书十几秒就能统计完。
using System;
using System.Collections; // retained from the original file (Hashtable is no longer used here)
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace WindowsApplication1
{
    /// <summary>
    /// Form that counts how often each word occurs in a text file and lists the
    /// words ordered by ascending occurrence count.
    /// </summary>
    public partial class Form1 : Form
    {
        // Separator pattern used to split the text into words. It matches: comma,
        // period, space, line feed, carriage return, question mark, semicolon,
        // colon, exclamation mark, left/right parenthesis, straight double quote
        // (octal escape \042), left/right curly double quote, hyphen, any character
        // in the range U+4E00..U+9FA5 (Chinese characters), and any digit 0-9.
        private const string SeparatorPattern = @"\,|\.|\ |\n|\r|\?|\;|\:|\!|\(|\)|\042|\“|\”|\-|[\u4e00-\u9fa5]|[0-9]";

        // Built once and reused instead of being re-created on every click.
        private static readonly Regex SeparatorRegex = new Regex(SeparatorPattern);

        /// <summary>
        /// Creates the form and initializes its designer-generated components.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Reads the file whose path is in <c>textBox1</c> (decoded as GB2312),
        /// counts the words in it and writes one "word TAB count" line per distinct
        /// word into <c>textBox2</c>, ordered by ascending count.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void cmdStart_Click(object sender, EventArgs e)
        {
            textBox2.Text = "";

            string text;
            try
            {
                // "using" guarantees the reader is closed even if ReadToEnd throws.
                using (StreamReader reader = new StreamReader(textBox1.Text, Encoding.GetEncoding("GB2312")))
                {
                    text = reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberately broad: any failure to open or read the file is reported
                // with the original message. Nothing can be counted, so stop here;
                // the output box has already been cleared above.
                MessageBox.Show("请把test.txt文件拷贝到C:/");
                return;
            }

            // Assign the output once instead of appending to the control per word.
            textBox2.Text = FormatCounts(CountWords(text));
        }

        /// <summary>
        /// Splits <paramref name="text"/> on the separator pattern and counts the
        /// occurrences of every non-empty piece (case-sensitive).
        /// </summary>
        /// <param name="text">Text to analyse.</param>
        /// <returns>Map from word to its number of occurrences.</returns>
        private static Dictionary<string, int> CountWords(string text)
        {
            Dictionary<string, int> counts = new Dictionary<string, int>();

            foreach (string word in SeparatorRegex.Split(text))
            {
                // Adjacent separators produce empty pieces; they are not words.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // TryGetValue leaves "seen" at 0 for a word not met before.
                int seen;
                counts.TryGetValue(word, out seen);
                counts[word] = seen + 1;
            }

            return counts;
        }

        /// <summary>
        /// Renders the counts as lines of the form "word TAB count CRLF", ordered by
        /// ascending count; words with equal counts are ordered by ordinal
        /// comparison so the output is deterministic.
        /// </summary>
        /// <param name="counts">Map from word to its number of occurrences.</param>
        /// <returns>The formatted listing; empty when there are no words.</returns>
        private static string FormatCounts(Dictionary<string, int> counts)
        {
            List<KeyValuePair<string, int>> entries = new List<KeyValuePair<string, int>>(counts);
            entries.Sort(CompareByCountThenWord);

            StringBuilder output = new StringBuilder();
            foreach (KeyValuePair<string, int> entry in entries)
            {
                output.Append(entry.Key).Append("\t").Append(entry.Value).Append("\r\n");
            }

            return output.ToString();
        }

        /// <summary>
        /// Orders entries by ascending count, then by ordinal word comparison.
        /// </summary>
        private static int CompareByCountThenWord(KeyValuePair<string, int> x, KeyValuePair<string, int> y)
        {
            int byCount = x.Value.CompareTo(y.Value);
            return byCount != 0 ? byCount : string.CompareOrdinal(x.Key, y.Key);
        }
    }
}