博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
统计文本中各单词的出现次数,并按次数大小排序(C#)
阅读量:4585 次
发布时间:2019-06-09

本文共 2187 字,大约阅读时间需要 7 分钟。

性能几乎全部损耗在排序上:由于 Hashtable 本身不能排序,只能把键和值导出到数组后另行排序。不过还好,一本 60000 字的书十几秒就能统计完。

using System;
using System.Collections;//use hashtable
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace WindowsApplication1
{
    /// <summary>
    /// Main form. Reads the text file named in <c>textBox1</c>, counts how often
    /// each word occurs, and lists the words in <c>textBox2</c> ordered by
    /// ascending occurrence count.
    /// </summary>
    public partial class Form1 : Form
    {
        // Separators used to split the text into words: comma, period, space,
        // line feed, carriage return, question mark, semicolon, colon,
        // exclamation mark, parentheses, straight double quote (\042 is its octal
        // escape), left/right curly double quotes, hyphen, any character in the
        // range U+4E00-U+9FA5 (Chinese characters) and any ASCII digit.
        // Built once and reused instead of being re-created on every click.
        private static readonly Regex WordSeparators = new Regex(
            @"\,|\.|\ |\n|\r|\?|\;|\:|\!|\(|\)|\042|\“|\”|\-|[\u4e00-\u9fa5]|[0-9]");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler for the start button: loads the file, counts the words,
        /// sorts them by count and writes one "word TAB count" line per word
        /// into <c>textBox2</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void cmdStart_Click(object sender, EventArgs e)
        {
            textBox2.Text = "";

            string text;
            try
            {
                text = ReadAllText(textBox1.Text);
            }
            catch (Exception ex)
            {
                // Keep the original hint, but also surface the real cause, and stop
                // here: there is nothing to count when the file could not be read.
                MessageBox.Show("请把test.txt文件拷贝到C:/" + "\r\n" + ex.Message);
                return;
            }

            List<KeyValuePair<string, int>> entries =
                new List<KeyValuePair<string, int>>(CountWords(text));
            entries.Sort(CompareByCountThenWord);

            // Build the whole report first and assign it once; appending to
            // textBox2.Text per word re-copies the full string on every iteration.
            StringBuilder report = new StringBuilder();
            foreach (KeyValuePair<string, int> entry in entries)
            {
                report.Append(entry.Key).Append("\t").Append(entry.Value).Append("\r\n");
            }
            textBox2.Text = report.ToString();
        }

        /// <summary>
        /// Reads the entire file at <paramref name="path"/> as GB2312 text.
        /// The reader is disposed even when reading fails.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>The complete file content.</returns>
        private static string ReadAllText(string path)
        {
            using (StreamReader reader = new StreamReader(path, Encoding.GetEncoding("GB2312")))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Splits <paramref name="text"/> on <see cref="WordSeparators"/> and counts
        /// the occurrences of every non-empty piece. Words are compared exactly
        /// (case-sensitive), as with the string keys of the former Hashtable.
        /// </summary>
        /// <param name="text">The text to analyse.</param>
        /// <returns>A map from each distinct word to its occurrence count.</returns>
        private static Dictionary<string, int> CountWords(string text)
        {
            Dictionary<string, int> counts = new Dictionary<string, int>();
            foreach (string word in WordSeparators.Split(text))
            {
                // Adjacent separators produce empty pieces; they are not words.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // TryGetValue leaves 'seen' at 0 for a word not encountered before.
                int seen;
                counts.TryGetValue(word, out seen);
                counts[word] = seen + 1;
            }
            return counts;
        }

        /// <summary>
        /// Orders entries by ascending count; entries with equal counts are ordered
        /// by ordinal word comparison so the output is deterministic.
        /// </summary>
        private static int CompareByCountThenWord(
            KeyValuePair<string, int> left, KeyValuePair<string, int> right)
        {
            int byCount = left.Value.CompareTo(right.Value);
            if (byCount != 0)
            {
                return byCount;
            }
            return string.CompareOrdinal(left.Key, right.Key);
        }
    }
}

转载于:https://www.cnblogs.com/zhangdongdong/archive/2013/04/23/3037635.html

你可能感兴趣的文章
bzoj 2054: 疯狂的馒头
查看>>
SQL 公用表达式CTE
查看>>
可恶的Math.random()
查看>>
Netty线程模型
查看>>
查找->动态查找表->键树(无代码)
查看>>
关于 angular 小心得
查看>>
DDD的"waiting until GDB gets ready"
查看>>
简单解决 Javascrip 浮点数计算的 Bug(.toFixed(int 小数位数))
查看>>
laravel错误1071 Specified key was too long; max key length is 1000 bytes
查看>>
正则表达式,的简单应用
查看>>
iOS开发~CocoaPods使用详细说明
查看>>
如何解决浏览器兼容问题
查看>>
必须输入大于0的整数
查看>>
shell杂记
查看>>
使用dom4j技术对xml文件的基本操作
查看>>
C# 属性控件2
查看>>
asp.net 站点在Apache下的配置,就这么简单
查看>>
开源 免费 java CMS - FreeCMS1.9 移动APP生成网站列表数据
查看>>
java中==和equals
查看>>
CCActionPageTurn3D
查看>>