要求:读取一个文本文件,统计其中出现的单词,并打印每个单词出现的次数。只考虑英文单词,不考虑中文。
小乓练题:
// Word-frequency counter.
//
// Reads the whole of c:\a.txt, splits it into maximal runs of alphanumeric
// characters (as classified by isalnum), and prints every distinct word
// followed by its occurrence count, each on its own line, in order of first
// appearance. Matching is case-sensitive.
//
// Returns 0 on success and -1 when the input file cannot be opened.
int main(int argc, char* argv[])
{
    using namespace std;

    ifstream infile("c:\\a.txt", ios::binary);
    if (!infile)
    {
        cout << "Can not open sourse file!" << endl;
        return -1; // non-zero so the caller can detect the failure
    }

    // Measure the file by seeking to its end; tellg() yields -1 on failure,
    // in which case the text is simply treated as empty.
    infile.seekg(0, ios::end);
    const streamoff nLength = infile.tellg();
    infile.seekg(0, ios::beg);

    // Read the whole file. The string is built from an explicit byte count
    // (what read() actually delivered) because the raw buffer carries no
    // terminating NUL and the file itself may contain NUL bytes.
    string s;
    if (nLength > 0)
    {
        vector<char> buffer(static_cast<vector<char>::size_type>(nLength));
        infile.read(&buffer[0], static_cast<streamsize>(nLength));
        s.assign(&buffer[0], static_cast<string::size_type>(infile.gcount()));
    }
    infile.close();

    vector<string> vecSubstr; // distinct words, in order of first appearance
    vector<int> vecCount;     // vecCount[i] = occurrences of vecSubstr[i]

    const string::size_type nSize = s.size();
    string::size_type next = 0;
    while (next < nSize)
    {
        // Advance over one run of alphanumeric characters. The cast is
        // required: passing a negative char to isalnum is undefined.
        const string::size_type pre = next;
        while (next < nSize && isalnum(static_cast<unsigned char>(s[next])))
        {
            ++next;
        }

        if (pre != next)
        {
            const string temp = s.substr(pre, next - pre);

            // Linear search keeps the output in first-appearance order.
            vector<string>::size_type iPosition = 0;
            while (iPosition < vecSubstr.size() && vecSubstr[iPosition] != temp)
            {
                ++iPosition;
            }

            if (iPosition == vecSubstr.size())
            {
                vecSubstr.push_back(temp);
                vecCount.push_back(1);
            }
            else
            {
                ++vecCount[iPosition];
            }
        }

        // Step over the separator that ended the run (or the lone separator
        // when no word started here).
        ++next;
    }

    for (vector<string>::size_type j = 0; j < vecSubstr.size(); ++j)
    {
        cout << vecSubstr[j] << endl << vecCount[j] << endl;
    }

    system("pause"); // keeps the console window open (Windows shell command)
    return 0;
}
C++代码:
// Word-frequency counter.
//
// Reads the whole of C:\text.txt, splits it into maximal runs of alphanumeric
// characters (as classified by isalnum), and prints each distinct word with
// its occurrence count, separated by two tabs, in the sorted order of the
// std::map keys. Matching is case-sensitive.
//
// Returns 0 on success and -1 when the input file cannot be opened.
int main(int argc, char* argv[])
{
    // Path of the input file. String literals are const, so the pointer is too.
    const char* szPath = "C:\\text.txt";
    std::ifstream fin(szPath);
    if (!fin)
    {
        std::cout << "Can not open file" << std::endl;
        return -1;
    }

    // Read the full text of the file in one go.
    const std::string strText = std::string(std::istreambuf_iterator<char>(fin),
                                            std::istreambuf_iterator<char>());

    typedef std::map<std::string, int> CountMap;
    CountMap counter;

    const std::string::size_type nLength = strText.length();
    std::string::size_type nLeft = 0;
    while (nLeft < nLength)
    {
        // Find the first alphanumeric character: the start of the next word.
        // The cast is required: passing a negative char to isalnum is undefined.
        while (nLeft < nLength && !std::isalnum(static_cast<unsigned char>(strText[nLeft])))
        {
            ++nLeft;
        }
        if (nLeft == nLength)
        {
            break; // only separators were left
        }

        // Find the first non-alphanumeric character after it: one past the
        // end of the word. Reaching the end of the text also ends the word,
        // so a word that runs up to the last byte of the file is still counted.
        std::string::size_type nRight = nLeft + 1;
        while (nRight < nLength && std::isalnum(static_cast<unsigned char>(strText[nRight])))
        {
            ++nRight;
        }

        // [nLeft, nRight) contains alphanumeric characters only.
        counter[strText.substr(nLeft, nRight - nLeft)] += 1;

        nLeft = nRight;
    }

    // Print the results.
    for (CountMap::iterator iter = counter.begin(); counter.end() != iter; ++iter)
    {
        std::cout << iter->first << "\t\t" << iter->second << std::endl;
    }

    std::system("pause"); // keeps the console window open (Windows shell command)
    return 0;
}
python 代码:
import re
from collections import Counter

# Path of the text file to analyse.
filepath = r'c:/text.txt'


def count_words(text):
    """Count the English words in ``text``.

    A word is a maximal run of ASCII letters and digits; every other
    character (punctuation, whitespace, underscores, non-ASCII text such as
    Chinese) acts as a separator. Matching is case-sensitive.

    Returns a dict-like ``Counter`` mapping each word to its number of
    occurrences, in order of first appearance. Text with no words yields an
    empty mapping; the empty string is never reported as a word.
    """
    # findall (rather than split) never produces empty tokens when the text
    # starts or ends with a separator.
    return Counter(re.findall(r'[A-Za-z0-9]+', text))


def main():
    """Read the file at ``filepath`` and print each word with its count."""
    with open(filepath) as source:
        text = source.read()
    for key, value in count_words(text).items():
        print('%s\t\t%s' % (key, value))


if __name__ == '__main__':
    main()
小乓加油!