Simple is beautiful

还需要副标题吗?

导航

<2006年2月>
29 30 31 1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 1 2 3 4
5 6 7 8 9 10 11

统计

常用链接

留言簿(2)

随笔档案

搜索

最新评论

阅读排行榜

评论排行榜

为什么Python的性能比较好呢?

在vckbase上看到有讨论这样一个问题:
http://blog.vckbase.com/jzhang/archive/2006/03/28/18807.html
CSDN的朋友参考了Python的实现源码给出有如下的解答:
http://blog.csdn.net/imjj/archive/2006/03/31/645163.aspx?Pending=true
性能上已经比Python好了,但是该解答毕竟是针对了具体的应用,比如定死了hash桶的大小之类的。

我也凑热闹给了一个实现,只使用标准C++的一些算法解决此问题,性能上还是没有Python好,但是已经非常接近了:
D:\test\pytest>python test.py
2006-03-31 14:59:19.348000
2006-03-31 14:59:22.963000

D:\test\pytest>cpptest
经过了4025.7888毫秒

实现:
#include <windows.h>      //  just for time counting

#include <list>
#include <string>
#include <fstream>
#include <algorithm>

using namespace std;
int main( void )
{
 __int64 t1, t2;
 GetSystemTimeAsFileTime( (LPFILETIME)&t1 );

 list<string> emails;
 ifstream infile("email2.txt");
 ofstream oufile("email_cpp.txt");
 copy( istream_iterator<string>(infile), istream_iterator<string>(), back_inserter(emails) );
 emails.unique();
 ofstream outfile( "email_cpp.txt" );
 copy( emails.begin(), emails.end(), ostream_iterator<string>(outfile,"\n") );

 GetSystemTimeAsFileTime( (LPFILETIME)&t2 );
 printf( "经过了%I64d.%04I64d毫秒\n", (t2-t1)/10000, (t2-t1)%10000 );
}
对比的其他两个实现:
1、vector + sort + unique
2、set
最后还是我的这个实现好一点:)
PS:编译器用的是VC2005

再PS,写了上面那个PS之后突然想看看VC2003怎么样,于是测试一下,惊人的发现:
D:\test\pytest>cpptest2
经过了3234.6512毫秒
速度已经超越了Python
.^_^。满心欢喜结束这个讨论旅程

posted on 2006-03-31 15:28 音乐虫子 阅读(2641) 评论(4)  编辑 收藏 引用

评论

# re: 为什么Python的性能比较好呢? 2006-03-31 18:23 虫子

为了方便日后查看(怕那些链接无效了),特意把一些其他实现的代码摘录下来:
====================================================1.Python的原始实现:
# Remove duplicated email addresses from a file.
#
# Reads "email.txt" line by line and writes each distinct line, in order of
# first appearance, to "email_new.txt". The current time is printed before and
# after the run so the elapsed time can be read off the two timestamps.
# Lines are compared verbatim, including their trailing newline.
import datetime

if __name__ == "__main__":
    print(str(datetime.datetime.today()))

    seen = set()
    # "with" closes both files even if reading or writing raises.
    with open("email.txt", "r") as f, open("email_new.txt", "w") as f2:
        for line in f:
            if line not in seen:
                seen.add(line)
                f2.write(line)

    print(str(datetime.datetime.today()))

from link:
http://blog.vckbase.com/jzhang/archive/2006/03/28/18807.html
====================================================
  回复  更多评论   

# re: 为什么Python的性能比较好呢? 2006-03-31 18:24 虫子

2. 参看Python代码实现的实现
#include <cstdio>

// code by 李嘉
// 禁止任何商业目的的转载
// 不对因使用代码产生任何后果负任何责任
// 转载请保留所有声明

#include <windows.h>
using namespace std;


// Multiplies a and b and keeps only the low 32 bits of the product.
// Both arguments and the product are parenthesised so the macro stays
// correct when it is invoked with compound expressions.
#define c_mul(a, b) (((a) * (b)) & 0xFFFFFFFF)

// Hashes a NUL-terminated string.
//
// The post this code comes from describes it as following the string hash of
// the Python implementation: seed with the first byte shifted left by 7, fold
// every byte in with a 32-bit multiply by 1000003 and an XOR, then XOR in the
// length.
//
// @param str  NUL-terminated input; must not be null.
// @return     the hash value; (size_t)-1 is never returned, it is remapped
//             to (size_t)-2.
size_t python_hash(const char * str)
{
    // Read the bytes as unsigned: plain char may be signed, and a byte
    // >= 0x80 would then be sign-extended (and left-shifted while negative),
    // giving platform-dependent results that escape the 32-bit mask.
    const unsigned char * p = reinterpret_cast<const unsigned char *>(str);

    size_t value = static_cast<size_t>(*p) << 7;
    size_t len = 0;
    while (*p != 0)
    {
        value = c_mul(1000003, value) ^ *p++;
        len++;
    }

    value = value ^ len;
    if (value == (size_t)-1)
        value = (size_t)-2;
    return value;
}

// Shift-and-fold hash of a NUL-terminated string.
//
// Each character is added after shifting the accumulator left by four bits.
// Whenever any of bits 28..31 become set, they are folded back into bits
// 4..7, cleared, and the accumulator is XOR-ed with `seed`.
//
// @param str   NUL-terminated input; must not be null.
// @param seed  value mixed in on every fold (defaults to 1).
// @return      the accumulated hash; 0 for the empty string.
size_t hash(const char * str, size_t seed = 1)
{
    size_t acc = 0;
    for (const char * cursor = str; *cursor; ++cursor)
    {
        acc = (acc << 4) + *cursor;

        const size_t top = acc & 0xF0000000;
        if (top)
        {
            acc ^= top >> 24;
            acc ^= top;
            acc ^= seed;
        }
    }
    return acc;
}


// Number of buckets allocated for the de-duplication table.
#define MAX_TABLE_SIZE (780000)
// Maximum number of keys one bucket can remember.
#define MAX_CONFI 9

// One bucket of the de-duplication table: a small fixed-capacity list of the
// hash() keys that have been recorded in this bucket so far.
struct hash_item
{
    size_t items[MAX_CONFI];   // recorded keys; only the first item_count are valid
    size_t item_count;         // number of valid entries in items

    // Starts with an empty bucket.
    hash_item()
    {
        item_count = 0;
    }

    // Reports whether the hash() key of `str` is already recorded in this
    // bucket, recording it if it is not.
    //
    // Strings are identified by their hash() value only, so two different
    // strings with the same key are treated as the same entry.
    //
    // @param str  NUL-terminated string to look up.
    // @return     true if the key was already present, false otherwise.
    bool check_has(const char * str)
    {
        size_t key = hash(str);
        for (size_t i = 0; i < item_count; i++)
        {
            if (items[i] == key)
                return true;
        }

        // A full bucket cannot remember another key. Skip the store instead
        // of writing past the end of items; the key is then reported as new
        // again on a later lookup rather than corrupting memory.
        if (item_count < MAX_CONFI)
            items[item_count++] = key;
        return false;
    }

};


// Copies "email.txt" to "email_new_my.txt", skipping every line that
// hash_item::check_has reports as already seen, and prints the elapsed
// wall-clock time. Returns 1 if either file cannot be opened.
int main( void )
{
    // FILETIME is a 64-bit count of 100-nanosecond ticks, so the difference
    // divided by 10000 is milliseconds and the remainder is the fraction.
    __int64 t1, t2;
    GetSystemTimeAsFileTime( (LPFILETIME)&t1 );

    FILE * fin = fopen("email.txt", "r");
    if (fin == NULL)
    {
        fprintf(stderr, "cannot open email.txt\n");
        return 1;
    }
    FILE * fout = fopen("email_new_my.txt", "w+");
    if (fout == NULL)
    {
        fprintf(stderr, "cannot open email_new_my.txt\n");
        fclose(fin);
        return 1;
    }

    hash_item * table = new hash_item[MAX_TABLE_SIZE];
    char line[255];

    // Loop on the result of fgets itself. Testing feof() before processing
    // would drop a final line that is not terminated by a newline, because
    // reading that line already sets the end-of-file flag.
    while (fgets(line, sizeof(line), fin) != NULL)
    {
        // python_hash selects the bucket; check_has compares hash() keys
        // inside that bucket.
        const size_t pos_x = python_hash(line) % MAX_TABLE_SIZE;
        if (!table[pos_x].check_has(line))
            fprintf(fout, "%s", line);
    }

    GetSystemTimeAsFileTime( (LPFILETIME)&t2 );
    printf( "经过了%I64d.%04I64d毫秒\n", (t2-t1)/10000, (t2-t1)%10000 );
    fclose(fin);
    fclose(fout);
    delete [] table;
}

from link:
http://blog.csdn.net/imjj/archive/2006/03/31/645163.aspx?Pending=true  回复  更多评论   

# re: 为什么Python的性能比较好呢? 2006-04-01 22:10 christanxw

#include <windows.h>
#include <cstdio>
#include <iostream>

// Lookup table filled by InitCryptTable(): 0x500 entries, written as five
// banks of 0x100 entries each.
unsigned long cryptTable[0x500];

// Values passed as dwHashType to Hash() in this file.
const int HASH = 0;
const int HASH_A = 1;
const int HASH_B = 2;

// Fills cryptTable with a fixed pseudo-random sequence.
//
// The generator is seed = (seed * 125 + 3) % 0x2AAAAB, starting from
// 0x00100001. Every table entry consumes two generator steps: the low 16 bits
// of the first become the entry's high half, the low 16 bits of the second
// its low half. Entries are visited row by row, and within a row across the
// five banks (row, row + 0x100, ..., row + 0x400).
void InitCryptTable()
{
    unsigned long seed = 0x00100001;

    for (unsigned long row = 0; row < 0x100; ++row)
    {
        for (unsigned long slot = row; slot < 0x500; slot += 0x100)
        {
            seed = (seed * 125 + 3) % 0x2AAAAB;
            const unsigned long highHalf = (seed & 0xFFFF) << 0x10;

            seed = (seed * 125 + 3) % 0x2AAAAB;
            const unsigned long lowHalf = seed & 0xFFFF;

            cryptTable[slot] = highHalf | lowHalf;
        }
    }
}

// Hashes a NUL-terminated string using cryptTable.
//
// Every byte is upper-cased with toupper() before it is mixed in, so strings
// that differ only in letter case produce the same value. cryptTable must
// have been filled (see InitCryptTable) before this is called.
//
// @param pStr        NUL-terminated input string.
// @param dwHashType  selects the table bank: the index used is
//                    (dwHashType << 8) + upper-cased byte.
// @return            the final value of the first running seed.
unsigned long Hash(char *pStr, unsigned long dwHashType)
{
    unsigned long seed1 = 0x7FED7FED;
    unsigned long seed2 = 0xEEEEEEEE;

    // Walk the bytes as unsigned so toupper() and the table index never see
    // a negative value.
    for (const unsigned char *cursor = reinterpret_cast<const unsigned char *>(pStr);
         *cursor != 0;
         ++cursor)
    {
        const int ch = toupper(*cursor);

        seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
        seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
    }
    return seed1;
}

// One slot of the de-duplication table used by main().
struct HashItem
{
    unsigned long m_nHashKeyA;   // Hash(line, HASH_A) of the line stored in this slot
    unsigned long m_nHashKeyB;   // Hash(line, HASH_B) of the line stored in this slot
    bool m_bExist;               // true once a line has been stored in this slot

    // Starts empty. Without this constructor, `new HashItem[n]` leaves the
    // members indeterminate and the first read of m_bExist sees garbage.
    HashItem()
        : m_nHashKeyA(0), m_nHashKeyB(0), m_bExist(false)
    {
    }
};

int main()
{
__int64 t1, t2;
GetSystemTimeAsFileTime( (LPFILETIME)&t1 );

InitCryptTable();
FILE* fread = fopen("c:\\email.txt","r");
FILE* fwrite = fopen("c:\\emailnew.txt","w+");

HashItem *hashTable = new HashItem[780000];

char line[256] = "";
fgets(line,255,fread);
while(!feof(fread))
{
int nStart = Hash(line,HASH) % 780000;
int nPos = nStart;
if(!(hashTable[nPos].m_bExist
&& hashTable[nPos].m_nHashKeyA ==Hash(line,HASH_A)
&& hashTable[nPos].m_nHashKeyB == Hash(line,HASH_B)))
{
hashTable[nPos].m_nHashKeyA = Hash(line,HASH_A);
hashTable[nPos].m_nHashKeyB = Hash(line,HASH_B);
hashTable[nPos].m_bExist = true;
fprintf(fwrite,"%s",line);
}

fgets(line,255,fread);
}

GetSystemTimeAsFileTime( (LPFILETIME)&t2 );
printf( "经过了%I64d.%04I64d毫秒\n", (t2-t1)/10000, (t2-t1)%10000 );
fclose(fread);
fclose(fwrite);
delete [] hashTable;

std::cin.get();
}

耗时343毫秒。很不错了。呵呵。Python也是C写出来的,C/C++效率是完全可以比Python更快的,就看怎么实现算法了。在众多的脚本语言中Python是比较慢的一个了。  回复  更多评论   

# re: 为什么Python的性能比较好呢? 2009-10-16 22:53 MKII

如果是用了PSYCO,则在我的机器上为170MS。。。
PYTHON + PSYCO,怎可能是脚本语言中比较慢的一个?  回复  更多评论   


只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理