- 注册时间
- 2007-12-28
- 最后登录
- 1970-1-1
- 威望
- 星
- 金币
- 枚
- 贡献
- 分
- 经验
- 点
- 鲜花
- 朵
- 魅力
- 点
- 上传
- 次
- 下载
- 次
- 积分
- 12787
- 在线时间
- 小时
|
发表于 2014-12-18 08:19:46
|
显示全部楼层
本楼是讨论汇编优化的,不喜欢汇编的人请忽略本楼。
在28#的代码中,“if ( g_used [ i ] ) continue” 这句是用来查找一个值为0的元素,这使我立即想起c运行时刻函数strlen,这个函数也是用来查找值为0的元素,不过其数组是char型而不是整型。如果将这段代码改为调用strlen函数,效果如何呢?说干就干 ,于是我又写了一版调用函数strlen的版本。另外,我有加了2个strlen函数的汇编的版本。源代码是来自于丹麦的Anger Fog。下面是调用strlen函数的版本。
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <windows.h>
- /* 说明:
- 我们定义尺子编号为i,(i>=0), 尺子i总是有i+1段。
- 我们首先搜索编号最大的尺子,然后搜索编号次小的尺子,最后搜索编号为0的尺子,在本程序尺子编号用rule_idx来表示
- */
- #define MAX_RULES_COUNT 8 //最多可搜索8把尺子
- #define MAX_SEG_COUNT 120 //最多可覆盖120种不同的长度
- char g_used[MAX_SEG_COUNT+2]; //used[i]=1表示长度i可被尺子测量
- int g_maxSegArr[]={1,4,10,20,35,56,84,120}; //maxSegArr[i]表示尺子i可测量的不同长度的总数
- int g_result[MAX_RULES_COUNT][MAX_RULES_COUNT]; // result[i][j]表示尺子i第j+1段的长度
- int g_count;
- // The asm code is from Anger Fog "optimization_manuals"
- _declspec(naked)
- int anger_strlen( char *s )
- {
- __asm
- {
- push ebx
- mov ecx, [esp+8] ; get pointer to string
- mov eax, ecx ; copy pointer
- and ecx, 3 ; lower 2 bits of address, check alignment
- jz L2 ; string is aligned by 4. Go to loop
- and eax, -4 ; align pointer by 4
- mov ebx, [eax] ; read from nearest preceding boundary
- shl ecx, 3 ; mul by 8 = displacement in bits
- mov edx, -1
- shl edx, cl ; make byte mask
- not edx ; mask = 0FFH for false bytes
- or ebx, edx ; mask out false bytes
- ; check first four bytes for zero
- lea ecx, [ebx-01010101H] ; subtract 1 from each byte
- not ebx ; invert all bytes
- and ecx, ebx ; and these two
- and ecx, 80808080H ; test all sign bits
- jnz L3 ; zero-byte found
-
- ; Main loop, read 4 bytes aligned
- L1: add eax, 4 ; increment pointer by 4
- L2: mov ebx, [eax] ; read 4 bytes of string
- lea ecx, [ebx-01010101H] ; subtract 1 from each byte
- not ebx ; invert all bytes
- and ecx, ebx ; and these two
- and ecx, 80808080H ; test all sign bits
- jz L1 ; no zero bytes, continue loop
-
- L3: bsf ecx, ecx ; find right-most 1-bit
- shr ecx, 3 ; divide by 8 = byte index
- sub eax, [esp+8] ; subtract start address
- add eax, ecx ; add index to byte
- pop ebx
- ret
- }
- }
- // The asm code is from Anger Fog "optimization_manuals"
- _declspec(naked)
- int anger_strlen_sse2( char *s )
- {
- __asm
- {
- mov eax, [esp+4] ; get pointer to string
- mov ecx, eax ; copy pointer
- pxor xmm0, xmm0 ; set to zero
- and ecx, 15 ; lower 4 bits indicate misalignment
- and eax, -16 ; align pointer by 16
- movdqa xmm1, [eax] ; read from nearest preceding boundary
- pcmpeqb xmm1, xmm0 ; compare 16 bytes with zero
- pmovmskb edx, xmm1 ; get one bit for each byte result
- shr edx, cl ; shift out false bits
- shl edx, cl ; shift back again
- bsf edx, edx ; find first 1-bit
- jnz L2 ; found
-
- ; Main loop, search 16 bytes at a time
- L1: add eax, 16 ; increment pointer by 16
- movdqa xmm1, [eax] ; read 16 bytes aligned
- pcmpeqb xmm1, xmm0 ; compare 16 bytes with zero
- pmovmskb edx, xmm1 ; get one bit for each byte result
- bsf edx, edx ; find first 1-bit
- jz L1 ; loop if not found
-
- L2: ; Zero-byte found. Compute string length
- sub eax, [esp+4] ; subtract start address
- add eax, edx ; add byte index
- ret
- }
- }
- void output_result( int rule_count)
- {
- int i,j;
- for(i=0;i<rule_count;i++)
- {
- printf("(");
- for(j=0;j<=i;j++)
- printf("%d,",g_result[i][j]);
- printf(")\n");
- }
- printf("\n");
- g_count++;
- }
- int check(int rule_idx, int deep, int *pNewSegCount, int newSegs[] )
- {
- int i,c,sum;
- for (c=0,sum=0,i=deep-1;i>=0;i--)
- {
- sum += g_result[rule_idx][i];
- if ( g_used[sum] )
- return 0;
- g_used[sum]=32;
- newSegs[c++]=sum;
- *pNewSegCount=c;
- }
- return 1;
- }
- //deep: 表示将要确定尺子第deep+1段的长度
- void new_mark(int rule_count,int seg_count,int rule_idx, int deep)
- {
- int i,j,mid,flag,sum,new_used_count;
- int new_used[MAX_SEG_COUNT+1];
- mid=rule_idx/2; //第rule_idx+1把尺子中间段的编号
- for (sum=0,i=0;i<deep;i++)
- sum += g_result[rule_idx][i];
- for (i=1;i<=seg_count;i++)
- {
- //i+= strlen(g_used+i);
- //i+= anger_strlen(g_used+i);
- i+= anger_strlen_sse2(g_used+i);
- if ( i + sum > seg_count)
- break;
- if ( deep==mid+1 && i< g_result[rule_idx][rule_idx-deep] )
- continue; //消除重复的结果
- g_result[rule_idx][deep]=i;
- flag=check(rule_idx,deep+1,&new_used_count,new_used);
- if (flag )
- {
- if ( rule_idx==0)
- output_result(rule_count);
- if ( deep==rule_idx)
- new_mark(rule_count,seg_count,rule_idx-1,0);
- else
- new_mark(rule_count,seg_count,rule_idx,deep+1);
- }
- for ( j=0;j<new_used_count;j++)
- g_used[new_used[j]]=0;
- }
- }
- void search(int n)
- {
- DWORD t;
- printf("\nIs searching the soultion when rule count is %d\n",n);
- t=::GetTickCount();
- g_count=0;memset(g_result,0,sizeof(g_result));
- new_mark(n,g_maxSegArr[n-1],n-1,0);
- t=::GetTickCount()-t;
- printf("It take %d ms and find %d solution\n",t,g_count);
- }
- //int main(int argc, char* argv[])
- //{
- // int i;
- // for (i=1;i<=6;i++ ) search(i);
- // return 0;
- //}
复制代码
|
|