- 注册时间
- 2008-3-28
- 最后登录
- 1970-1-1
- 威望
- 星
- 金币
- 枚
- 贡献
- 分
- 经验
- 点
- 鲜花
- 朵
- 魅力
- 点
- 上传
- 次
- 下载
- 次
- 积分
- 3286
- 在线时间
- 小时
|
发表于 2009-2-4 17:18:09
|
显示全部楼层
49# 的程序我用汇编写了一下
我进行了简单测试,由于比较复杂不知是否正确,但应该差不多吧。
发现之所以慢,是因为功能本身太小,如果代码较长编译器稍稍放松一点就白费劲了。
另外,全部展开太复杂,就偷懒在3,4层将gxq的copy进去了,总之就是想说,不用bsr 未必就不行
第一次写汇编,还请各位多指教
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
- // Layout of one 32-bit table entry for a byte b (bit 1 = most significant bit):
- // m  bits 1-2   (2 bits): mark, i.e. how many one bits b contains
- //        0: at least 3 ones
- //        1: exactly 2 ones
- //        2: exactly 1 one
- //        3: no ones
- // x  bits 3-12  (10 bits):
- //        when b contains at least 2 ones, x = the power of two just above b's highest one bit
- //        e.g. 11001 -> 100000, 1001 -> 10000, 0011 -> 0100
- // y  bits 13-22 (10 bits):
- //        when b contains exactly 2 ones, y = GreaterEqualBit2(b + 1)
- //        e.g. 1010 -> 1100, 0011 -> 0100
- // z  bits 23-32 (10 bits):
- //        when b contains at least 3 ones, z = GreaterEqualBit2(b), so the caller just returns z << 24
- //        e.g. 0110010 -> 1000000, 101011 -> 110000
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
- const UINT32 Data[256] = { // lookup table indexed by one byte b; entry = mark<<30 | x<<20 | y<<10 | z, the four fields GreaterEqualBit2_SHSHSH masks out with 0xC0000000 / 0x3FF00000 / 0x000FFC00 / 0x000003FF
- 0xC0000000,0x80000000,0x80000000,0x40401000,0x80000000,0x40801800,0x40802000,0x00800008, // b = 0x00..0x07
- 0x80000000,0x41002800,0x41003000,0x0100000C,0x41004000,0x01000010,0x01000010,0x01000010, // b = 0x08..0x0F
- 0x80000000,0x42004800,0x42005000,0x02000014,0x42006000,0x02000018,0x02000018,0x02000018, // b = 0x10..0x17
- 0x42008000,0x02000020,0x02000020,0x02000020,0x02000020,0x02000020,0x02000020,0x02000020, // b = 0x18..0x1F
- 0x80000000,0x44008800,0x44009000,0x04000024,0x4400A000,0x04000028,0x04000028,0x04000028, // b = 0x20..0x27
- 0x4400C000,0x04000030,0x04000030,0x04000030,0x04000030,0x04000030,0x04000030,0x04000030, // b = 0x28..0x2F
- 0x44010000,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040, // b = 0x30..0x37
- 0x04000040,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040,0x04000040, // b = 0x38..0x3F
- 0x80000000,0x48010800,0x48011000,0x08000044,0x48012000,0x08000048,0x08000048,0x08000048, // b = 0x40..0x47
- 0x48014000,0x08000050,0x08000050,0x08000050,0x08000050,0x08000050,0x08000050,0x08000050, // b = 0x48..0x4F
- 0x48018000,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060, // b = 0x50..0x57
- 0x08000060,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060,0x08000060, // b = 0x58..0x5F
- 0x48020000,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080, // b = 0x60..0x67
- 0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080, // b = 0x68..0x6F
- 0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080, // b = 0x70..0x77
- 0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080,0x08000080, // b = 0x78..0x7F
- 0x80000000,0x50020800,0x50021000,0x10000084,0x50022000,0x10000088,0x10000088,0x10000088, // b = 0x80..0x87
- 0x50024000,0x10000090,0x10000090,0x10000090,0x10000090,0x10000090,0x10000090,0x10000090, // b = 0x88..0x8F
- 0x50028000,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0, // b = 0x90..0x97
- 0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0,0x100000A0, // b = 0x98..0x9F
- 0x50030000,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0, // b = 0xA0..0xA7
- 0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0, // b = 0xA8..0xAF
- 0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0, // b = 0xB0..0xB7
- 0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0,0x100000C0, // b = 0xB8..0xBF
- 0x50040000,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xC0..0xC7
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xC8..0xCF
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xD0..0xD7
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xD8..0xDF
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xE0..0xE7
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xE8..0xEF
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100, // b = 0xF0..0xF7
- 0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100,0x10000100 // b = 0xF8..0xFF
- };
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
- // if (first byte has at least 3 ones )
- // {
- // get z and return z<<24
- // }
- // if (first byte has 2 ones )
- // {
- // if ( other bits are all 0)
- // return (first byte)<<24
- // else
- // get y, return y<<24
- // }
- // if (first byte has 1 one )
- // {
- // if (second byte has at least 2 ones )
- // return (first byte)<<24 + (x of second byte)<<16
- // go to GXQ;
- //
- // }
- // if (first byte has no one ) {
- // go to GXQ;
- // }
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////
- //-----------------------------------------------------------------------
- // int GreaterEqualBit2_SHSHSH(int n)
- //
- // Table-driven front end for the bsr-based routine at return_hardcase.
- // With the set bits of n written as A > B > rest, return_hardcase yields
- // A + B when rest == 0 and A + 2*B otherwise (just A when n is a single
- // bit). The table paths below produce the same value from the top one or
- // two bytes of n without executing bsr.
- //
- // Syntax: MSVC inline assembler, 32-bit x86 (Intel operand order).
- // In:     [esp+4] = n  (naked function: no prologue, argument read from the stack)
- // Out:    eax     = result; plain `ret`, so the caller removes the argument
- // Clobb:  eax, ecx, edx, flags (return_hardcase also uses 4 bytes of stack)
- //
- // Each Data[] entry is  mark<<30 | x<<20 | y<<10 | z  (10-bit x, y, z).
- // mark: 0 = byte has >= 3 ones, 1 = exactly 2, 2 = exactly 1, 3 = none.
- //
- // NOTE(review): for n == 0 control reaches `bsr` with a zero source, whose
- // destination is architecturally undefined. The intended result for 0 is
- // not visible here, so the behaviour is left as it was - confirm with callers.
- //-----------------------------------------------------------------------
- _declspec(naked) int GreaterEqualBit2_SHSHSH(int n)
- {
-     __asm
-     {
-         mov     ecx, dword ptr [esp+0x04]       // ecx = n, kept live for every path
-         mov     eax, ecx
-         shr     eax, 24                         // eax = first (most significant) byte
-         mov     edx, dword ptr [Data+eax*4]     // edx = table entry of the first byte
-
-         mov     eax, edx
-         shr     eax, 30                         // eax = mark as a small integer 0..3; the mark must be
-                                                 // shifted down, a masked-only value never equals 1 or 2
-         cmp     eax, 1
-         ja      first_byte_le_1_one             // mark 2 or 3
-         je      first_byte_2_ones               // mark 1
-
-         // mark 0: first byte has at least 3 ones -> result is z << 24
-         and     edx, 0x000003FF                 // isolate z
-         mov     eax, edx
-         shl     eax, 24                         // z == 0x100 shifts out of the register and yields 0
-         ret
-
-     first_byte_2_ones:
-         test    ecx, 0x00FFFFFF                 // any bit set below the first byte?
-         jnz     first_byte_2_ones_plus_sth
-         mov     eax, ecx                        // low 24 bits are zero: n itself is the result
-         ret
-
-     first_byte_2_ones_plus_sth:
-         and     edx, 0x000FFC00                 // isolate y (still positioned at bit 10)
-         mov     eax, edx
-         shl     eax, 14                         // bit 10 -> bit 24, i.e. y << 24
-         ret
-
-     first_byte_le_1_one:
-         cmp     eax, 2
-         jne     return_hardcase                 // mark 3: first byte is zero
-
-         // first byte has exactly 1 one: classify the second byte
-         mov     eax, ecx
-         shr     eax, 16
-         and     eax, 0x000000FF                 // eax = second byte
-         mov     edx, dword ptr [Data+eax*4]     // edx = table entry of the second byte
-
-         mov     eax, edx
-         shr     eax, 30                         // mark of the second byte
-         cmp     eax, 1
-         ja      return_hardcase                 // second byte has at most 1 one
-
-         // second byte has at least 2 ones: result = (first byte << 24) + (x << 16)
-         and     edx, 0x3FF00000                 // isolate x (positioned at bit 20)
-         shr     edx, 4                          // bit 20 -> bit 16, i.e. x << 16
-         and     ecx, 0xFF000000                 // keep only the first byte of n
-         lea     eax, [ecx+edx]
-         ret
-
-     return_hardcase:                            // general bsr-based routine (copied from GXQ)
-         mov     edx, ecx                        // edx = n
-         mov     eax, 1
-         bsr     ecx, edx                        // ecx = index of the highest set bit
-         shl     eax, cl                         // eax = A, the highest set bit
-         push    eax                             // save A
-         xor     edx, eax                        // edx = n without A
-         bsr     ecx, edx                        // ZF = 1 when nothing is left below A
-         mov     eax, 0                          // must stay `mov`: `xor eax,eax` would destroy ZF from bsr
-         setne   al                              // eax = 1 only if a second bit exists
-         shl     eax, cl                         // eax = B, the second highest bit (or 0)
-         lea     ecx, [eax*0x02]                 // ecx = 2*B; lea leaves the flags untouched
-         xor     edx, eax                        // edx = bits below B; ZF = 1 when there are none
-         cmovne  eax, ecx                        // more bits below B -> use 2*B instead of B
-         pop     edx                             // edx = A
-         add     eax, edx                        // result = A + (B or 2*B)
-         ret
-     }
- }
-
-
复制代码 |
|