找回密码
 欢迎注册
楼主: gxqcn

[讨论] 一个快速得到bit位索引的算法

[复制链接]
发表于 2008-3-11 11:39:18 | 显示全部楼层
////////////////// E:\yaojialin\FindBits\Debug>findbits yaos: 345.155 yaos_1: 343.801 yaos_2: 165.216 GXQ: 169.795 GXQ used BT: 249.897 E:\yaojialin\FindBits\Debug>findbits yaos: 344.554 yaos_1: 343.643 yaos_2: 165.530 GXQ: 170.582 GXQ used BT: 249.942 E:\yaojialin\FindBits\Debug>findbits yaos: 344.908 yaos_1: 346.577 yaos_2: 166.864 GXQ: 171.155 GXQ used BT: 252.270 E:\yaojialin\FindBits\Debug>findbits yaos: 344.171 yaos_1: 346.151 yaos_2: 166.900 GXQ: 171.366 GXQ used BT: 252.932 E:\yaojialin\FindBits\Debug>findbits yaos: 344.697 yaos_1: 343.413 yaos_2: 165.558 GXQ: 171.240 GXQ used BT: 249.706 E:\yaojialin\FindBits\Debug>findbits yaos: 345.192 yaos_1: 343.298 yaos_2: 165.210 GXQ: 170.112 GXQ used BT: 250.019 ////////////////// #include #include #include __int64 GetCPUCounter(void) { __asm rdtsc; } DWORD find_empty(DWORD a, DWORD index[33]) { return 0; } DWORD find_by_yaos(DWORD a, DWORD index[33]) { __asm { mov edx, a; mov edi, dword ptr index; xor eax, eax; loop1: bsr ecx, edx; je exit1; btr edx, ecx; mov dword ptr [edi + 4*eax], ecx; inc eax; jmp loop1; exit1: mov dword ptr [edi + 4*eax], -1; } } void find_by_yaos_1(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] loop1: bsr ebx, eax je exit1 btr eax, ebx mov dword ptr [edi], ebx add edi, 4 jmp loop1 exit1: mov dword ptr [edi], -1 } } void find_by_yaos_2(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] mov ecx, 0 loop1: shr eax, 1 jnc loop2 mov dword ptr [edi], ecx add edi, 4 loop2: inc ecx cmp ecx, 32 jne loop1 mov dword ptr [edi], -1 } } DWORD find_by_GxQ(DWORD code, DWORD index[33]) { __asm { mov ecx, code; xor eax, eax; shr ecx, 1; mov dword ptr[index + 4*eax], 0; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 1; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 2; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 3; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 4; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 5; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 6; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 7; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 8; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 9; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 10; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 11; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 12; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 13; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 14; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 15; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 16; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 17; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 18; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 19; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 20; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 21; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 22; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 23; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 24; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 25; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 26; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 27; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 28; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 29; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 30; adc eax, 0; shr ecx, 1; mov dword ptr[index + 4*eax], 31; adc eax, 0; // 休止符 mov dword ptr[index + 4*eax], -1; } } DWORD find_by_GxQ_bt(DWORD code, DWORD index[33]) { __asm { mov ecx, code; xor eax, eax; bt ecx, 0; mov dword ptr[index + 4*eax], 0; adc eax, 0; bt ecx, 1; mov dword ptr[index + 4*eax], 1; adc eax, 0; bt ecx, 2; mov dword ptr[index + 4*eax], 2; adc eax, 0; bt ecx, 3; mov dword ptr[index + 4*eax], 3; adc eax, 0; bt ecx, 4; mov dword ptr[index + 4*eax], 4; adc eax, 0; bt ecx, 5; mov dword ptr[index + 4*eax], 5; adc eax, 0; bt ecx, 6; mov dword ptr[index + 4*eax], 6; adc eax, 0; bt ecx, 7; mov dword ptr[index + 4*eax], 7; adc eax, 0; bt ecx, 8; mov dword ptr[index + 4*eax], 8; adc eax, 0; bt ecx, 9; mov dword ptr[index + 4*eax], 9; adc eax, 0; bt ecx, 10; mov dword ptr[index + 4*eax], 10; adc eax, 0; bt ecx, 11; mov dword ptr[index + 4*eax], 11; adc eax, 0; bt ecx, 12; mov dword ptr[index + 4*eax], 12; adc eax, 0; bt ecx, 13; mov dword ptr[index + 4*eax], 13; adc eax, 0; bt ecx, 14; mov dword ptr[index + 4*eax], 14; adc eax, 0; bt ecx, 15; mov dword ptr[index + 4*eax], 15; adc eax, 0; bt ecx, 16; mov dword ptr[index + 4*eax], 16; adc eax, 0; bt ecx, 17; mov dword ptr[index + 4*eax], 17; adc eax, 0; bt ecx, 18; mov dword ptr[index + 4*eax], 18; adc eax, 0; bt ecx, 19; mov dword ptr[index + 4*eax], 19; adc eax, 0; bt ecx, 20; mov dword ptr[index + 4*eax], 20; adc eax, 0; bt ecx, 21; mov dword ptr[index + 4*eax], 21; adc eax, 0; bt ecx, 22; mov dword ptr[index + 4*eax], 22; adc eax, 0; bt ecx, 23; mov dword ptr[index + 4*eax], 23; adc eax, 0; bt ecx, 24; mov dword ptr[index + 4*eax], 24; adc eax, 0; bt ecx, 25; mov dword ptr[index + 4*eax], 25; adc eax, 0; bt ecx, 26; mov dword ptr[index + 4*eax], 26; adc eax, 0; bt ecx, 27; mov dword ptr[index + 4*eax], 27; adc eax, 0; bt ecx, 28; mov dword ptr[index + 4*eax], 28; adc eax, 0; bt ecx, 29; mov dword ptr[index + 4*eax], 29; adc eax, 0; bt ecx, 30; mov dword ptr[index + 4*eax], 30; adc eax, 0; bt ecx, 31; mov dword ptr[index + 4*eax], 31; adc eax, 0; // 休止符 mov dword ptr[index + 4*eax], -1; } } int main(int argc, char * argv[]) { ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); LARGE_INTEGER lTimestart, lTimeEnd, liFreq; __int64 CounterStart, CounterStop; double dbMs; DWORD index[33]; QueryPerformanceCounter(&lTimestart); for(DWORD i = 0; i < 1000000; i++){ find_by_yaos(0xFFFFFFFF, index);// 调用bit扫描函数 } QueryPerformanceCounter(&lTimeEnd); QueryPerformanceFrequency(&liFreq); dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart)) * 1000.0 / (double)liFreq.QuadPart; printf("yaos: %.3f\n", dbMs); QueryPerformanceCounter(&lTimestart); for(DWORD i = 0; i < 1000000; i++){ find_by_yaos_1(0xFFFFFFFF, index);// 调用bit扫描函数 } QueryPerformanceCounter(&lTimeEnd); QueryPerformanceFrequency(&liFreq); dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart)) * 1000.0 / (double)liFreq.QuadPart; printf("yaos_1: %.3f\n", dbMs); QueryPerformanceCounter(&lTimestart); for(DWORD i = 0; i < 1000000; i++){ find_by_yaos_2(0xFFFFFFFF, index);// 调用bit扫描函数 } QueryPerformanceCounter(&lTimeEnd); QueryPerformanceFrequency(&liFreq); dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart)) * 1000.0 / (double)liFreq.QuadPart; printf("yaos_2: %.3f\n", dbMs); QueryPerformanceCounter(&lTimestart); for(DWORD i = 0; i < 1000000; i++){ find_by_GxQ(0xFFFFFFFF, index);// 调用bit扫描函数 } QueryPerformanceCounter(&lTimeEnd); QueryPerformanceFrequency(&liFreq); dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart)) * 1000.0 / (double)liFreq.QuadPart; printf("GXQ: %.3f\n", dbMs); QueryPerformanceCounter(&lTimestart); for(DWORD i = 0; i < 1000000; i++){ find_by_GxQ_bt(0xFFFFFFFF, index);// 调用bit扫描函数 } QueryPerformanceCounter(&lTimeEnd); QueryPerformanceFrequency(&liFreq); dbMs = ((double) (lTimeEnd.QuadPart - lTimestart.QuadPart)) * 1000.0 / (double)liFreq.QuadPart; printf("GXQ used BT: %.3f\n", dbMs); return 0; }
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2008-3-11 12:04:14 | 显示全部楼层
验证了我在 35# 的几个提醒。 其实,find_by_yaos_2() 还可进一步优化,如下:
void find_by_yaos_3(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr index mov ecx, 32 loop1: shr eax, 1 jnc loop2 mov dword ptr [edi], ecx add edi, 4 loop2: sub ecx, 1 jne loop1 mov dword ptr [edi], -1 } }
从而在循环中省掉了一条比较指令。
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-11 13:49:01 | 显示全部楼层
呵呵 不用我说吧 自己看有什么问题 不是不能用loop或者loopne 关键是索引是从0-31的啊 void find_by_yaos_3(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr index mov ecx, 32 loop1: shr eax, 1 jnc loop2 mov dword ptr [edi], ecx add edi, 4 loop2: sub ecx, 1 jne loop1 mov dword ptr [edi], -1 } }
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-11 13:57:50 | 显示全部楼层
我这么改的 void find_by_yaos_3(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] mov ecx, 32 loop1: shl eax, 1 jnc loop2 mov dword ptr [edi], ecx dec dword ptr [edi] add edi, 4 loop2: sub ecx, 1 jmpne loop1 mov dword ptr [edi], -1 } } void find_by_yaos_4(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] mov ecx, 32 loop1: shl eax, 1 jnc loop2 mov dword ptr [edi], ecx dec dword ptr [edi] add edi, 4 loop2: loopne loop1 mov dword ptr [edi], -1 } } 结论时间2>4>3
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-11 14:22:08 | 显示全部楼层
分析结果 bit中1越少find_by_yaos越快 在小数字超过GxQ很多 普遍情况find_by_yaos_3, find_by_yaos_4获胜 小数字find_by_yaos_4优于find_by_yaos
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2008-3-12 07:35:31 | 显示全部楼层
我在 42# 对你的汇编优化时,没有注意到 ecx 同时还作为将要存的index,对不起。 但你后来的改动也改变了数据的存放格式,由先前默认的升序变成了降序。
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-12 07:49:46 | 显示全部楼层
已经到这个方法的极限了 或者有方法判断小于0时跳出 ======================== 除非不用移位做 还有其他测试bit的方法否?
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2008-3-12 08:01:25 | 显示全部楼层
你还可试着利用 shr/shl 产生的 ZF 标记位来提前跳出循环(若同时 CF=1,要记得存储index)。
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-12 08:35:26 | 显示全部楼层
两个改进, 无论什么情况 均比find_by_yaos_3, find_by_yaos_2快 但比较不出两种写法的差异 void find_by_yaos_5(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] mov ecx, 31 loop1: shl eax, 1 jnc loop2 mov dword ptr [edi], ecx // dec dword ptr [edi] add edi, 4 loop2: sub ecx, 1 jns loop1 mov dword ptr [edi], -1 } } void find_by_yaos_6(DWORD a, DWORD index[33]) { __asm { mov eax, a mov edi, dword ptr [index] mov ecx, 31 loop1: shl eax, 1 jnc loop2 mov dword ptr [edi], ecx // dec dword ptr [edi] add edi, 4 loop2: sub ecx, 1 jnc loop1 mov dword ptr [edi], -1 } }
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-12 08:41:09 | 显示全部楼层
对着void find_by_yaos_6(DWORD a, DWORD index[33]) 发了一会儿呆 无论如何添加jz或者je均要增加至少三条指令 还是不用了 ==================================== 顺便说下, 最近的改进 增加检测bit=1的数量的语句是很容易的 也应该增加不了多少时间
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
您需要登录后才可以回帖 登录 | 欢迎注册

本版积分规则

小黑屋|手机版|数学研发网 ( 苏ICP备07505100号 )

GMT+8, 2024-11-22 00:16 , Processed in 0.035088 second(s), 15 queries .

Powered by Discuz! X3.5

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表