|
  
- 帖子
- 6944
- 精华
- 21
- 积分
- 53834
- 鲜花
- 843 朵
- 主题
- 298 帖
     
|
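// Submitting my function first; it is adapted from older code.
// It is finally close to perfect, although a few instructions look like they could still be dropped.
// I cannot work out how to drop them, though :)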
/*
 * lMul_yaos - schoolbook multi-word multiplication: result = left * right.
 *
 * Operands are arrays of 32-bit words stored least-significant word first
 * (carries propagate toward higher addresses).
 *
 *   result  destination; words result[0 .. sLeft+sRight-1] are written.
 *           The first sRight words are READ and accumulated into before being
 *           overwritten, so the caller must zero them for a plain product.
 *   left    multiplicand, sLeft words.
 *   right   multiplier, sRight words.
 *   sLeft   number of words in left.
 *   sRight  number of words in right.
 *
 * If either length is zero the function returns without touching result.
 *
 * Target: 32-bit x86, MSVC-style inline assembly. Uses MMX registers with
 * PMULUDQ/PADDQ, which need SSE2. EMMS is issued before returning so x87
 * code can run afterwards.
 *
 * EBP is deliberately left untouched: the compiler may address the named
 * parameters (right, sRight) through it, and MSVC warns (C4731) when inline
 * assembly modifies the frame pointer.
 */
void lMul_yaos(uint32 *result, uint32 *left, uint32 *right, uint32 sLeft, uint32 sRight)
{
    /* The loops below count down with "sub reg, 1 / jnz"; a zero length would
       wrap to 0xFFFFFFFF and run far past the end of the buffers. */
    if (sLeft == 0 || sRight == 0)
        return;

    __asm {
        mov     edx, sLeft                  ; edx = rows (words of left) remaining
        mov     esi, dword ptr [left]       ; esi -> current word of left
        mov     ebx, dword ptr [result]     ; ebx -> current word of result

    row_loop:
        mov     edi, dword ptr [right]      ; rewind to right[0] for every row
        mov     eax, ebx                    ; eax = start of this row in result
        pxor    mm0, mm0                    ; mm0 = running carry = 0
        mov     ecx, sRight                 ; ecx = columns remaining in this row
        movd    mm1, dword ptr [esi]        ; mm1 = left[i], fixed for the row

    col_loop:
        movd    mm2, dword ptr [edi]        ; mm2 = right[j]
        lea     edi, [edi+4]
        movd    mm3, dword ptr [ebx]        ; mm3 = result[i+j] accumulated so far
        pmuludq mm2, mm1                    ; mm2 = left[i] * right[j] (64-bit)
        paddq   mm0, mm3                    ; carry + old word + product cannot
        paddq   mm0, mm2                    ;   exceed 2^64-1, so no overflow
        movd    dword ptr [ebx], mm0        ; store low 32 bits
        psrlq   mm0, 32                     ; keep high 32 bits as next carry
        lea     ebx, [ebx+4]
        sub     ecx, 1
        jnz     col_loop

        movd    dword ptr [ebx], mm0        ; result[i+sRight] = final carry of row
        lea     esi, [esi+4]                ; advance to left[i+1]
        lea     ebx, [eax+4]                ; next row starts one word higher
        sub     edx, 1
        jnz     row_loop

        emms                                ; leave MMX state before returning
    }
}
|
|