- 注册时间
- 2007-12-28
- 最后登录
- 1970-1-1
- 威望
- 星
- 金币
- 枚
- 贡献
- 分
- 经验
- 点
- 鲜花
- 朵
- 魅力
- 点
- 上传
- 次
- 下载
- 次
- 积分
- 12785
- 在线时间
- 小时
|
楼主 |
发表于 2016-11-25 15:55:32
|
显示全部楼层
我学习了下GMP的代码。完成了基为2^30 和GMP类似的除法函数。
这个算法的第一步是求n的逆元。下面函数mod_inverse,使用欧几里得辗转相除法,可得到a关于任意m的逆元。
- int extgcd(DWORD a, DWORD b, DWORD *px, DWORD *py)
- {
- DWORD d = a;
- if(b != 0)
- {
- d = extgcd(b, a % b, py, px);
- *py -= (a / b) * (*px);
- }
- else
- {
- *px = 1;
- *py = 0;
- }
- return d;
- }
- int mod_inverse(DWORD a, DWORD m)
- {
- DWORD x,y;
- extgcd(a, m, &x, &y);
- return ( (m + x) % m);
- }
复制代码
在GMP的代码中,则是使用一次查表,再加上2次迭代来快速地求逆元。值得注意是,求逆元的迭代算法和求倒数的迭代算法非常类似。下面给出求模=2^30的逆元,下面的代码中,MASK_B20= (2^30-1)= 0x3fffffff
- /* g_bi_tab[i] is the multiplicative inverse of 2*i+1 mod 256,
- ie. (g_bi_tab[i] * (2*i+1)) % 256 == 1 */
- const BYTE g_bi_tab[128] =
- {
- 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
- 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
- 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
- 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
- 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
- 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
- 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
- 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
- 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
- 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
- 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
- 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
- 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
- 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
- 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
- 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
- };
- DWORD _b_invert_C( DWORD *pN,int* pShift)
- {
- DWORD inv;
- int _shift;
- DWORD n=*pN;
- assert (n != 0);
- _shift=0;
- while ( (n & 1)==0 )
- {
- n>>=1; _shift++;
- }
- *pShift=_shift;
- *pN=n;
- if ( n==1)
- {
- return 1;
- }
- inv = g_bi_tab[(n/2) & 0x7F];
- inv = 2 * inv - inv * inv * n;
- inv = 2 * inv - inv * inv * n;
-
- return (inv & MASK_B30);
- }
复制代码
我还写了一个功能等价的汇编语言版本
- ;_declspec(naked) DWORD _b_invert_ALU( DWORD *pN, int *pShift)
- __b_invert_ALU PROC
- PARAM_np = 4
- PARAM_pShift = 8
- REG_rp EQU edi
- REG_np EQU ebx
- REG_shift EQU ecx
- push REG_np
- push REG_rp
- ;--------------------------------------------
- mov REG_np, [esp+8+PARAM_np]
- mov REG_rp,[esp+8+PARAM_pShift]
- mov eax, DP [REG_np]
- or eax, eax
- jz invalid_para
- bsf REG_shift,eax ; check tail bit0, n= x * 2^ ecx
- mov DP [REG_rp], REG_shift ; store shift
- shr eax,cl ; n= (n >> shift)
- mov DP [REG_np],eax ; update n
- cmp eax,1
- jz funExit ; if ( n==1) return 1
- shr eax,1 ; eax= n/2
- and eax, 127
- movzx ecx, byte ptr _g_bi_tab[eax] ;inv= g_bi_tab[n/2]
-
- ; Do "inv = 2 * inv - inv * inv * n"
- mov eax,ecx ; eax=inv
- mul ecx ; edx:eax = inv*inv
- mul DP [REG_np] ; edx:eax = inv*inv*n
- shl ecx,1 ; inv=inv*2
- sub ecx, eax ; inv=inv*2-inv*inv*n
-
- ; Do "inv = 2 * inv - inv * inv * n" again
- mov eax,ecx ; eax=inv
- mul ecx ; edx:eax = inv*inv
- mul DP [REG_np] ; edx:eax = inv*inv*n
- shl ecx,1 ; inv=inv*2
- sub ecx, eax ; inv=inv*2-inv*inv*n
- mov eax,ecx
- and eax,MASK_B30
- funExit:
- ;--------------------------------------------
- pop REG_rp
- pop REG_np
- ret
- invalid_para:
- mov eax,-1
- mov DP [REG_rp],eax
- pop REG_rp
- pop REG_np
- ret
- __b_invert_ALU ENDP
复制代码 |
|