- 注册时间
- 2007-12-28
- 最后登录
- 1970-1-1
- 威望
- 星
- 金币
- 枚
- 贡献
- 分
- 经验
- 点
- 鲜花
- 朵
- 魅力
- 点
- 上传
- 次
- 下载
- 次
- 积分
- 12787
- 在线时间
- 小时
|
发表于 2020-2-23 22:10:40
|
显示全部楼层
64位basecase mul的原代码,见 mpn/x86_64/coreisbr/mul_basecase.asm,AT&T格式
- dnl AMD64 mpn_mul_basecase optimised for Intel Sandy bridge and Ivy bridge.
- dnl Contributed to the GNU project by Torbjörn Granlund.
- dnl Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
- dnl This file is part of the GNU MP Library.
- dnl
- dnl The GNU MP Library is free software; you can redistribute it and/or modify
- dnl it under the terms of either:
- dnl
- dnl * the GNU Lesser General Public License as published by the Free
- dnl Software Foundation; either version 3 of the License, or (at your
- dnl option) any later version.
- dnl
- dnl or
- dnl
- dnl * the GNU General Public License as published by the Free Software
- dnl Foundation; either version 2 of the License, or (at your option) any
- dnl later version.
- dnl
- dnl or both in parallel, as here.
- dnl
- dnl The GNU MP Library is distributed in the hope that it will be useful, but
- dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- dnl for more details.
- dnl
- dnl You should have received copies of the GNU General Public License and the
- dnl GNU Lesser General Public License along with the GNU MP Library. If not,
- dnl see https://www.gnu.org/licenses/.
- include(`../config.m4')
- C cycles/limb mul_1 mul_2 mul_3 addmul_2
- C AMD K8,K9
- C AMD K10
- C AMD bull
- C AMD pile
- C AMD steam
- C AMD bobcat
- C AMD jaguar
- C Intel P4
- C Intel core
- C Intel NHM
- C Intel SBR 2.5 2.5 - 2.95
- C Intel IBR 2.4 2.3 - 2.68
- C Intel HWL 2.35 2.0 - 2.5
- C Intel BWL
- C Intel atom
- C VIA nano
- C The inner loops of this code are the result of running a code generation and
- C optimisation tool suite written by David Harvey and Torbjorn Granlund.
- C TODO
- C * Fix the addmul_2 fluctuation affecting SBR.
- C * Improve feed-in code, avoiding zeroing of many registers and dummy adds in
- C the loops at the expense of code size.
- C * Adjoin a mul_3, avoiding slow mul_1 for odd vn.
- C * Consider replacing the 2-way mul_2 code with 4-way code, for a very slight
- C speedup.
- C * Further micro-optimise.
- C When playing with pointers, set this to $2 to fall back to conservative
- C indexing in wind-down code.
- C I takes two operand spellings and expands to the first one; the wind-down
- C code passes a fixed-offset form first and an n-indexed form second.
- define(`I',`$1')
- C Incoming arguments: result pointer, first operand pointer and its limb
- C count, second operand pointer and its limb count.
- C NOTE(review): these are the first five System V integer argument
- C registers; FUNC_ENTRY presumably maps DOS64 arguments onto them - confirm.
- define(`rp', `%rdi')
- define(`up', `%rsi')
- define(`un_param',`%rdx')
- define(`vp', `%rcx')
- define(`vn', `%r8')
- C un holds the negated copy of un_param for the whole function.
- define(`un', `%rbx')
- C w0-w3 are scratch limbs that carry partial products between steps.
- define(`w0', `%r10')
- define(`w1', `%r11')
- define(`w2', `%r12')
- define(`w3', `%r13')
- C n is the inner loop index; it starts near un and is counted upwards.
- define(`n', `%rbp')
- C v0 is the multiplier limb loaded from vp.
- define(`v0', `%r9')
- ABI_SUPPORT(DOS64)
- ABI_SUPPORT(STD64)
- ASM_START()
- TEXT
- ALIGN(16)
- C Entry point. Saves rbx and rbp, keeps the negated limb count in un, moves
- C up and rp forward by un_param limbs so that later code can index them with
- C a negative counter, and starts the product of the first limb of up with
- C the first limb of vp before choosing the first pass from the parity of vn.
- PROLOGUE(mpn_mul_basecase)
- FUNC_ENTRY(4)
- C NOTE(review): presumably emitted only for DOS64, where the fifth argument
- C is read from the stack into the register used for vn - confirm in config.m4.
- IFDOS(` mov 56(%rsp), %r8d ')
- push %rbx
- push %rbp
- mov un_param, un C free up rdx
- neg un
- mov (up), %rax C shared for mul_1 and mul_2
- lea (up,un_param,8), up C point at operand end
- lea (rp,un_param,8), rp C point at rp[un], un limbs past the start
- mov (vp), v0 C shared for mul_1 and mul_2
- mul v0 C shared for mul_1 and mul_2
- C Even vn starts with the two-limb mul_2 pass; odd vn falls through to mul_1.
- test $1, R8(vn)
- jz L(do_mul_2)
- C mul_1 pass, used first when vn is odd: multiplies the un limbs at up by v0
- C and stores the product through rp. rdx:rax already holds the first limb
- C product from the entry code. The low two bits of un pick the entry point
- C into the 4-way unrolled loop.
- L(do_mul_1):
- test $1, R8(un)
- jnz L(m1x1)
- L(m1x0):mov %rax, w0 C un = 2, 4, 6, 8, ...
- mov %rdx, w1
- mov 8(up,un,8), %rax
- test $2, R8(un)
- jnz L(m110)
- L(m100):lea 2(un), n C un = 4, 8, 12, ...
- jmp L(m1l0)
- L(m110):lea (un), n C un = 2, 6, 10, ...
- jmp L(m1l2)
- L(m1x1):mov %rax, w1 C un = 1, 3, 5, 7, ...
- mov %rdx, w0
- test $2, R8(un)
- jz L(m111)
- L(m101):lea 3(un), n C un = 1, 5, 9, ...
- C un is negative here, so n = 3 + un is non-negative only for a one-limb
- C operand. In that case the product is just rdx:rax, so store both limbs
- C and return at once.
- C NOTE(review): this returns without looking at vn, so it relies on vn
- C being 1 whenever un is 1 - confirm against the caller contract.
- test n, n
- js L(m1l1)
- mov %rax, -8(rp)
- mov %rdx, (rp)
- pop %rbp
- pop %rbx
- FUNC_EXIT()
- ret
- L(m111):lea 1(un), n C un = 3, 7, 11, ...
- mov 8(up,un,8), %rax
- jmp L(m1l3)
- ALIGN(16) C FIXME
- C 4-way unrolled loop. Each step multiplies one limb of up by v0, adds the
- C low half to the pending high half, and stores one finished limb. w0 and
- C w1 swap roles from step to step.
- L(m1tp):mov %rdx, w0
- add %rax, w1
- L(m1l1):mov -16(up,n,8), %rax
- adc $0, w0
- mul v0
- add %rax, w0
- mov w1, -24(rp,n,8)
- mov -8(up,n,8), %rax
- mov %rdx, w1
- adc $0, w1
- L(m1l0):mul v0
- mov w0, -16(rp,n,8)
- add %rax, w1
- mov %rdx, w0
- mov (up,n,8), %rax
- adc $0, w0
- L(m1l3):mul v0
- mov w1, -8(rp,n,8)
- mov %rdx, w1
- add %rax, w0
- mov 8(up,n,8), %rax
- adc $0, w1
- L(m1l2):mul v0
- mov w0, (rp,n,8)
- C n is negative while limbs remain; the carry out of this add signals that
- C n has crossed from negative to non-negative and ends the loop.
- add $4, n
- jnc L(m1tp)
- C Wind-down: fold the last low product into w1, carry into rdx, and store
- C the top two limbs of this pass.
- L(m1ed):add %rax, w1
- adc $0, %rdx
- mov w1, I(-8(rp),-24(rp,n,8))
- mov %rdx, I((rp),-16(rp,n,8))
- C One limb of vp is consumed. If none remain, leave through ret2, which
- C pops only rbp and rbx. Otherwise step vp and rp by one limb.
- dec R32(vn)
- jz L(ret2)
- lea 8(vp), vp
- lea 8(rp), rp
- C Save r12-r14 so the stack matches what the mul_2 path has pushed by the
- C time it reaches do_addmul.
- push %r12
- push %r13
- push %r14
- jmp L(do_addmul)
- C mul_2 pass, used first when vn is even: multiplies up by the two limbs v0
- C and v1 in one sweep. r12-r14 are saved here because w2, w3 and v1 live in
- C them. rdx:rax still holds the first limb product with v0 from the entry
- C code.
- L(do_mul_2):
- define(`v1', `%r14')
- push %r12
- push %r13
- push %r14
- mov 8(vp), v1
- C The parity of un picks the entry point into the 2-way unrolled loop.
- test $1, R8(un)
- jnz L(m2b1)
- L(m2b0):lea (un), n
- xor w0, w0
- mov %rax, w2
- mov %rdx, w1
- jmp L(m2l0)
- L(m2b1):lea 1(un), n
- xor w1, w1
- xor w2, w2
- mov %rax, w0
- mov %rdx, w3
- jmp L(m2l1)
- ALIGN(32)
- C 2-way unrolled loop. Each full iteration issues four multiplies, two with
- C v0 and two with v1, and stores two result limbs.
- L(m2tp):mul v0
- add %rax, w0
- mov %rdx, w3
- adc $0, w3
- L(m2l1):mov -8(up,n,8), %rax
- mul v1
- add w1, w0
- adc $0, w3
- add %rax, w2
- mov w0, -8(rp,n,8)
- mov %rdx, w0
- adc $0, w0
- mov (up,n,8), %rax
- mul v0
- add %rax, w2
- mov %rdx, w1
- adc $0, w1
- add w3, w2
- L(m2l0):mov (up,n,8), %rax
- adc $0, w1
- mul v1
- mov w2, (rp,n,8)
- add %rax, w0
- mov %rdx, w2
- mov 8(up,n,8), %rax
- adc $0, w2
- C n is negative while limbs remain; the carry out of this add ends the loop.
- add $2, n
- jnc L(m2tp)
- C Wind-down: finish the last limb of up against v0 and v1 and store the top
- C three limbs of this pass.
- L(m2ed):mul v0
- add %rax, w0
- mov %rdx, w3
- adc $0, w3
- mov I(-8(up),-8(up,n,8)), %rax
- mul v1
- add w1, w0
- adc $0, w3
- add %rax, w2
- mov w0, I(-8(rp),-8(rp,n,8))
- adc $0, %rdx
- add w3, w2
- mov w2, I((rp),(rp,n,8))
- adc $0, %rdx
- mov %rdx, I(8(rp),8(rp,n,8))
- C Two limbs of vp are consumed. If none remain, leave through ret5, which
- C also pops r14, r13 and r12. Otherwise step vp and rp by two limbs and
- C fall through into do_addmul.
- add $-2, R32(vn)
- jz L(ret5)
- lea 16(vp), vp
- lea 16(rp), rp
- C addmul_2 outer loop: every trip adds the product of up with the limb pair
- C v0, v1 into the limbs already stored at rp, then steps vp and rp by two
- C limbs. r15 is saved for X1. vn is pushed to the stack and redefined as
- C that slot because r8 is reused for v1, and r14 becomes X0.
- L(do_addmul):
- push %r15
- push vn C save vn in new stack slot
- define(`vn', `(%rsp)')
- define(`X0', `%r14')
- define(`X1', `%r15')
- define(`v1', `%r8')
- C Load the next limb pair from vp, start the first product with v0, and use
- C the low two bits of un to pick the entry point into the 4-way unrolled loop.
- L(outer):
- mov (vp), v0
- mov 8(vp), v1
- mov (up,un,8), %rax
- mul v0
- test $1, R8(un)
- jnz L(a1x1)
- L(a1x0):mov (rp,un,8), X0
- xor w0, w0
- mov %rdx, w1
- test $2, R8(un)
- jnz L(a110)
- L(a100):lea 2(un), n C un = 4, 8, 12, ...
- add %rax, X0
- adc $0, w1
- mov (up,un,8), %rax
- mul v1
- mov 8(rp,un,8), X1
- jmp L(lo0)
- L(a110):lea (un), n C un = 2, 6, 10, ...
- xor w3, w3
- jmp L(lo2)
- L(a1x1):mov (rp,un,8), X1
- xor w2, w2
- xor w1, w1
- test $2, R8(un)
- jz L(a111)
- L(a101):lea 3(un), n C un = 1, 5, 9, ...
- mov %rdx, w3
- add %rax, X1
- mov (up,un,8), %rax
- mov 8(rp,un,8), X0
- adc $0, w3
- jmp L(top)
- L(a111):lea 1(un), n C un = 3, 7, 11, ...
- jmp L(lo3)
- ALIGN(32)
- C 4-way unrolled inner loop. X0 and X1 alternately hold the rp limb being
- C updated, and w0-w3 carry the high halves of the products forward.
- L(top): mul v1
- mov %rdx, w0
- add %rax, X0
- adc $0, w0
- add w1, X1
- adc $0, w3
- add w2, X0
- adc $0, w0
- mov -16(up,n,8), %rax
- mul v0
- add %rax, X0
- mov %rdx, w1
- adc $0, w1
- mov -16(up,n,8), %rax
- mul v1
- mov X1, -24(rp,n,8)
- mov -8(rp,n,8), X1
- add w3, X0
- adc $0, w1
- L(lo0): mov %rdx, w2
- mov X0, -16(rp,n,8)
- add %rax, X1
- adc $0, w2
- mov -8(up,n,8), %rax
- add w0, X1
- adc $0, w2
- mul v0
- L(lo3): add %rax, X1
- mov %rdx, w3
- adc $0, w3
- mov -8(up,n,8), %rax
- mul v1
- add w1, X1
- mov (rp,n,8), X0
- adc $0, w3
- mov %rdx, w0
- add %rax, X0
- adc $0, w0
- mov (up,n,8), %rax
- mul v0
- add w2, X0
- mov X1, -8(rp,n,8)
- mov %rdx, w1
- adc $0, w0
- L(lo2): add %rax, X0
- adc $0, w1
- mov (up,n,8), %rax
- add w3, X0
- adc $0, w1
- mul v1
- mov 8(rp,n,8), X1
- add %rax, X1
- mov %rdx, w2
- adc $0, w2
- mov 8(up,n,8), %rax
- mov X0, (rp,n,8)
- mul v0
- add w0, X1
- mov %rdx, w3
- adc $0, w2
- add %rax, X1
- mov 8(up,n,8), %rax
- mov 16(rp,n,8), X0 C useless but harmless in final iter
- adc $0, w3
- C n is negative while limbs remain; the carry out of this add ends the loop.
- add $4, n
- jnc L(top)
- C Wind-down: finish the last product with v1 and store the top three limbs
- C of this pass.
- L(end): mul v1
- add w1, X1
- adc $0, w3
- add w2, %rax
- adc $0, %rdx
- mov X1, I(-8(rp),-24(rp,n,8))
- add w3, %rax
- adc $0, %rdx
- mov %rax, I((rp),-16(rp,n,8))
- mov %rdx, I(8(rp),-8(rp,n,8))
- C addl sets the zero flag from the updated vn stack slot. The two lea
- C instructions do not change flags, so jnz still tests that result.
- addl $-2, vn
- lea 16(vp), vp
- lea 16(rp), rp
- jnz L(outer)
- pop %rax C deallocate vn slot
- pop %r15
- C Shared exits: ret5 undoes the r12-r14 pushes, ret2 the rbx and rbp pushes.
- L(ret5):pop %r14
- pop %r13
- pop %r12
- L(ret2):pop %rbp
- pop %rbx
- FUNC_EXIT()
- ret
- EPILOGUE()
复制代码
反汇编后的原代码,Intel格式
- 00000000000008a0 <__gmpn_mul_basecase>:
- 8a0: 53 push rbx
- 8a1: 55 push rbp
- 8a2: 48 89 d3 mov rbx,rdx
- 8a5: 48 f7 db neg rbx
- 8a8: 48 8b 06 mov rax,QWORD PTR [rsi]
- 8ab: 48 8d 34 d6 lea rsi,[rsi+rdx*8]
- 8af: 48 8d 3c d7 lea rdi,[rdi+rdx*8]
- 8b3: 4c 8b 09 mov r9,QWORD PTR [rcx]
- 8b6: 49 f7 e1 mul r9
- 8b9: 41 f6 c0 01 test r8b,0x1
- 8bd: 0f 84 d7 00 00 00 je 99a <__gmpn_mul_basecase+0xfa>
- 8c3: f6 c3 01 test bl,0x1
- 8c6: 75 1e jne 8e6 <__gmpn_mul_basecase+0x46>
- 8c8: 49 89 c2 mov r10,rax
- 8cb: 49 89 d3 mov r11,rdx
- 8ce: 48 8b 44 de 08 mov rax,QWORD PTR [rsi+rbx*8+0x8]
- 8d3: f6 c3 02 test bl,0x2
- 8d6: 75 06 jne 8de <__gmpn_mul_basecase+0x3e>
- 8d8: 48 8d 6b 02 lea rbp,[rbx+0x2]
- 8dc: eb 58 jmp 936 <__gmpn_mul_basecase+0x96>
- 8de: 48 8d 2b lea rbp,[rbx]
- 8e1: e9 7d 00 00 00 jmp 963 <__gmpn_mul_basecase+0xc3>
- 8e6: 49 89 c3 mov r11,rax
- 8e9: 49 89 d2 mov r10,rdx
- 8ec: f6 c3 02 test bl,0x2
- 8ef: 74 13 je 904 <__gmpn_mul_basecase+0x64>
- 8f1: 48 8d 6b 03 lea rbp,[rbx+0x3]
- 8f5: 48 85 ed test rbp,rbp
- 8f8: 78 1c js 916 <__gmpn_mul_basecase+0x76>
- 8fa: 48 89 47 f8 mov QWORD PTR [rdi-0x8],rax
- 8fe: 48 89 17 mov QWORD PTR [rdi],rdx
- 901: 5d pop rbp
- 902: 5b pop rbx
- 903: c3 ret
- 904: 48 8d 6b 01 lea rbp,[rbx+0x1]
- 908: 48 8b 44 de 08 mov rax,QWORD PTR [rsi+rbx*8+0x8]
- 90d: eb 3d jmp 94c <__gmpn_mul_basecase+0xac>
- 90f: 90 nop
- 910: 49 89 d2 mov r10,rdx
- 913: 49 01 c3 add r11,rax
- 916: 48 8b 44 ee f0 mov rax,QWORD PTR [rsi+rbp*8-0x10]
- 91b: 49 83 d2 00 adc r10,0x0
- 91f: 49 f7 e1 mul r9
- 922: 49 01 c2 add r10,rax
- 925: 4c 89 5c ef e8 mov QWORD PTR [rdi+rbp*8-0x18],r11
- 92a: 48 8b 44 ee f8 mov rax,QWORD PTR [rsi+rbp*8-0x8]
- 92f: 49 89 d3 mov r11,rdx
- 932: 49 83 d3 00 adc r11,0x0
- 936: 49 f7 e1 mul r9
- 939: 4c 89 54 ef f0 mov QWORD PTR [rdi+rbp*8-0x10],r10
- 93e: 49 01 c3 add r11,rax
- 941: 49 89 d2 mov r10,rdx
- 944: 48 8b 04 ee mov rax,QWORD PTR [rsi+rbp*8]
- 948: 49 83 d2 00 adc r10,0x0
- 94c: 49 f7 e1 mul r9
- 94f: 4c 89 5c ef f8 mov QWORD PTR [rdi+rbp*8-0x8],r11
- 954: 49 89 d3 mov r11,rdx
- 957: 49 01 c2 add r10,rax
- 95a: 48 8b 44 ee 08 mov rax,QWORD PTR [rsi+rbp*8+0x8]
- 95f: 49 83 d3 00 adc r11,0x0
- 963: 49 f7 e1 mul r9
- 966: 4c 89 14 ef mov QWORD PTR [rdi+rbp*8],r10
- 96a: 48 83 c5 04 add rbp,0x4
- 96e: 73 a0 jae 910 <__gmpn_mul_basecase+0x70>
- 970: 49 01 c3 add r11,rax
- 973: 48 83 d2 00 adc rdx,0x0
- 977: 4c 89 5f f8 mov QWORD PTR [rdi-0x8],r11
- 97b: 48 89 17 mov QWORD PTR [rdi],rdx
- 97e: 41 ff c8 dec r8d
- 981: 0f 84 cd 02 00 00 je c54 <__gmpn_mul_basecase+0x3b4>
- 987: 48 8d 49 08 lea rcx,[rcx+0x8]
- 98b: 48 8d 7f 08 lea rdi,[rdi+0x8]
- 98f: 41 54 push r12
- 991: 41 55 push r13
- 993: 41 56 push r14
- 995: e9 ef 00 00 00 jmp a89 <__gmpn_mul_basecase+0x1e9>
- 99a: 41 54 push r12
- 99c: 41 55 push r13
- 99e: 41 56 push r14
- 9a0: 4c 8b 71 08 mov r14,QWORD PTR [rcx+0x8]
- 9a4: f6 c3 01 test bl,0x1
- 9a7: 75 0e jne 9b7 <__gmpn_mul_basecase+0x117>
- 9a9: 48 8d 2b lea rbp,[rbx]
- 9ac: 4d 31 d2 xor r10,r10
- 9af: 49 89 c4 mov r12,rax
- 9b2: 49 89 d3 mov r11,rdx
- 9b5: eb 68 jmp a1f <__gmpn_mul_basecase+0x17f>
- 9b7: 48 8d 6b 01 lea rbp,[rbx+0x1]
- 9bb: 4d 31 db xor r11,r11
- 9be: 4d 31 e4 xor r12,r12
- 9c1: 49 89 c2 mov r10,rax
- 9c4: 49 89 d5 mov r13,rdx
- 9c7: eb 24 jmp 9ed <__gmpn_mul_basecase+0x14d>
- 9c9: 0f 1f 00 nop DWORD PTR [rax]
- 9cc: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
- 9d3: 00 00 00
- 9d6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
- 9dd: 00 00 00
- 9e0: 49 f7 e1 mul r9
- 9e3: 49 01 c2 add r10,rax
- 9e6: 49 89 d5 mov r13,rdx
- 9e9: 49 83 d5 00 adc r13,0x0
- 9ed: 48 8b 44 ee f8 mov rax,QWORD PTR [rsi+rbp*8-0x8]
- 9f2: 49 f7 e6 mul r14
- 9f5: 4d 01 da add r10,r11
- 9f8: 49 83 d5 00 adc r13,0x0
- 9fc: 49 01 c4 add r12,rax
- 9ff: 4c 89 54 ef f8 mov QWORD PTR [rdi+rbp*8-0x8],r10
- a04: 49 89 d2 mov r10,rdx
- a07: 49 83 d2 00 adc r10,0x0
- a0b: 48 8b 04 ee mov rax,QWORD PTR [rsi+rbp*8]
- a0f: 49 f7 e1 mul r9
- a12: 49 01 c4 add r12,rax
- a15: 49 89 d3 mov r11,rdx
- a18: 49 83 d3 00 adc r11,0x0
- a1c: 4d 01 ec add r12,r13
- a1f: 48 8b 04 ee mov rax,QWORD PTR [rsi+rbp*8]
- a23: 49 83 d3 00 adc r11,0x0
- a27: 49 f7 e6 mul r14
- a2a: 4c 89 24 ef mov QWORD PTR [rdi+rbp*8],r12
- a2e: 49 01 c2 add r10,rax
- a31: 49 89 d4 mov r12,rdx
- a34: 48 8b 44 ee 08 mov rax,QWORD PTR [rsi+rbp*8+0x8]
- a39: 49 83 d4 00 adc r12,0x0
- a3d: 48 83 c5 02 add rbp,0x2
- a41: 73 9d jae 9e0 <__gmpn_mul_basecase+0x140>
- a43: 49 f7 e1 mul r9
- a46: 49 01 c2 add r10,rax
- a49: 49 89 d5 mov r13,rdx
- a4c: 49 83 d5 00 adc r13,0x0
- a50: 48 8b 46 f8 mov rax,QWORD PTR [rsi-0x8]
- a54: 49 f7 e6 mul r14
- a57: 4d 01 da add r10,r11
- a5a: 49 83 d5 00 adc r13,0x0
- a5e: 49 01 c4 add r12,rax
- a61: 4c 89 57 f8 mov QWORD PTR [rdi-0x8],r10
- a65: 48 83 d2 00 adc rdx,0x0
- a69: 4d 01 ec add r12,r13
- a6c: 4c 89 27 mov QWORD PTR [rdi],r12
- a6f: 48 83 d2 00 adc rdx,0x0
- a73: 48 89 57 08 mov QWORD PTR [rdi+0x8],rdx
- a77: 41 83 c0 fe add r8d,0xfffffffe
- a7b: 0f 84 cd 01 00 00 je c4e <__gmpn_mul_basecase+0x3ae>
- a81: 48 8d 49 10 lea rcx,[rcx+0x10]
- a85: 48 8d 7f 10 lea rdi,[rdi+0x10]
- a89: 41 57 push r15
- a8b: 41 50 push r8
- a8d: 4c 8b 09 mov r9,QWORD PTR [rcx]
- a90: 4c 8b 41 08 mov r8,QWORD PTR [rcx+0x8]
- a94: 48 8b 04 de mov rax,QWORD PTR [rsi+rbx*8]
- a98: 49 f7 e1 mul r9
- a9b: f6 c3 01 test bl,0x1
- a9e: 75 36 jne ad6 <__gmpn_mul_basecase+0x236>
- aa0: 4c 8b 34 df mov r14,QWORD PTR [rdi+rbx*8]
- aa4: 4d 31 d2 xor r10,r10
- aa7: 49 89 d3 mov r11,rdx
- aaa: f6 c3 02 test bl,0x2
- aad: 75 1c jne acb <__gmpn_mul_basecase+0x22b>
- aaf: 48 8d 6b 02 lea rbp,[rbx+0x2]
- ab3: 49 01 c6 add r14,rax
- ab6: 49 83 d3 00 adc r11,0x0
- aba: 48 8b 04 de mov rax,QWORD PTR [rsi+rbx*8]
- abe: 49 f7 e0 mul r8
- ac1: 4c 8b 7c df 08 mov r15,QWORD PTR [rdi+rbx*8+0x8]
- ac6: e9 9b 00 00 00 jmp b66 <__gmpn_mul_basecase+0x2c6>
- acb: 48 8d 2b lea rbp,[rbx]
- ace: 4d 31 ed xor r13,r13
- ad1: e9 eb 00 00 00 jmp bc1 <__gmpn_mul_basecase+0x321>
- ad6: 4c 8b 3c df mov r15,QWORD PTR [rdi+rbx*8]
- ada: 4d 31 e4 xor r12,r12
- add: 4d 31 db xor r11,r11
- ae0: f6 c3 02 test bl,0x2
- ae3: 74 19 je afe <__gmpn_mul_basecase+0x25e>
- ae5: 48 8d 6b 03 lea rbp,[rbx+0x3]
- ae9: 49 89 d5 mov r13,rdx
- aec: 49 01 c7 add r15,rax
- aef: 48 8b 04 de mov rax,QWORD PTR [rsi+rbx*8]
- af3: 4c 8b 74 df 08 mov r14,QWORD PTR [rdi+rbx*8+0x8]
- af8: 49 83 d5 00 adc r13,0x0
- afc: eb 22 jmp b20 <__gmpn_mul_basecase+0x280>
- afe: 48 8d 6b 01 lea rbp,[rbx+0x1]
- b02: e9 7d 00 00 00 jmp b84 <__gmpn_mul_basecase+0x2e4>
- b07: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
- b0c: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
- b13: 00 00 00
- b16: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
- b1d: 00 00 00
- b20: 49 f7 e0 mul r8
- b23: 49 89 d2 mov r10,rdx
- b26: 49 01 c6 add r14,rax
- b29: 49 83 d2 00 adc r10,0x0
- b2d: 4d 01 df add r15,r11
- b30: 49 83 d5 00 adc r13,0x0
- b34: 4d 01 e6 add r14,r12
- b37: 49 83 d2 00 adc r10,0x0
- b3b: 48 8b 44 ee f0 mov rax,QWORD PTR [rsi+rbp*8-0x10]
- b40: 49 f7 e1 mul r9
- b43: 49 01 c6 add r14,rax
- b46: 49 89 d3 mov r11,rdx
- b49: 49 83 d3 00 adc r11,0x0
- b4d: 48 8b 44 ee f0 mov rax,QWORD PTR [rsi+rbp*8-0x10]
- b52: 49 f7 e0 mul r8
- b55: 4c 89 7c ef e8 mov QWORD PTR [rdi+rbp*8-0x18],r15
- b5a: 4c 8b 7c ef f8 mov r15,QWORD PTR [rdi+rbp*8-0x8]
- b5f: 4d 01 ee add r14,r13
- b62: 49 83 d3 00 adc r11,0x0
- b66: 49 89 d4 mov r12,rdx
- b69: 4c 89 74 ef f0 mov QWORD PTR [rdi+rbp*8-0x10],r14
- b6e: 49 01 c7 add r15,rax
- b71: 49 83 d4 00 adc r12,0x0
- b75: 48 8b 44 ee f8 mov rax,QWORD PTR [rsi+rbp*8-0x8]
- b7a: 4d 01 d7 add r15,r10
- b7d: 49 83 d4 00 adc r12,0x0
- b81: 49 f7 e1 mul r9
- b84: 49 01 c7 add r15,rax
- b87: 49 89 d5 mov r13,rdx
- b8a: 49 83 d5 00 adc r13,0x0
- b8e: 48 8b 44 ee f8 mov rax,QWORD PTR [rsi+rbp*8-0x8]
- b93: 49 f7 e0 mul r8
- b96: 4d 01 df add r15,r11
- b99: 4c 8b 34 ef mov r14,QWORD PTR [rdi+rbp*8]
- b9d: 49 83 d5 00 adc r13,0x0
- ba1: 49 89 d2 mov r10,rdx
- ba4: 49 01 c6 add r14,rax
- ba7: 49 83 d2 00 adc r10,0x0
- bab: 48 8b 04 ee mov rax,QWORD PTR [rsi+rbp*8]
- baf: 49 f7 e1 mul r9
- bb2: 4d 01 e6 add r14,r12
- bb5: 4c 89 7c ef f8 mov QWORD PTR [rdi+rbp*8-0x8],r15
- bba: 49 89 d3 mov r11,rdx
- bbd: 49 83 d2 00 adc r10,0x0
- bc1: 49 01 c6 add r14,rax
- bc4: 49 83 d3 00 adc r11,0x0
- bc8: 48 8b 04 ee mov rax,QWORD PTR [rsi+rbp*8]
- bcc: 4d 01 ee add r14,r13
- bcf: 49 83 d3 00 adc r11,0x0
- bd3: 49 f7 e0 mul r8
- bd6: 4c 8b 7c ef 08 mov r15,QWORD PTR [rdi+rbp*8+0x8]
- bdb: 49 01 c7 add r15,rax
- bde: 49 89 d4 mov r12,rdx
- be1: 49 83 d4 00 adc r12,0x0
- be5: 48 8b 44 ee 08 mov rax,QWORD PTR [rsi+rbp*8+0x8]
- bea: 4c 89 34 ef mov QWORD PTR [rdi+rbp*8],r14
- bee: 49 f7 e1 mul r9
- bf1: 4d 01 d7 add r15,r10
- bf4: 49 89 d5 mov r13,rdx
- bf7: 49 83 d4 00 adc r12,0x0
- bfb: 49 01 c7 add r15,rax
- bfe: 48 8b 44 ee 08 mov rax,QWORD PTR [rsi+rbp*8+0x8]
- c03: 4c 8b 74 ef 10 mov r14,QWORD PTR [rdi+rbp*8+0x10]
- c08: 49 83 d5 00 adc r13,0x0
- c0c: 48 83 c5 04 add rbp,0x4
- c10: 0f 83 0a ff ff ff jae b20 <__gmpn_mul_basecase+0x280>
- c16: 49 f7 e0 mul r8
- c19: 4d 01 df add r15,r11
- c1c: 49 83 d5 00 adc r13,0x0
- c20: 4c 01 e0 add rax,r12
- c23: 48 83 d2 00 adc rdx,0x0
- c27: 4c 89 7f f8 mov QWORD PTR [rdi-0x8],r15
- c2b: 4c 01 e8 add rax,r13
- c2e: 48 83 d2 00 adc rdx,0x0
- c32: 48 89 07 mov QWORD PTR [rdi],rax
- c35: 48 89 57 08 mov QWORD PTR [rdi+0x8],rdx
- c39: 83 04 24 fe add DWORD PTR [rsp],0xfffffffe
- c3d: 48 8d 49 10 lea rcx,[rcx+0x10]
- c41: 48 8d 7f 10 lea rdi,[rdi+0x10]
- c45: 0f 85 42 fe ff ff jne a8d <__gmpn_mul_basecase+0x1ed>
- c4b: 58 pop rax
- c4c: 41 5f pop r15
- c4e: 41 5e pop r14
- c50: 41 5d pop r13
- c52: 41 5c pop r12
- c54: 5d pop rbp
- c55: 5b pop rbx
- c56: c3 ret
复制代码
|
|