第100章 中等难度练习题

要解答这个难度的题目,您可能会用到文本编辑器或者纸笔。

100.1 练习题2.1

100.1.1 Optimizing MSVC 2010 x86
; f1(g, x) -- x87 code. Both arguments are 8-byte values read from the stack
; (g at [esp+4], x at [esp+12]); the result is left in ST(0).
; Every pass computes |trunc(g*g - x)|; the loop ends once that value compares
; below 0.001, otherwise g is replaced by (g + x/g) * 0.5 and the test repeats.
__real@3fe0000000000000 DQ 03fe0000000000000r    ; bit pattern of the double 0.5
__real@3f50624dd2f1a9fc DQ 03f50624dd2f1a9fcr    ; bit pattern of the double 0.001

_g$   = 8
tv132 = 16               ; integer temporary; shares its stack offset with _x$
_x$ = 16
f1 PROC
         fld      QWORD PTR _x$[esp-4]                ; x (loaded before tv132 overwrites its slot)
         fld      QWORD PTR __real@3f50624dd2f1a9fc   ; 0.001
         fld      QWORD PTR __real@3fe0000000000000   ; 0.5
         fld      QWORD PTR _g$[esp-4]                ; FPU stack is now: g, 0.5, 0.001, x
$LN2@f1:
         fld      ST(0)                               ; duplicate g
         fmul     ST(0), ST(1)                        ; g*g
         fsub     ST(0), ST(4)                        ; g*g - x
         call     __ftol2_sse                         ; CRT helper; the code below takes its integer result from eax
         cdq
         xor      eax, edx
         sub      eax, edx                            ; cdq/xor/sub: eax = absolute value of eax
         mov      DWORD PTR tv132[esp-4], eax
         fild     DWORD PTR tv132[esp-4]              ; bring the absolute value back onto the FPU stack
         fcomp    ST(3)                               ; compare it with 0.001 and pop
         fnstsw   ax
         test     ah, 5                               ; isolate C0 and C2 of the FPU status word
         jnp      SHORT $LN19@f1                      ; exactly one bit set (the "less than" outcome of fcomp) -> done
         fld      ST(3)                               ; x
         fdiv     ST(0), ST(1)                        ; x / g
         faddp    ST(1), ST(0)                        ; g + x/g
         fmul     ST(0), ST(1)                        ; (g + x/g) * 0.5 becomes the new g
         jmp      SHORT $LN2@f1
$LN19@f1:
         fstp     ST(3)                               ; copy g over x, pop
         fstp     ST(1)                               ; discard one constant
         fstp     ST(0)                               ; discard the other; only g is left in ST(0)
         ret      0
f1 ENDP

; f2(x): calls f1 with 1.0 as its first argument and x as its second, and
; returns with whatever f1 left in ST(0).
__real@3ff0000000000000 DQ 03ff0000000000000r    ; bit pattern of the double 1.0 (f2 itself uses fld1 instead)

_x$ = 8
f2 PROC
         fld      QWORD PTR _x$[esp-4]    ; read x before esp moves
         sub      esp, 16                 ; room for two 8-byte arguments
         fstp     QWORD PTR [esp+8]       ; second argument = x
         fld1
         fstp     QWORD PTR [esp]         ; first argument = 1.0
         call     f1
         add      esp, 16                 ; caller removes the arguments
         ret      0
f2 ENDP
100.1.2 Optimizing MSVC 2012 x64
; f(x) -- scalar SSE2 code. The argument arrives in xmm0 and the result is
; returned in xmm0.
; Starting from g = 1.0, each pass computes |trunc(g*g - x)|; the loop ends when
; 0.001 is greater than that value, otherwise g becomes (x/g + g) * 0.5.
__real@3fe0000000000000 DQ 03fe0000000000000r    ; bit pattern of the double 0.5
__real@3f50624dd2f1a9fc DQ 03f50624dd2f1a9fcr    ; bit pattern of the double 0.001
__real@3ff0000000000000 DQ 03ff0000000000000r    ; bit pattern of the double 1.0

x$ = 8
f        PROC
         movsdx   xmm2, QWORD PTR __real@3ff0000000000000   ; xmm2 = g, initially 1.0
         movsdx   xmm5, QWORD PTR __real@3f50624dd2f1a9fc   ; xmm5 = 0.001
         movsdx   xmm4, QWORD PTR __real@3fe0000000000000   ; xmm4 = 0.5
         movapd   xmm3, xmm0                                ; xmm3 = x, kept for the whole loop
         npad     4
$LL4@f:
         movapd   xmm1, xmm2
         mulsd    xmm1, xmm2        ; g*g
         subsd    xmm1, xmm3        ; g*g - x
         cvttsd2si eax, xmm1        ; truncate to a 32-bit integer
         cdq
         xor      eax, edx
         sub      eax, edx          ; cdq/xor/sub: eax = absolute value of eax
         movd     xmm0, eax
         cvtdq2pd xmm0, xmm0        ; back to double
         comisd   xmm5, xmm0
         ja       SHORT $LN18@f     ; 0.001 > value -> finished
         movapd   xmm0, xmm3
         divsd    xmm0, xmm2        ; x / g
         addsd    xmm0, xmm2        ; x/g + g
         movapd   xmm2, xmm0
         mulsd    xmm2, xmm4        ; new g = (x/g + g) * 0.5
         jmp      SHORT $LL4@f
$LN18@f:
         movapd   xmm0, xmm2        ; return g
         ret       0
f        ENDP

100.2 练习题2.4

下面这段代码是一个标准库函数,源代码取自MSVC 2010。

100.2.1 Optimizing MSVC 2010
; _f(arg1, arg2): both arguments are pointers to byte strings terminated by a
; zero byte (cdecl, arguments on the stack).
; Returns in eax:
;   - arg1 itself when arg2 is empty;
;   - the first position inside arg1 at which the bytes of arg2 appear;
;   - 0 when there is no such position.
; Register roles inside the loops:
;   eax = candidate start position in arg1
;   edi = arg2
;   esi = (candidate position - arg2), so [esi+ecx] is the arg1 byte that pairs
;         with the arg2 byte at [ecx]
;   ecx = cursor inside arg2
; The three epilogue labels ($LN4@f, $LN14@f, $LN7@f) are placed according to
; the push/pop balance and the return value required on each path.
PUBLIC    _f
_TEXT    SEGMENT
_arg1$ = 8              ;size=4
_arg2$ = 12             ;size=4
_f    PROC
     push    esi
     mov     esi, DWORD PTR _arg1$[esp]
     push    edi
     mov     edi, DWORD PTR _arg2$[esp+4]
     cmp     BYTE PTR [edi], 0
     mov     eax, esi                    ; mov leaves the flags of the cmp intact
     je      SHORT $LN7@f                ; arg2 is empty -> return arg1 (ebx not pushed yet)
     mov     dl, BYTE PTR [esi]
     push    ebx
     test    dl, dl
     je      SHORT $LN4@f                ; arg1 is empty -> return 0
     sub     esi, edi                    ; turn esi into the distance between the two strings
     npad    6 ; align next label
$LL5@f:
     mov     ecx, edi                    ; restart at the beginning of arg2
     test    dl, dl
     je      SHORT $LN2@f
$LL3@f:
     mov     dl, BYTE PTR [ecx]
     test    dl, dl
     je      SHORT $LN14@f               ; end of arg2 reached -> match, eax is the answer
     movsx   ebx, BYTE PTR [esi+ecx]
     movsx   edx, dl
     sub     ebx, edx
     jne     SHORT $LN2@f                ; bytes differ -> this candidate fails
     inc     ecx
     cmp     BYTE PTR [esi+ecx], bl      ; bl is 0 here: has arg1 run out?
     jne     SHORT $LL3@f
$LN2@f:
     cmp    BYTE PTR [ecx], 0
     je     SHORT $LN14@f                ; arg2 fully consumed -> match
     mov    dl, BYTE PTR [eax+1]
     inc    eax                          ; next candidate position
     inc    esi
     test   dl, dl
     jne    SHORT $LL5@f
$LN4@f:
     xor    eax, eax                     ; not found
$LN14@f:
     pop    ebx
$LN7@f:
     pop    edi
     pop    esi
     ret    0
_f     ENDP
_TEXT     ENDS
END
100.2.2 GCC 4.4.1
                  ; f(arg_0, arg_4): non-optimized build, every value lives in a stack local.
                  ;   var_4 = current candidate position inside arg_0
                  ;   var_8 = cursor walking arg_0 from that position
                  ;   var_C = cursor walking arg_4
                  ; Returns arg_0 when arg_4 starts with a zero byte, the candidate position at
                  ; which all bytes of arg_4 were matched, or 0 when arg_0 is exhausted.
                  public f
f                 proc near

var_C             = dword ptr -0Ch
var_8             = dword ptr -8
var_4             = dword ptr -4
arg_0             = dword ptr  8
arg_4             = dword ptr  0Ch

                  push    ebp
                  mov     ebp, esp
                  sub     esp, 10h
                  mov     eax, [ebp+arg_0]
                  mov     [ebp+var_4], eax
                  mov     eax, [ebp+arg_4]
                  movzx   eax, byte ptr [eax]
                  test    al, al
                  jnz     short loc_8048443       ; arg_4 not empty -> enter the outer loop test
                  mov     eax, [ebp+arg_0]        ; arg_4 empty -> return arg_0
                  jmp     short locret_8048453

loc_80483F4:                                      ; outer loop body: start a comparison at var_4
                  mov     eax, [ebp+var_4]
                  mov     [ebp+var_8], eax
                  mov     eax, [ebp+arg_4]
                  mov     [ebp+var_C], eax
                  jmp     short loc_804840A

loc_8048402:                                      ; bytes were equal: advance both cursors
                  add     [ebp+var_8], 1
                  add     [ebp+var_C], 1

loc_804840A:                                      ; inner loop test
                  mov     eax, [ebp+var_8]
                  movzx   eax, byte ptr [eax]
                  test    al, al
                  jz      short loc_804842E       ; arg_0 cursor hit the zero byte
                  mov     eax, [ebp+var_C]
                  movzx   eax, byte ptr [eax]
                  test    al, al
                  jz      short loc_804842E       ; arg_4 cursor hit the zero byte
                  mov     eax, [ebp+var_8]
                  movzx   edx, byte ptr [eax]
                  mov     eax, [ebp+var_C]
                  movzx   eax, byte ptr [eax]
                  cmp     dl, al
                  jz      short loc_8048402       ; equal -> keep comparing

loc_804842E:                                      ; inner loop finished: was arg_4 consumed?
                  mov     eax, [ebp+var_C]
                  movzx   eax, byte ptr [eax]
                  test    al, al
                  jnz     short loc_804843D
                  mov     eax, [ebp+var_4]        ; yes -> return the candidate position
                  jmp     short locret_8048453

loc_804843D:
                  add     [ebp+var_4], 1          ; no -> try the next position
                  jmp     short loc_8048444

loc_8048443:
                  nop

loc_8048444:                                      ; outer loop test
                  mov     eax, [ebp+var_4]
                  movzx   eax, byte ptr [eax]
                  test    al, al
                  jnz     short loc_80483F4
                  mov     eax, 0                  ; arg_0 exhausted -> return 0

locret_8048453:
                  leave
                  retn
f                 endp
100.2.3 Optimizing Keil(ARM mode)
          ; Entry: r0 = first string, r1 = second string (bytes up to a zero byte).
          ; Exit:  r0 = r0 unchanged when the second string is empty, the position in
          ;        the first string where all bytes of the second were matched, or 0.
          ; r3 / r2 are the two comparison cursors; r12 / r4 hold the bytes they point at.
          PUSH     {r4,lr}
          LDRB     r2,[r1,#0]
          CMP      r2,#0
          POPEQ    {r4,pc}            ; second string empty -> return with r0 untouched
          B        |L0.80|
|L0.20|
          LDRB     r12,[r3,#0]
          CMP      r12,#0
          BEQ      |L0.64|            ; first-string cursor reached the zero byte
          LDRB     r4,[r2,#0]
          CMP      r4,#0
          POPEQ    {r4,pc}            ; second string fully consumed -> return r0
          CMP      r12,r4
          ADDEQ    r3,r3,#1           ; bytes equal: advance both cursors and repeat
          ADDEQ    r2,r2,#1
          BEQ      |L0.20|
          B        |L0.76|
|L0.64|
          LDRB     r2,[r2,#0]
          CMP      r2,#0
          POPEQ    {r4,pc}            ; both ended together -> return r0
|L0.76|
          ADD      r0,r0,#1           ; move on to the next candidate position
|L0.80|
          LDRB     r2,[r0,#0]
          CMP      r2,#0
          MOVNE    r3,r0              ; more input: reset both cursors ...
          MOVNE    r2,r1
          MOVEQ    r0,#0              ; ... otherwise the result is 0
          BNE      |L0.20|
          POP      {r4,pc}
100.2.4 Optimizing Keil(Thumb mode)
          ; Entry: r0 = first string, r1 = second string (bytes up to a zero byte).
          ; Exit:  r0 = r0 unchanged when the second string is empty, the position in
          ;        the first string where all bytes of the second were matched, or 0.
          ; r3 / r2 are the comparison cursors; r4 / r5 hold the bytes they point at.
          PUSH     {r4,r5,lr}
          LDRB     r2,[r1,#0]
          CMP      r2,#0
          BEQ      |L0.54|            ; second string empty -> return r0 as is
          B        |L0.46|
|L0.10|
          MOVS     r3,r0              ; start a comparison at the current candidate
          MOVS     r2,r1
          B        |L0.20|
|L0.16|
          ADDS     r3,r3,#1           ; bytes were equal: advance both cursors
          ADDS     r2,r2,#1
|L0.20|
          LDRB     r4,[r3,#0]
          CMP      r4,#0
          BEQ      |L0.38|            ; first-string cursor reached the zero byte
          LDRB     r5,[r2,#0]
          CMP      r5,#0
          BEQ      |L0.54|            ; second string fully consumed -> return r0
          CMP      r4,r5
          BEQ      |L0.16|
          B        |L0.44|
|L0.38|
          LDRB     r2,[r2,#0]
          CMP      r2,#0
          BEQ      |L0.54|            ; both ended together -> return r0
|L0.44|
          ADDS     r0,r0,#1           ; next candidate position
|L0.46|
          LDRB     r2,[r0,#0]
          CMP      r2,#0
          BNE      |L0.10|
          MOVS     r0,#0              ; first string exhausted -> return 0
|L0.54|
          POP      {r4,r5,pc}
100.2.5 Optimizing GCC 4.9.1(ARM64)

指令清单100.1 Optimizing GCC 4.9.1(ARM64)

// func(x0, x1): x0 and x1 point to byte strings that end with a zero byte.
// Returns in x0: the original x0 when the second string is empty, the position
// in the first string at which every byte of the second one was matched, or 0.
// w6 caches the first byte of the second string; x5 / x2 are comparison cursors.
func:
          ldrb    w6, [x1]
          mov     x2, x0
          cbz     w6, .L2          // second string empty -> return x0
          ldrb    w2, [x0]
          cbz     w2, .L24         // first string empty -> return 0
.L17:
          ldrb    w2, [x0]
          cbz     w2, .L5
          cmp     w6, w2           // quick test against the first byte of the second string
          mov     x5, x0
          mov     x2, x1
          beq     .L18
          b        .L5
.L4:
          ldrb    w4, [x2]
          cmp     w3, w4
          cbz     w4, .L8          // second string ended (cbz leaves the cmp flags alone)
          bne     .L8              // bytes differ
.L18:
          ldrb    w3, [x5,1]!      // pre-increment x5, then load the byte
          add     x2, x2, 1
          cbnz    w3, .L4
.L8:
          ldrb    w2, [x2]
          cbz     w2, .L27         // second string fully consumed -> match at x0
.L5:
          ldrb    w2, [x0,1]!      // advance the candidate position
          cbnz    w2, .L17
.L24:
          mov     x2, 0
.L2:
          mov     x0, x2
          ret
.L27:
          mov     x2, x0
          mov     x0, x2
          ret
100.2.6 Optimizing GCC 4.4.5(MIPS)

指令清单100.2 Optimizing GCC 4.4.5(MIPS)(IDA)

# f($a0, $a1): both arguments point to byte strings that end with a zero byte.
# Returns in $v0: the original $a0 when the second string is empty, the position
# in the first string at which the second one was matched, or 0.
# The instruction after each branch/jump sits in its delay slot and executes
# whether or not the branch is taken; "or $at, $zero" is the filler no-op.
# $v0 = candidate position, $a2 / $a3 = comparison cursors, $v1 = first byte of $a1.
f:
                  lb       $v1, 0($a1)
                  or       $at, $zero
                  bnez     $v1, loc_18
                  move     $v0, $a0                    # delay slot: result defaults to $a0
locret_10:                                     # CODE XREF: f+50
                                               # f+78
                  jr       $ra
                  or       $at, $zero
loc_18:                                        # CODE XREF: f+8
                  lb       $a0, 0($a0)
                  or       $at, $zero
                  beqz     $a0, locret_94              # first string empty -> return 0
                  move     $a2, $v0
loc_28:                                        # CODE XREF: f+8C
                  lb       $a0, 0($a2)
                  or       $at, $zero
                  beqz     $a0, loc_80
                  or       $at, $zero
                  bne      $v1, $a0, loc_80            # first byte differs -> next candidate
                  move     $a3, $a1
                  b        loc_60
                  addiu    $a2, 1
loc_48:                                        # CODE XREF: f+68
                  lb       $t1, 0($a3)
                  or       $at, $zero
                  beqz     $t1, locret_10              # second string consumed -> return $v0
                  or       $at, $zero
                  bne      $t0, $t1, loc_80            # mismatch -> next candidate
                  addiu    $a2, 1
loc_60:                                        # CODE XREF: f+40
                  lb       $t0, 0($a2)
                  or       $at, $zero
                  bnez     $t0, loc_48
                  addiu    $a3, 1
                  lb       $a0, 0($a3)
                  or       $at, $zero
                  beqz     $a0, locret_10              # both strings ended together -> return $v0
                  or       $at, $zero
loc_80:                                        # CODE XREF: f+30
                                               # f+38 ...
                  addiu    $v0, 1                      # advance the candidate position
                  lb       $a0, 0($v0)
                  or       $at, $zero
                  bnez     $a0, loc_28
                  move     $a2, $v0
locret_94:                                     # CODE XREF: f+20
                  jr       $ra
                  move     $v0, $zero                  # delay slot: return 0

100.3 练习题2.6

100.3.1 Optimizing MSVC 2010
; _f(v, k): v points to two 32-bit words that are updated in place, k points to
; four 32-bit words that are only read (cdecl, both pointers on the stack).
; Register roles: eax = v[0], ecx = v[1], edx = running sum (starts at 0),
; edi = loop counter (32 passes).
; k[0], k[2], k[3] are copied into locals; k[1] is stored over the incoming k
; argument slot (_k1$ and _k$ name the same offset).
PUBLIC     _f
; Function compile flags: /Ogtpy
_TEXT     SEGMENT
_k0$ = -12              ;size = 4
_k3$ = -8               ;size = 4
_k2$ = -4               ;size = 4
_v$ = 8                 ;size = 4
_k1$ = 12               ;size = 4
_k$ = 12                ;size = 4
_f     PROC

     sub     esp, 12     ; 0000000cH
     mov     ecx, DWORD PTR _v$[esp+8]
     mov     eax, DWORD PTR [ecx]              ; eax = v[0]
     mov     ecx, DWORD PTR [ecx+4]            ; ecx = v[1]
     push    ebx
     push    esi
     mov     esi, DWORD PTR _k$[esp+16]
     push    edi
     mov     edi, DWORD PTR [esi]
     mov     DWORD PTR _k0$[esp+24], edi
     mov     edi, DWORD PTR [esi+4]
     mov     DWORD PTR _k1$[esp+20], edi       ; overwrites the k argument; esi still holds the pointer
     mov     edi, DWORD PTR [esi+8]
     mov     esi, DWORD PTR [esi+12]
     xor     edx, edx                          ; sum = 0
     mov     DWORD PTR _k2$[esp+24], edi
     mov     DWORD PTR _k3$[esp+24], esi
     lea     edi, DWORD PTR [edx+32]           ; counter = 32 (edx is 0 here)
$LL8@f:
     mov     esi, ecx
     shr     esi, 5
     add     esi, DWORD PTR _k1$[esp+20]       ; (v1 >> 5) + k1
     mov     ebx, ecx
     shl     ebx, 4
     add     ebx, DWORD PTR _k0$[esp+24]       ; (v1 << 4) + k0
     sub     edx, 1640531527   ; 61c88647H
                                               ; (same as adding 9e3779b9H modulo 2^32)
     xor     esi, ebx
     lea     ebx, DWORD PTR [edx+ecx]          ; v1 + sum
     xor     esi, ebx
     add     eax, esi                          ; v0 += combined value
     mov     esi, eax
     shr     esi, 5
     add     esi, DWORD PTR _k3$[esp+24]       ; (v0 >> 5) + k3
     mov     ebx, eax
     shl     ebx, 4
     add     ebx, DWORD PTR _k2$[esp+24]       ; (v0 << 4) + k2
     xor     esi, ebx
     lea     ebx, DWORD PTR [edx+eax]          ; v0 + sum
     xor     esi, ebx
     add     ecx, esi                          ; v1 += combined value
     dec     edi
     jne     SHORT $LL8@f
     mov     edx, DWORD PTR _v$[esp+20]
     pop     edi
     pop     esi
     mov     DWORD PTR [edx], eax              ; write both words back
     mov     DWORD PTR [edx+4], ecx
     pop     ebx
     add     esp, 12              ; 0000000cH
     ret     0
_f     ENDP
100.3.2 Optimizing Keil(ARM mode)
         ; Entry: r0 -> two 32-bit words updated in place, r1 -> four 32-bit words (read only).
         ; r3 = word 0, r2 = word 1, r8/r6/r5/r7 = the four key words,
         ; r4 = constant from the literal pool, r12 = running sum, r1 = pass counter.
         PUSH     {r4-r10,lr}
         ADD      r5,r1,#8
         LDM      r5,{r5,r7}              ; r5 = key[2], r7 = key[3]
         LDR      r2,[r0,#4]
         LDR      r3,[r0,#0]
         LDR      r4,|L0.116|
         LDR      r6,[r1,#4]
         LDR      r8,[r1,#0]
         MOV      r12,#0
         MOV      r1,r12                  ; the key pointer is no longer needed; r1 becomes the counter
|L0.40|
         ADD      r12,r12,r4              ; sum += constant
         ADD      r9,r8,r2,LSL #4
         ADD      r10,r2,r12
         EOR      r9,r9,r10
         ADD      r10,r6,r2,LSR #5
         EOR      r9,r9,r10
         ADD      r3,r3,r9                ; word 0 updated
         ADD      r9,r5,r3,LSL #4
         ADD      r10,r3,r12
         EOR      r9,r9,r10
         ADD      r10,r7,r3,LSR #5
         EOR      r9,r9,r10
         ADD      r1,r1,#1
         CMP      r1,#0x20
         ADD      r2,r2,r9                ; word 1 updated (ADD without S keeps the CMP flags)
         STRCS    r2,[r0,#4]              ; after the 32nd pass: store the results
         STRCS    r3,[r0,#0]
         BCC      |L0.40|
         POP      {r4-r10,pc}
|L0.116|
         DCD      0x9e3779b9
100.3.3 Optimizing Keil(Thumb mode)
         ; Entry: r0 -> two 32-bit words updated in place, r1 -> four 32-bit words (read only).
         ; Pushing r1-r3 also provides the three stack words used below:
         ;   [sp,#8] = constant from the literal pool, [sp,#4] = key[0], [sp,#0] = key[1].
         ; lr = key[2], r12 = key[3], r3 = word 0, r2 = word 1, r4 = running sum, r1 = counter.
         PUSH     {r1-r7,lr}
         LDR      r5,|L0.84|
         LDR      r3,[r0,#0]
         LDR      r2,[r0,#4]
         STR      r5,[sp,#8]
         MOVS     r6,r1
         LDM      r6,{r6,r7}              ; r6 = key[0], r7 = key[1]
         LDR      r5,[r1,#8]
         STR      r6,[sp,#4]
         LDR      r6,[r1,#0xc]
         MOVS     r4,#0
         MOVS     r1,r4
         MOV      lr,r5
         MOV      r12,r6
         STR      r7,[sp,#0]
|L0.30|
         LDR      r5,[sp,#8]
         LSLS     r6,r2,#4
         ADDS     r4,r4,r5                ; sum += constant
         LDR      r5,[sp,#4]
         LSRS     r7,r2,#5
         ADDS     r5,r6,r5                ; (word1 << 4) + key[0]
         ADDS     r6,r2,r4                ; word1 + sum
         EORS     r5,r5,r6
         LDR      r6,[sp,#0]
         ADDS     r1,r1,#1
         ADDS     r6,r7,r6                ; (word1 >> 5) + key[1]
         EORS     r5,r5,r6
         ADDS     r3,r5,r3                ; word 0 updated
         LSLS     r5,r3,#4
         ADDS     r6,r3,r4                ; word0 + sum
         ADD      r5,r5,lr                ; (word0 << 4) + key[2]
         EORS     r5,r5,r6
         LSRS     r6,r3,#5
         ADD      r6,r6,r12               ; (word0 >> 5) + key[3]
         EORS     r5,r5,r6
         ADDS     r2,r5,r2                ; word 1 updated
         CMP      r1,#0x20
         BCC      |L0.30|                 ; 32 passes in total
         STR      r3,[r0,#0]
         STR      r2,[r0,#4]
         POP      {r1-r7,pc}
|L0.84|
         DCD      0x9e3779b9
100.3.4 Optimizing GCC 4.9.1(ARM64)

指令清单100.3 Optimizing GCC 4.9.1(ARM64)

// f(x0, x1): x0 -> two 32-bit words updated in place, x1 -> four 32-bit words (read only).
// w3 = word 0, w2 = word 1, w10/w9/w8/w7 = key[0..3], w4 = running sum.
// There is no separate counter: the loop stops when the sum equals 0xc6ef3720.
f:
         ldr      w3, [x0]
         mov      w4, 0
         ldr      w2, [x0,4]
         ldr      w10, [x1]
         ldr      w9, [x1,4]
         ldr      w8, [x1,8]
         ldr      w7, [x1,12]
.L2:
         mov      w5, 31161
         add      w6, w10, w2, lsl 4
         movk     w5, 0x9e37, lsl 16      // w5 = 0x9e3779b9 (31161 = 0x79b9)
         add      w1, w9, w2, lsr 5
         add      w4, w4, w5              // sum += 0x9e3779b9
         eor      w1, w6, w1
         add      w5, w2, w4
         mov      w6, 14112
         eor      w1, w1, w5
         movk     w6, 0xc6ef, lsl 16      // w6 = 0xc6ef3720 (14112 = 0x3720)
         add      w3, w3, w1              // word 0 updated
         cmp      w4, w6                  // flags consumed by the bne below; nothing in between sets flags
         add      w5, w3, w4
         add      w6, w8, w3, lsl 4
         add      w1, w7, w3, lsr 5
         eor      w1, w6, w1
         eor      w1, w1, w5
         add      w2, w2, w1              // word 1 updated
         bne      .L2
         str      w3, [x0]
         str      w2, [x0,4]
         ret
100.3.5 Optimizing GCC 4.4.5(MIPS)

指令清单100.4 Optimizing GCC 4.4.5(MIPS)(IDA)

# f($a0, $a1): $a0 -> two 32-bit words updated in place, $a1 -> four 32-bit words (read only).
# $v0 = word 0, $v1 = word 1, $t5/$t4/$t3/$t6 = key[0..3], $a1 = running sum
# (the key pointer is dead once the four words are loaded).
# The loop stops when the sum equals $t1 (0xC6EF3720).
# The instruction after each branch/jump executes in its delay slot.
f:
                 lui     $t2, 0x9E37
                 lui     $t1, 0xC6EF
                 lw      $v0, 0($a0)
                 lw      $v1, 4($a0)
                 lw      $t6, 0xC($a1)
                 lw      $t5, 0($a1)
                 lw      $t4, 4($a1)
                 lw      $t3, 8($a1)
                 li      $t2, 0x9E3779B9          # NOTE(review): presumably the disassembler's rendering of the low-half load that completes the lui above
                 li      $t1, 0xC6EF3720
                 move    $a1, $zero               # sum = 0
loc_2C:                                         # CODE XREF: f+6C
                 addu    $a1, $t2                 # sum += 0x9E3779B9
                 sll     $a2, $v1, 4
                 addu    $t0, $a1, $v1
                 srl     $a3, $v1, 5
                 addu    $a2, $t5
                 addu    $a3, $t4
                 xor     $a2, $t0, $a2
                 xor     $a2, $a3
                 addu    $v0, $a2                 # word 0 updated
                 sll     $a3, $v0, 4
                 srl     $a2, $v0, 5
                 addu    $a3, $t3
                 addu    $a2, $t6
                 xor     $a2, $a3, $a2
                 addu    $a3, $v0, $a1
                 xor     $a2, $a3
                 bne     $a1, $t1, loc_2C
                 addu    $v1, $a2                 # delay slot: word 1 updated on every pass
                 sw      $v1, 4($a0)
                 jr      $ra
                 sw      $v0, 0($a0)              # delay slot: second store completes before returning

100.4 练习题2.13

下述程序采用了一种加密算法。这种算法的名称是什么?

100.4.1 Optimizing MSVC 2012
; _f(in): takes a 16-bit value from the stack and returns a new value in eax.
; The returned value is (in >> 1) with bit 15 set to the XOR of input bits
; 0, 2, 3 and 5. The shift/xor chain below lines those four bits up at bit 5.
_in$ = 8                                            ; size = 2
_f       PROC
         movzx    ecx, WORD PTR _in$[esp-4]
         lea      eax, DWORD PTR [ecx*4]            ; in<<2
         xor      eax, ecx                          ; in<<2 ^ in
         add      eax, eax                          ; in<<3 ^ in<<1
         xor      eax, ecx                          ; in<<3 ^ in<<1 ^ in
         shl      eax, 2                            ; in<<5 ^ in<<3 ^ in<<2
         xor      eax, ecx                          ; in<<5 ^ in<<3 ^ in<<2 ^ in
         and      eax, 32                           ; 00000020H
         shl      eax, 10                           ; 0000000aH
         shr      ecx, 1
         or       eax, ecx
         ret      0
_f       ENDP
100.4.2 Keil(ARM mode)
; f(r0): returns in r0 the input shifted right by one, with bit 15 OR-ed in as
; the XOR of input bits 0, 2, 3 and 5.
f PROC
          EOR     r1,r0,r0,LSR #2
          EOR     r1,r1,r0,LSR #3
          EOR     r1,r1,r0,LSR #5     ; bit 0 of r1 = bit0 ^ bit2 ^ bit3 ^ bit5 of the input
          AND     r1,r1,#1
          LSR     r0,r0,#1
          ORR     r0,r0,r1,LSL #15    ; insert the computed bit at position 15
          BX      lr
          ENDP
100.4.3 Keil(Thumb mode)
; f(r0): returns in r0 the input shifted right by one, with bit 15 OR-ed in as
; the XOR of input bits 0, 2, 3 and 5.
f PROC
          LSRS    r1,r0,#2
          EORS    r1,r1,r0
          LSRS    r2,r0,#3
          EORS    r1,r1,r2
          LSRS    r2,r0,#5
          EORS    r1,r1,r2            ; bit 0 of r1 = bit0 ^ bit2 ^ bit3 ^ bit5 of the input
          LSLS    r1,r1,#31           ; keep only bit 0 (moved to bit 31) ...
          LSRS    r0,r0,#1
          LSRS    r1,r1,#16           ; ... and bring it down to bit 15
          ORRS    r0,r0,r1
          BX      lr
          ENDP
100.4.4 Optimizing GCC 4.9.1(ARM64)
// f(w0): the low 16 bits of w0 are the input. Returns in w0 that value shifted
// right by one, with bit 15 set to the XOR of input bits 0, 2, 3 and 5.
f:
          uxth    w1, w0               // zero-extend the 16-bit input
          lsr     w2, w1, 3
          lsr     w0, w1, 1
          eor     w2, w2, w1, lsr 2
          eor     w2, w1, w2
          eor     w1, w2, w1, lsr 5    // bit 0 = bit0 ^ bit2 ^ bit3 ^ bit5 of the input
          and     w1, w1, 1
          orr     w0, w0, w1, lsl 15
          ret
100.4.5 Optimizing GCC 4.4.5(MIPS)

指令清单100.5 Optimizing GCC 4.4.5(MIPS)(IDA)

# f($a0): the low 16 bits of $a0 are the input. Returns in $v0 that value
# shifted right by one, with bit 15 set to the XOR of input bits 0, 2, 3 and 5.
f:
                 andi    $a0, 0xFFFF          # keep the low 16 bits
                 srl     $v1, $a0, 2
                 srl     $v0, $a0, 3
                 xor     $v0, $v1, $v0
                 xor     $v0, $a0, $v0
                 srl     $v1, $a0, 5
                 xor     $v0, $v1             # bit 0 = bit0 ^ bit2 ^ bit3 ^ bit5 of the input
                 andi    $v0, 1
                 srl     $a0, 1
                 sll     $v0, 15
                 jr      $ra
                 or      $v0, $a0             # branch delay slot: merge the two parts before returning

100.5 练习题2.14

下面这段程序采用了另一种著名算法。函数接收两个输入变量,并返回一个值。

100.5.1 MSVC 2012
; f(x, y): two 32-bit unsigned arguments on the stack, result in eax.
;   - x == 0 returns y; y == 0 returns x.
;   - Otherwise edi = index of the lowest set bit of (x | y). Both values are
;     stripped of their trailing zero bits, then the loop keeps the smaller one
;     in esi, subtracts it from the larger one (edx) and strips the trailing
;     zeros of the difference, until the two are equal or esi is 1.
;     The result is esi shifted left by edi.
; "push ecx" only reserves a stack slot; _rt$1 and _rt$2 receive the bsf results
; but are never read back in this listing.
_rt$1 = -4                                        ;size=4
_rt$2 = 8                                         ;size=4
_x$ = 8                                           ;size=4
_y$ = 12                                          ;size=4
?f@@YAIII@Z PROC                                  ; f
        push    ecx
        push    esi
        mov     esi, DWORD PTR _x$[esp+4]
        test    esi, esi
        jne     SHORT $LN7@f
        mov     eax, DWORD PTR _y$[esp+4]         ; x == 0 -> return y
        pop     esi
        pop     ecx
        ret     0
$LN7@f:
        mov     edx, DWORD PTR _y$[esp+4]
        mov     eax, esi
        test    edx, edx
        je      SHORT $LN8@f                      ; y == 0 -> return x (already in eax)
        or      eax, edx
        push    edi
        bsf     edi, eax                          ; edi = lowest set bit index of (x | y)
        bsf     eax, esi
        mov     ecx, eax
        mov     DWORD PTR _rt$1[esp+12], eax
        bsf     eax, edx
        shr     esi, cl                           ; strip trailing zeros of x
        mov     ecx, eax
        shr     edx, cl                           ; strip trailing zeros of y
        mov     DWORD PTR _rt$2[esp+8], eax
        cmp     esi, edx
        je      SHORT $LN22@f
$LN23@f:
        jbe     SHORT $LN2@f                      ; flags come from the preceding cmp esi, edx
        xor     esi, edx                          ; esi > edx: swap them (three-xor swap)
        xor     edx, esi
        xor     esi, edx
$LN2@f:
        cmp     esi, 1
        je      SHORT $LN22@f
        sub     edx, esi                          ; larger -= smaller
        bsf     eax, edx
        mov     ecx, eax
        shr     edx, cl                           ; strip trailing zeros of the difference
        mov     DWORD PTR _rt$2[esp+8], eax
        cmp     esi, edx
        jne     SHORT $LN23@f
$LN22@f:
        mov     ecx, edi
        shl     esi, cl                           ; restore the shared power of two
        pop     edi
        mov     eax, esi
$LN8@f:
        pop     esi
        pop     ecx
        ret     0
?f@@YAIII@Z ENDP
100.5.2 Keil(ARM mode)
; f1(r0): returns in r0 the index of the lowest set bit of the argument.
; For an argument of 0 the RSBNE is skipped and the CLZ result is returned as is.
; Clobbers r1.
||f1|| PROC
        CMP      r0,#0               ; flags survive until RSBNE: nothing in between sets them
        RSB      r1,r0,#0            ; r1 = -r0
        AND      r0,r0,r1            ; isolate the lowest set bit
        CLZ      r0,r0
        RSBNE    r0,r0,#0x1f         ; 31 - clz = bit index
        BX       lr
        ENDP

; f(r0, r1): two 32-bit unsigned arguments, result in r0.
;   - first == 0 returns the second; second == 0 returns the first.
;   - Otherwise r12 = f1(first | second). r2 holds the first value with its
;     trailing zeros removed; each pass removes the trailing zeros of r3, orders
;     the pair so that r2 is the smaller, and replaces r3 by the difference,
;     until both are equal or r2 is 1. The result is r2 << r12.
; Relies on ||f1|| changing only r0 and r1.
f PROC
        MOVS     r2,r0
        MOV      r3,r1
        MOVEQ    r0,r1               ; first == 0 -> result is the second argument
        CMPNE    r3,#0
        PUSH     {lr}
        POPEQ    {pc}                ; return if either argument was 0
        ORR      r0,r2,r3
        BL       ||f1||
        MOV      r12,r0              ; shared shift count
        MOV      r0,r2
        BL       ||f1||
        LSR      r2,r2,r0            ; strip trailing zeros of the first value
|L0.196|
        MOV      r0,r3
        BL       ||f1||
        LSR      r0,r3,r0            ; strip trailing zeros of the second value
        CMP      r2,r0
        EORHI    r1,r2,r0            ; r2 > r0: swap them (three-xor swap)
        EORHI    r0,r0,r1
        EORHI    r2,r1,r0
        BEQ      |L0.240|            ; still the flags of CMP r2,r0
        CMP      r2,#1
        SUBNE    r3,r0,r2            ; larger - smaller feeds the next pass
        BNE      |L0.196|
|L0.240|
        LSL      r0,r2,r12
        POP      {pc}
        ENDP
100.5.3 GCC 4.6.3 for Raspberry Pi(ARM mode)
@ f(r0, r1): two 32-bit unsigned arguments, result in r0.
@   - first == 0 returns the second; second == 0 returns the first.
@   - Otherwise ip = index of the lowest set bit of (first | second); r3 holds
@     the first value without its trailing zeros. Each pass strips the trailing
@     zeros of r1, orders the pair so that r3 is the smaller, and subtracts,
@     until both are equal or r3 is 1. The result is r3 << ip.
f:
        subs     r3, r0, #0
        beq      .L162               @ first == 0 -> return r1
        cmp      r1, #0
        moveq    r1, r3
        beq      .L162               @ second == 0 -> return the first
        orr      r2, r1, r3
        rsb      ip, r2, #0
        and      ip, ip, r2          @ lowest set bit of (first | second)
        cmp      r2, #0
        rsb      r2, r3, #0
        and      r2, r2, r3          @ lowest set bit of the first value
        clz      r2, r2
        rsb      r2, r2, #31         @ ... converted to a bit index
        clz      ip, ip
        rsbne    ip, ip, #31         @ shared shift count
        mov      r3, r3, lsr r2      @ strip trailing zeros of the first value
        b        .L169
.L171:
        eorhi    r1, r1, r2          @ r3 > r1: swap, using r2 = r1 ^ r3 computed below
        eorhi    r3, r1, r2
        cmp      r3, #1
        rsb      r1, r3, r1          @ r1 = r1 - r3
        beq      .L167
.L169:
        rsb      r0, r1, #0
        and      r0, r0, r1          @ lowest set bit of r1
        cmp      r1, #0
        clz      r0, r0
        mov      r2, r0
        rsbne    r2, r0, #31         @ bit index when r1 != 0
        mov      r1, r1, lsr r2      @ strip trailing zeros
        cmp      r3, r1
        eor      r2, r1, r3          @ prepared for the swap; eor without s keeps the flags
        bne      .L171
.L167:
        mov      r1, r3, asl ip
.L162:
        mov      r0, r1
        bx       lr
100.5.4 Optimizing GCC 4.9.1(ARM64)

指令清单100.6 Optimizing GCC 4.9.1(ARM64)

// f(w0, w1): two 32-bit unsigned arguments, result in w0.
//   - first == 0 returns the second; second == 0 returns the first.
//   - Otherwise w5 = index of the lowest set bit of (first | second); w0 holds
//     the first value without its trailing zeros. Each pass strips the trailing
//     zeros of w1, orders the pair so that w0 is the smaller, and subtracts,
//     until both are equal or w0 is 1. The result is w0 << w5.
f:
        mov      w3, w0
        mov      w0, w1
        cbz      w3, .L8            // first == 0 -> return the second
        mov      w0, w3
        cbz      w1, .L8            // second == 0 -> return the first
        mov      w6, 31
        orr      w5, w3, w1
        neg      w2, w3
        neg      w7, w5
        and      w2, w2, w3         // lowest set bit of the first value
        clz      w2, w2
        sub      w2, w6, w2         // ... as a bit index
        and      w5, w7, w5         // lowest set bit of (first | second)
        mov      w4, w6             // w4 = 31, used inside the loop
        clz      w5, w5
        lsr      w0, w3, w2         // strip trailing zeros of the first value
        sub      w5, w6, w5         // shared shift count
        b        .L13
.L22:
        bls      .L12               // w0 <= w1: already ordered
        eor      w1, w1, w2         // otherwise swap, using w2 = w1 ^ w0
        eor      w0, w1, w2
.L12:
        cmp      w0, 1
        sub      w1, w1, w0         // larger - smaller
        beq      .L11
.L13:
        neg      w2, w1
        cmp      w1, wzr
        and      w2, w2, w1         // lowest set bit of w1
        clz      w2, w2
        sub      w3, w4, w2
        csel     w2, w3, w2, ne     // bit index when w1 != 0, raw clz otherwise
        lsr      w1, w1, w2         // strip trailing zeros
        cmp      w0, w1
        eor      w2, w1, w0         // prepared for the swap at .L22
        bne      .L22
.L11:
        lsl      w0, w0, w5
.L8:
        ret
100.5.5 Optimizing GCC 4.4.5(MIPS)

指令清单100.7 Optimizing GCC 4.4.5(MIPS)(IDA)

# f($a0, $a1): two 32-bit unsigned arguments, result in $v0.
#   - first == 0 returns the second; second == 0 returns the first.
#   - Otherwise $s4 = index of the lowest set bit of (first | second); $s0 holds
#     the first value without its trailing zeros. Each pass strips the trailing
#     zeros of $s1, orders the pair so that $s0 is the smaller, and subtracts,
#     until both are equal or $s0 is 1. The result is $s0 << $s4.
# Leading-zero counts come from calls to __clzsi2 through $gp.
# The instruction after each branch/jump executes in its delay slot.
f:

var_20            = -0x20
var_18            = -0x18
var_14            = -0x14
var_10            = -0x10
var_C             = -0xC
var_8             = -8
var_4             =-4

                  lui     $gp, (__gnu_local_gp >> 16)
                  addiu   $sp, -0x30
                  la      $gp, (__gnu_local_gp & 0xFFFF)
                  sw      $ra, 0x30+var_4($sp)
                  sw      $s4, 0x30+var_8($sp)
                  sw      $s3, 0x30+var_C($sp)
                  sw      $s2, 0x30+var_10($sp)
                  sw      $s1, 0x30+var_14($sp)
                  sw      $s0, 0x30+var_18($sp)
                  sw      $gp, 0x30+var_20($sp)
                  move    $s0, $a0
                  beqz    $a0, loc_154
                  move    $s1, $a1                  # delay slot: $s1 = second argument
                  bnez    $a1, loc_178
                  or      $s2, $a1, $a0             # delay slot: $s2 = first | second
                  move    $s1, $a0                  # second == 0 -> result is the first argument

loc_154:                                      # CODE XREF: f+2C
                  lw      $ra, 0x30+var_4($sp)
                  move    $v0, $s1
                  lw      $s4, 0x30+var_8($sp)
                  lw      $s3, 0x30+var_C($sp)
                  lw      $s2, 0x30+var_10($sp)
                  lw      $s1, 0x30+var_14($sp)
                  lw      $s0, 0x30+var_18($sp)
                  jr      $ra
                  addiu   $sp, 0x30

loc_178:                                      # CODE XREF: f+34
                  lw      $t9, (__clzsi2 & 0xFFFF)($gp)
                  negu    $a0, $s2
                  jalr    $t9
                  and     $a0, $s2                  # delay slot: argument = lowest set bit of $s2
                  lw      $gp, 0x30+var_20($sp)
                  bnez    $s2, loc_20C              # non-zero: $s4 = 31 - clz (computed at loc_20C)
                  li      $s4, 0x1F
                  move    $s4, $v0
loc_198:                                      # CODE XREF: f:loc_20C
                  lw      $t9, (__clzsi2 & 0xFFFF)($gp)
                  negu    $a0, $s0
                  jalr    $t9
                  and     $a0, $s0                  # delay slot: lowest set bit of the first value
                  nor     $v0, $zero, $v0           # ~clz; its low five bits equal 31 - clz
                  lw      $gp, 0x30+var_20($sp)
                  srlv    $s0, $v0                  # strip trailing zeros (srlv uses the low five bits)
                  li      $s3, 0x1F
                  li      $s2, 1
loc_1BC:                                     # CODE XREF: f+F0
                  lw      $t9, (__clzsi2 & 0xFFFF)($gp)
                  negu    $a0, $s1
                  jalr    $t9
                  and     $a0, $s1                  # delay slot: lowest set bit of $s1
                  lw      $gp, 0x30+var_20($sp)
                  beqz    $s1, loc_1DC
                  or      $at, $zero
                  subu    $v0, $s3, $v0             # 31 - clz = bit index
loc_1DC:                                      # CODE XREF: f+BC
                  srlv    $s1, $v0                  # strip trailing zeros of $s1
                  xor     $v1, $s1, $s0             # prepared for the swap below
                  beq     $s0, $s1, loc_214
                  sltu    $v0, $s1, $s0             # delay slot: $v0 = ($s1 < $s0)
                  beqz    $v0, loc_1FC
                  or      $at, $zero
                  xor     $s1, $v1                  # $s1 < $s0: swap the pair
                  xor     $s0, $s1, $v1
loc_1FC:                                    # CODE XREF: f+D8
                  beq     $s0, $s2, loc_214         # smaller value is 1 -> done
                  subu    $s1, $s0                  # delay slot: larger - smaller
                  b       loc_1BC
                  or      $at, $zero
loc_20C:                                    # CODE XREF: f+78
                  b       loc_198
                  subu    $s4, $v0                  # delay slot: $s4 = 31 - clz
loc_214:                                    # CODE XREF: f+D0
                                            # f:loc_1FC
                  lw      $ra, 0x30+var_4($sp)
                  sllv    $s1, $s0, $s4             # restore the shared power of two
                  move    $v0, $s1
                  lw      $s4, 0x30+var_8($sp)
                  lw      $s3, 0x30+var_C($sp)
                  lw      $s2, 0x30+var_10($sp)
                  lw      $s1, 0x30+var_14($sp)
                  lw      $s0, 0x30+var_18($sp)
                  jr      $ra
                  addiu   $sp, 0x30

100.6 练习题2.15

这个程序实现了一种著名的算法。请问,这个算法的名称是什么?

在x86平台上,程序使用FPU进行运算;而在x64平台上,程序使用的是SIMD指令集。这属于正常现象,详细介绍请参见本书第27章。

100.6.1 Optimizing MSVC 2012 x64
; f(): takes no arguments and returns a single-precision value in xmm0.
; The generator state is read from and written back to the external symbol
; ?RNG_state@?1??get_rand@@9@9.
;
; One sample (the loop body contains five unrolled copies of it):
;   state = state * 1664525 + 1013904223            (done twice, once per value)
;   bits  = (state & 007fffffH) with bit 30 set     -> a float in [2.0, 4.0)
;   value = (float)((double)bits_as_float - 3.0)    -> a value in [-1.0, 1.0)
;   if 1.0 > value1*value1 + value2*value2, the counter in ecx is incremented.
; The outer loop runs 200000 times, i.e. 1000000 samples in total.
; Result = (double)count * 4.0 / 1e6, converted back to single precision.
;
; Register roles: edx = generator state, ecx = hit counter, r8 = loop counter,
; xmm3 = 3.0, xmm4 = 1.0f. tmp$1 and tmp$2 name the same stack offset.
__real@412e848000000000 DQ 0412e848000000000r     ; 1e+006
__real@4010000000000000 DQ 04010000000000000r     ;4
__real@4008000000000000 DQ 04008000000000000r     ;3
__real@3f800000 DD 03f800000r                     ;1

tmp$1 = 8
tmp$2 = 8
f       PROC
        movsdx  xmm3, QWORD PTR __real@4008000000000000
        movss   xmm4, DWORD PTR __real@3f800000
        mov     edx, DWORD PTR ?RNG_state@?1??get_rand@@9@9
        xor     ecx, ecx
        mov     r8d, 200000                               ; 00030d40H
        npad    2 ; align next label
$LL4@f:
        ; ---- sample 1 of 5 ----
        imul    edx, 1664525                              ; 0019660dH
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        imul    edx, 1664525                              ; 0019660dH
        bts     eax, 30
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     DWORD PTR tmp$2[rsp], eax
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        bts     eax, 30
        movss   xmm0, DWORD PTR tmp$2[rsp]
        mov     DWORD PTR tmp$1[rsp], eax
        cvtps2pd xmm0, xmm0
        subsd   xmm0, xmm3
        cvtpd2ps xmm2, xmm0
        movss   xmm0, DWORD PTR tmp$1[rsp]
        cvtps2pd xmm0, xmm0
        mulss   xmm2, xmm2
        subsd   xmm0, xmm3
        cvtpd2ps xmm1, xmm0
        mulss   xmm1, xmm1
        addss   xmm1, xmm2
        comiss  xmm4, xmm1
        jbe     SHORT $LN3@f
        inc     ecx
$LN3@f:
        ; ---- sample 2 of 5 ----
        imul    edx, 1664525                              ; 0019660dH
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        imul    edx, 1664525                              ; 0019660dH
        bts     eax, 30
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     DWORD PTR tmp$2[rsp], eax
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        bts     eax, 30
        movss   xmm0, DWORD PTR tmp$2[rsp]
        mov     DWORD PTR tmp$1[rsp], eax
        cvtps2pd xmm0, xmm0
        subsd   xmm0, xmm3
        cvtpd2ps xmm2, xmm0
        movss   xmm0, DWORD PTR tmp$1[rsp]
        cvtps2pd xmm0, xmm0
        mulss   xmm2, xmm2
        subsd   xmm0, xmm3
        cvtpd2ps xmm1, xmm0
        mulss   xmm1, xmm1
        addss   xmm1, xmm2
        comiss  xmm4, xmm1
        jbe     SHORT $LN15@f
        inc     ecx
$LN15@f:
        ; ---- sample 3 of 5 ----
        imul    edx, 1664525                              ; 0019660dH
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        imul    edx, 1664525                              ; 0019660dH
        bts     eax, 30
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     DWORD PTR tmp$2[rsp], eax
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        bts     eax, 30
        movss   xmm0, DWORD PTR tmp$2[rsp]
        mov     DWORD PTR tmp$1[rsp], eax
        cvtps2pd xmm0, xmm0
        subsd   xmm0, xmm3
        cvtpd2ps xmm2, xmm0
        movss   xmm0, DWORD PTR tmp$1[rsp]
        cvtps2pd xmm0, xmm0
        mulss   xmm2, xmm2
        subsd   xmm0, xmm3
        cvtpd2ps xmm1, xmm0
        mulss   xmm1, xmm1
        addss   xmm1, xmm2
        comiss  xmm4, xmm1
        jbe     SHORT $LN16@f
        inc     ecx
$LN16@f:
        ; ---- sample 4 of 5 ----
        imul    edx, 1664525                              ; 0019660dH
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        imul    edx, 1664525                              ; 0019660dH
        bts     eax, 30
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     DWORD PTR tmp$2[rsp], eax
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        bts     eax, 30
        movss   xmm0, DWORD PTR tmp$2[rsp]
        mov     DWORD PTR tmp$1[rsp], eax
        cvtps2pd xmm0, xmm0
        subsd   xmm0, xmm3
        cvtpd2ps xmm2, xmm0
        movss   xmm0, DWORD PTR tmp$1[rsp]
        cvtps2pd xmm0, xmm0
        mulss   xmm2, xmm2
        subsd   xmm0, xmm3
        cvtpd2ps xmm1, xmm0
        mulss   xmm1, xmm1
        addss   xmm1, xmm2
        comiss  xmm4, xmm1
        jbe     SHORT $LN17@f
        inc     ecx
$LN17@f:
        ; ---- sample 5 of 5 ----
        imul    edx, 1664525                              ; 0019660dH
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        imul    edx, 1664525                              ; 0019660dH
        bts     eax, 30
        add     edx, 1013904223                           ; 3c6ef35fH
        mov     DWORD PTR tmp$2[rsp], eax
        mov     eax, edx
        and     eax, 8388607                              ; 007fffffH
        bts     eax, 30
        movss   xmm0, DWORD PTR tmp$2[rsp]
        mov     DWORD PTR tmp$1[rsp], eax
        cvtps2pd xmm0, xmm0
        subsd   xmm0, xmm3
        cvtpd2ps xmm2, xmm0
        movss   xmm0, DWORD PTR tmp$1[rsp]
        cvtps2pd xmm0, xmm0
        mulss   xmm2, xmm2
        subsd   xmm0, xmm3
        cvtpd2ps xmm1, xmm0
        mulss   xmm1, xmm1
        addss   xmm1, xmm2
        comiss  xmm4, xmm1
        jbe     SHORT $LN18@f
        inc     ecx
$LN18@f:
        dec     r8
        jne     $LL4@f
        movd    xmm0, ecx
        mov     DWORD PTR ?RNG_state@?1??get_rand@@9@9, edx   ; save the generator state
        cvtdq2ps xmm0, xmm0                                   ; hit count as float
        cvtps2pd xmm1, xmm0
        mulsd   xmm1, QWORD PTR __real@4010000000000000       ; * 4.0
        divsd   xmm1, QWORD PTR __real@412e848000000000       ; / 1e6
        cvtpd2ps xmm0, xmm1
        ret     0
f       ENDP
100.6.2 Optimizing GCC 4.4.6 x64
# f1: advance the 32-bit generator state kept in v1.2084 and return a float in xmm0.
#   state = state * 1664525 + 1013904223 (32-bit wraparound), written back to v1.2084.
#   The low 23 bits of the new state become the mantissa of an IEEE-754 single whose
#   upper bits are 0x40000000, i.e. a value in [2.0, 4.0); the float at .LC0 is then
#   subtracted from it.
f1:
        mov     eax, DWORD PTR v1.2084[rip]     # eax = current state
        imul    eax, eax, 1664525               # * 0x0019660D
        add     eax, 1013904223                 # + 0x3C6EF35F
        mov     DWORD PTR v1.2084[rip], eax     # save the new state
        and     eax, 8388607                    # keep the low 23 bits (0x007FFFFF)
        or      eax, 1073741824                 # set 0x40000000 (sign 0, exponent field 0x80)
        mov     DWORD PTR [rsp-4], eax          # bounce through memory just below rsp (rsp is not adjusted)
        movss   xmm0, DWORD PTR [rsp-4]         # reinterpret the bits as a float
        subss   xmm0, DWORD PTR .LC0[rip]       # minus the float stored at .LC0
        ret
# f: loop 1000000 times; each iteration calls f1 twice and counts the iterations in
# which the sum of the two squared results is below the float at .LC1.
# Returns (float)((double)(float)count * .LC2 / .LC3) in xmm0.
# Register roles: ebp = iteration counter, ebx = count; both are pushed on entry and popped on exit.
f:
        push    rbp
        xor     ebp, ebp                        # iteration counter = 0
        push    rbx
        xor     ebx, ebx                        # count = 0
        sub     rsp, 16                         # [rsp] is the spill slot for the first f1 result
.L6:
        xor     eax, eax                        # NOTE(review): presumably AL=0 for a call without a prototype - confirm
        call    f1
        xor     eax, eax
        movss   DWORD PTR [rsp], xmm0           # keep the first result across the second call
        call    f1
        movss   xmm1, DWORD PTR [rsp]           # xmm1 = first result, xmm0 = second result
        mulss   xmm0, xmm0                      # second^2
        mulss   xmm1, xmm1                      # first^2
        lea     eax, [rbx+1]                    # eax = count + 1 (candidate new count)
        addss   xmm1, xmm0                      # xmm1 = first^2 + second^2
        movss   xmm0, DWORD PTR .LC1[rip]
        ucomiss xmm0, xmm1                      # compare .LC1 against the sum
        cmova   ebx, eax                        # count = count + 1 only when .LC1 > sum
        add     ebp, 1
        cmp     ebp, 1000000
        jne     .L6
        cvtsi2ss        xmm0, ebx               # count -> float
        unpcklps        xmm0, xmm0
        cvtps2pd        xmm0, xmm0              # float -> double
        mulsd   xmm0, QWORD PTR .LC2[rip]       # * double at .LC2
        divsd   xmm0, QWORD PTR .LC3[rip]       # / double at .LC3
        add     rsp, 16
        pop     rbx
        pop     rbp
        unpcklpd        xmm0, xmm0
        cvtpd2ps        xmm0, xmm0              # double -> float, returned in xmm0
        ret
# Data used by f1 and f above. Doubles are stored as two 32-bit words, low word first.
v1.2084:
        .long   305419896                       # initial generator state, 0x12345678
.LC0:
        .long   1077936128                      # 0x40400000 = 3.0f
.LC1:
        .long   1065353216                      # 0x3F800000 = 1.0f
.LC2:
        .long   0                               # low word of the double 0x4010000000000000 = 4.0
        .long   1074790400                      # high word 0x40100000
.LC3:
        .long   0                               # low word of the double 0x412E848000000000 = 1000000.0
        .long   1093567616                      # high word 0x412E8480
100.6.3 Optimizing GCC 4.8.1 x86
# f1 (32-bit build): state = state * 1664525 + 1013904223 on v1.2023, then the low
# 23 bits OR 0x40000000 are loaded as a float and the float at .LC0 is subtracted.
# The result is left in st(0).
f1:
        sub     esp, 4                          # 4-byte scratch slot
        imul    eax, DWORD PTR v1.2023, 1664525 # eax = state * 1664525
        add     eax, 1013904223
        mov     DWORD PTR v1.2023, eax          # save the new state
        and     eax, 8388607                    # keep the low 23 bits
        or      eax, 1073741824                 # set 0x40000000
        mov     DWORD PTR [esp], eax
        fld     DWORD PTR [esp]                 # load the bit pattern as a float onto the x87 stack
        fsub    DWORD PTR .LC0                  # st(0) -= float at .LC0
        add     esp, 4
        ret
# f (32-bit x87 build): esi counts down from 1000000, ebx counts the iterations in
# which 1.0 > first^2 + second^2 for two consecutive f1 results.
# Returns count * (float at .LC3) / (float at .LC4), rounded to single, in st(0).
f:
        push    esi
        mov     esi, 1000000                    # remaining iterations
        push    ebx
        xor     ebx, ebx                        # count = 0
        sub     esp, 16                         # [esp], [esp+4], [esp+8] are scratch slots
.L7:
        call    f1
        fstp    DWORD PTR [esp]                 # spill the first result; x87 stack is empty again
        call    f1                              # st(0) = second result
        lea     eax, [ebx+1]                    # eax = count + 1
        fld     DWORD PTR [esp]                 # st(0) = first, st(1) = second
        fmul    st, st(0)                       # st(0) = first^2
        fxch    st(1)                           # st(0) = second, st(1) = first^2
        fmul    st, st(0)                       # st(0) = second^2
        faddp   st(1), st                       # st(0) = first^2 + second^2
        fld1                                    # st(0) = 1.0, st(1) = sum
        fucomip st, st(1)                       # compare 1.0 with the sum into EFLAGS, pop the 1.0
        fstp    st(0)                           # pop the sum
        cmova   ebx, eax                        # count = count + 1 only when 1.0 > sum
        sub     esi, 1
        jne     .L7
        mov     DWORD PTR [esp+4], ebx
        fild    DWORD PTR [esp+4]               # st(0) = count as a floating-point value
        fmul    DWORD PTR .LC3
        fdiv    DWORD PTR .LC4
        fstp    DWORD PTR [esp+8]               # store as a 32-bit float ...
        fld     DWORD PTR [esp+8]               # ... and reload, so st(0) holds the single-precision value
        add     esp, 16
        pop     ebx
        pop     esi
        ret
# Data used by the 32-bit f1 and f above; all constants here are single-precision.
v1.2023:
        .long   305419896                       # initial generator state, 0x12345678
.LC0:
        .long   1077936128                      # 0x40400000 = 3.0f
.LC3:
        .long   1082130432                      # 0x40800000 = 4.0f
.LC4:
        .long   1232348160                      # 0x49742400 = 1000000.0f
100.6.4 Keil(ARM模式):面向Cortex-R4F CPU的代码
; f1: update the 32-bit state word whose address is held in the literal |L0.184|
; (state = state * literal |L0.188| + literal |L0.192|) and return, in s0, the float
; built from the low 23 bits of the new state OR 0x40000000, minus 3.0.
; Uses r0-r2 and s0-s1 only.
f1      PROC
        LDR      r1,|L0.184|       ; r1 = address of the state word
        LDR      r0,[r1,#0]  ; v1
        LDR      r2,|L0.188|       ; r2 = multiplier
        VMOV.F32 s1,#3.00000000
        MUL      r0,r0,r2
        LDR      r2,|L0.192|       ; r2 = increment
        ADD      r0,r0,r2
        STR      r0,[r1,#0]  ; v1
        BFC      r0,#23,#9         ; clear bits 23..31, leaving the low 23 bits
        ORR      r0,r0,#0x40000000
        VMOV     s0,r0             ; move the bit pattern into an FP register unchanged
        VSUB.F32 s0,s0,s1          ; s0 = s0 - 3.0
        BX       lr
        ENDP

; f: loop while r3 < r5 (r5 is loaded from literal |L0.196|); each iteration calls f1
; twice and increments r4 when first^2 + second^2 compares below 1.0f.
; Returns (float)((double)(float)r4 * 4.0 / double at |L0.200|) in s0.
; Relies on f1 leaving r3 and s2 untouched (the f1 shown in this listing only uses r0-r2, s0, s1).
f       PROC
        PUSH     {r4,r5,lr}
        MOV      r4,#0             ; count = 0
        LDR      r5,|L0.196|       ; iteration limit
        MOV      r3,r4             ; loop counter = 0
|L0.68|
        BL       f1
        VMOV.F32 s2,s0             ; keep the first result across the second call
        BL       f1
        VMOV.F32 s1,s2
        ADD      r3,r3,#1
        VMUL.F32 s1,s1,s1          ; s1 = first^2
        VMLA.F32 s1,s0,s0          ; s1 += second^2
        VMOV     r0,s1             ; raw bits of the sum
        CMP      r0,#0x3f800000    ; integer compare against the bit pattern of 1.0f
        ADDLT    r4,r4,#1          ; count++ when the sum's bits are below those of 1.0f
        CMP      r3,r5
        BLT      |L0.68|
        VMOV     s0,r4
        VMOV.F64 d1,#4.00000000
        VCVT.F32.S32 s0,s0         ; count -> float
        VCVT.F64.F32 d0,s0         ; float -> double
        VMUL.F64 d0,d0,d1          ; * 4.0
        VLDR     d1,|L0.200|
        VDIV.F64 d2,d0,d1          ; / double at |L0.200|
        VCVT.F32.F64 s0,d2         ; double -> float return value
        POP      {r4,r5,pc}
        ENDP

; Literal pool referenced by f1 and f above, followed by the data area holding v1.
|L0.184|
        DCD      ||.data||          ; address of the data area (v1 is its first word)
|L0.188|
        DCD      0x0019660d         ; 1664525, multiplier used by f1
|L0.192|
        DCD      0x3c6ef35f         ; 1013904223, increment used by f1
|L0.196|
        DCD      0x000f4240         ; 1000000, loop limit used by f
|L0.200|
        DCFD     0x412e848000000000 ; 1000000

        DCD      0x00000000
        AREA ||.data||, DATA, ALIGN=2
v1
        DCD      0x12345678         ; initial generator state
100.6.5 Optimizing GCC 4.9.1(ARM64)

指令清单100.8 Optimizing GCC 4.9.1(ARM64)

// f1: state = state * 0x19660D + 0x3C6EF35F on the word at .LANCHOR0, then return in s0
// the float whose bits are (state & 0x7FFFFF) | 0x40000000, minus 3.0.
f1:
        adrp    x2, .LANCHOR0                   // page address of the state word
        mov     w3, 26125                       // low half 0x660D ...
        mov     w0, 62303                       // low half 0xF35F ...
        movk    w3, 0x19, lsl 16                // ... w3 = 0x0019660D = 1664525
        movk    w0, 0x3c6e, lsl 16              // ... w0 = 0x3C6EF35F = 1013904223
        ldr     w1, [x2,#:lo12:.LANCHOR0]       // w1 = current state
        fmov    s0, 3.0e+0
        madd    w0, w1, w3, w0                  // w0 = w1 * w3 + w0
        str     w0, [x2,#:lo12:.LANCHOR0]       // save the new state
        and     w0, w0, 8388607                 // keep the low 23 bits
        orr     w0, w0, 1073741824              // set 0x40000000
        fmov    s1, w0                          // move the bit pattern into an FP register unchanged
        fsub    s0, s1, s0                      // s0 = s1 - 3.0
        ret
// mail_function: loop 1000000 times (w3 counts down); each iteration performs the
// generator step twice inline (there is no call to f1 in this body), converts both
// states to floats in the same way as f1, and increments w2 when
// first^2 + second^2 < 1.0. The final state is written back to .LANCHOR0.
// Returns (float)((double)(float)w2 * 4.0 / double at .LC0) in s0.
// NOTE(review): the label is probably a typo for "main_function"; it is left
// unchanged here so that any reference to it keeps working.
mail_function:
        adrp    x7, .LANCHOR0                   // page address of the state word
        mov     w3, 16960                       // low half 0x4240 ...
        movk    w3, 0xf, lsl 16                 // ... w3 = 0x000F4240 = 1000000 iterations
        mov     w2, 0                           // count = 0
        fmov    s2, 3.0e+0
        ldr     w1, [x7,#:lo12:.LANCHOR0]       // w1 = generator state, kept in a register for the whole loop
        fmov    s3, 1.0e+0

.L5:
        mov     w6, 26125
        mov     w0, 62303
        movk    w6, 0x19, lsl 16                // w6 = 1664525
        movk    w0, 0x3c6e, lsl 16              // w0 = 1013904223
        mov     w5, 26125
        mov     w4, 62303
        madd    w1, w1, w6, w0                  // first generator step
        movk    w5, 0x19, lsl 16                // w5 = 1664525
        movk    w4, 0x3c6e, lsl 16              // w4 = 1013904223
        and     w0, w1, 8388607
        add     w6, w2, 1                       // w6 is reused: candidate count + 1
        orr     w0, w0, 1073741824
        madd    w1, w1, w5, w4                  // second generator step
        fmov    s0, w0                          // bits of the first value
        and     w0, w1, 8388607
        orr     w0, w0, 1073741824
        fmov    s1, w0                          // bits of the second value
        fsub    s0, s0, s2                      // first  - 3.0
        fsub    s1, s1, s2                      // second - 3.0
        fmul    s1, s1, s1                      // second^2
        fmadd   s0, s0, s0, s1                  // s0 = first^2 + second^2
        fcmp    s0, s3                          // compare the sum with 1.0
        csel    w2, w2, w6, pl                  // keep count when N is clear, otherwise take count + 1
        subs    w3, w3, #1
        bne     .L5                             // loop until the countdown reaches zero
        scvtf   s0, w2                          // count -> float
        str     w1, [x7,#:lo12:.LANCHOR0]       // write the final generator state back
        fmov    d1, 4.0e+0
        fcvt    d0, s0                          // float -> double
        fmul    d0, d0, d1                      // * 4.0
        ldr     d1, .LC0
        fdiv    d0, d0, d1                      // / double at .LC0
        fcvt    s0, d0                          // double -> float return value
        ret
// Data for the ARM64 listing. .LC0 is a double stored as two words, low word first.
.LC0:
        .word   0                               // low word of 0x412E848000000000 = 1000000.0
        .word   1093567616                      // high word 0x412E8480
.V1:
        .word   1013904223                      // 0x3C6EF35F
.V2:
        .word   1664525                         // 0x0019660D
// .LANCHOR0 is the symbol the code addresses with adrp / :lo12:; it marks the
// location of the word that follows.
.LANCHOR0 = . + 0
v3.3095:
        .word   305419896                       // 0x12345678, initial generator state
100.6.6 Optimizing GCC 4.4.5(MIPS)

指令清单100.9 Optimizing GCC 4.4.5(MIPS)(IDA)

# NOTE(review): this listing sits under the MIPS heading but is x86-64 code in Intel
# syntax (eax, rsp, xmm0, RIP-relative operands), not MIPS output. The intended
# MIPS listing presumably needs to be restored here - confirm against the original.
#
# f1: state = state * 1664525 + 1013904223 on v1.2084; returns in xmm0 the float whose
# bits are (state & 0x7FFFFF) | 0x40000000, minus the float at .LC0.
f1:
        mov     eax, DWORD PTR v1.2084[rip]     # eax = current state
        imul    eax, eax, 1664525
        add     eax, 1013904223
        mov     DWORD PTR v1.2084[rip], eax     # save the new state
        and     eax, 8388607                    # keep the low 23 bits
        or      eax, 1073741824                 # set 0x40000000
        mov     DWORD PTR [rsp-4], eax          # bounce through memory just below rsp
        movss   xmm0, DWORD PTR [rsp-4]
        subss   xmm0, DWORD PTR .LC0[rip]
        ret
# NOTE(review): x86-64 code printed under the MIPS heading (see the registers used);
# presumably not the intended listing - confirm.
#
# f: 1000000 iterations; ebp = iteration counter, ebx = number of iterations in which
# the float at .LC1 is greater than first^2 + second^2 for two consecutive f1 results.
# Returns (float)((double)(float)ebx * .LC2 / .LC3) in xmm0.
f:
        push    rbp
        xor     ebp, ebp
        push    rbx
        xor     ebx, ebx
        sub     rsp, 16
.L6:
        xor   eax, eax
        call  f1
        xor   eax, eax
        movss DWORD PTR [rsp], xmm0             # keep the first result across the second call
        call  f1
        movss xmm1, DWORD PTR [rsp]
        mulss xmm0, xmm0                        # second^2
        mulss xmm1, xmm1                        # first^2
        lea   eax, [rbx+1]                      # eax = count + 1
        addss xmm1, xmm0                        # sum of squares
        movss xmm0, DWORD PTR .LC1[rip]
        ucomiss xmm0, xmm1
        cmova ebx, eax                          # count = count + 1 only when .LC1 > sum
        add   ebp, 1
        cmp   ebp, 1000000
        jne   .L6
        cvtsi2ss       xmm0, ebx                # count -> float
        unpcklps       xmm0, xmm0
        cvtps2pd       xmm0, xmm0               # float -> double
        mulsd xmm0, QWORD PTR .LC2[rip]
        divsd xmm0, QWORD PTR .LC3[rip]
        add   rsp, 16
        pop   rbx
        pop   rbp
        unpcklpd       xmm0, xmm0
        cvtpd2ps       xmm0, xmm0               # double -> float return value
        ret
# Data for the x86-64 code above (printed under the MIPS heading - see the review
# notes on that code). Doubles are stored low word first.
v1.2084:
        .long 305419896                         # 0x12345678, initial generator state
.LC0:
        .long 1077936128                        # 0x40400000 = 3.0f
.LC1:
        .long 1065353216                        # 0x3F800000 = 1.0f
.LC2:
        .long 0                                 # low word of the double 4.0
        .long 1074790400                        # high word 0x40100000
.LC3:
        .long 0                                 # low word of the double 1000000.0
        .long 1093567616                        # high word 0x412E8480

100.7 练习题2.16

本题的代码实现了一个著名的函数。请问它计算的是什么?如果输入4和2,该程序会出现栈溢出问题。为什么会这样,代码里有错误么?

100.7.1 Optimizing MSVC 2012 x64
; f(m, n): first argument arrives in ecx, second in edx; the result is returned in eax.
; In C-like terms the body is:
;     while (m != 0) { n = (n == 0) ? 1 : f(m, n - 1); m--; }
;     return n + 1;
; Only f(m, n - 1) is a real recursive CALL; the outer step is carried out by the loop.
; m$ and n$ are offset equates emitted by the compiler; the instructions below do not reference them.
m$ = 48
n$ = 56
f       PROC
$LN14:
        push    rbx                             ; rbx is used to keep m across the call
        sub     rsp, 32                         ; 32 bytes reserved for the callee (Microsoft x64 shadow space)
        mov     eax, edx                        ; eax = n
        mov     ebx, ecx                        ; ebx = m
        test    ecx, ecx
        je      SHORT $LN11@f                   ; m == 0: return n + 1
$LL5@f:
        test    eax, eax
        jne     SHORT $LN1@f
        mov     eax, 1                          ; n == 0: n = 1, no call needed
        jmp     SHORT $LN12@f
$LN1@f:
        lea     edx, DWORD PTR [rax-1]          ; second argument = n - 1
        mov     ecx, ebx                        ; first argument  = m
        call    f                               ; eax = f(m, n - 1), becomes the new n
$LN12@f:
        dec     ebx                             ; m--
        test    ebx, ebx
        jne     SHORT $LL5@f
$LN11@f:
        inc     eax                             ; return n + 1
        add     rsp, 32
        pop     rbx
        ret     0
f       ENDP
100.7.2 Optimizing Keil(ARM mode)
; f(m, n): m in r0, n in r1, result in r0.
;   m == 0           -> return n + 1
;   n == 0           -> tail-branch to f(m - 1, 1)
;   otherwise        -> tail-branch to f(m - 1, f(m, n - 1))
; The final step restores r4/lr and branches to f instead of calling it.
f PROC
        PUSH    {r4,lr}
        MOVS    r4,r0               ; r4 = m, flags set from m
        ADDEQ   r0,r1,#1            ; m == 0: result = n + 1
        POPEQ   {r4,pc}             ; m == 0: return
        CMP     r1,#0
        MOVEQ   r1,#1               ; n == 0: second argument = 1
        SUBEQ   r0,r0,#1            ; n == 0: first argument = m - 1
        BEQ     |L0.48|
        SUB     r1,r1,#1            ; second argument = n - 1 (r0 still holds m)
        BL      f                   ; r0 = f(m, n - 1)
        MOV     r1,r0               ; second argument = that result
        SUB     r0,r4,#1            ; first argument = m - 1
|L0.48|
        POP     {r4,lr}             ; restore saved registers before leaving
        B       f                   ; branch (not call) to f with the new arguments
        ENDP
100.7.3 Optimizing Keil(Thumb mode)
; f(m, n), Thumb build: m in r0, n in r1, result in r0.
;   m == 0           -> return n + 1                       (|L0.26|)
;   n == 0           -> return f(m - 1, 1)                 (|L0.30| then |L0.18|)
;   otherwise        -> return f(m - 1, f(m, n - 1))
; Both recursive steps are real BL calls here.
f PROC
        PUSH    {r4,lr}
        MOVS    r4,r0               ; r4 = m, flags set from m
        BEQ     |L0.26|             ; m == 0
        CMP     r1,#0
        BEQ     |L0.30|             ; n == 0
        SUBS    r1,r1,#1            ; second argument = n - 1 (r0 still holds m)
        BL      f                   ; r0 = f(m, n - 1)
        MOVS    r1,r0               ; second argument = that result
|L0.18|
        SUBS    r0,r4,#1            ; first argument = m - 1
        BL      f
        POP     {r4,pc}
|L0.26|
        ADDS    r0,r1,#1            ; result = n + 1
        POP     {r4,pc}
|L0.30|
        MOVS    r1,#1               ; second argument = 1
        B       |L0.18|
        ENDP
100.7.4 Non-optimizing GCC 4.9.1(ARM64)

指令清单100.10 Non-optimizing GCC 4.9.1(ARM64)

// f(m, n): m in w0, n in w1, result in w0. Both arguments are spilled to the frame
// ([x29,44] = m, [x29,40] = n) and reloaded for every use.
//   m == 0      -> return n + 1
//   n == 0      -> return ack(m - 1, 1)
//   otherwise   -> return ack(m - 1, ack(m, n - 1))
// NOTE(review): the label is "f" while the calls target "ack"; presumably they are the
// same function and only the label was renamed in this listing - confirm.
f:
        stp     x29, x30, [sp, -48]!            // allocate 48 bytes, save frame pointer and link register
        add     x29, sp, 0
        str     x19, [sp,16]                    // x19 is used below to keep m - 1 across a call
        str     w0, [x29,44]                    // spill m
        str     w1, [x29,40]                    // spill n
        ldr     w0, [x29,44]
        cmp     w0, wzr
        bne     .L2                             // m != 0
        ldr     w0, [x29,40]
        add     w0, w0, 1                       // result = n + 1
        b       .L3
.L2:
        ldr     w0, [x29,40]
        cmp     w0, wzr
        bne     .L4                             // n != 0
        ldr     w0, [x29,44]
        sub     w0, w0, #1                      // first argument = m - 1
        mov     w1, 1                           // second argument = 1
        bl      ack
        b       .L3
.L4:
        ldr     w0, [x29,44]
        sub     w19, w0, #1                     // w19 = m - 1, kept across the inner call
        ldr     w0, [x29,40]
        sub     w1, w0, #1                      // second argument = n - 1
        ldr     w0, [x29,44]                    // first argument = m
        bl      ack                             // inner call
        mov     w1, w0                          // second argument = inner result
        mov     w0, w19                         // first argument = m - 1
        bl      ack                             // outer call
.L3:
        ldr     x19, [sp,16]
        ldp     x29, x30, [sp], 48
        ret
100.7.5 Optimizing GCC 4.9.1(ARM64)

开启优化模式之后,GCC生成的代码反而更长了。这是为什么?

指令清单100.11 Optimizing GCC 4.9.1(ARM64)

// ack(m, n): m in w0, n in w1, result in w0.
// Frame: 160 bytes holding x29/x30, x19-x28 and d8-d15, all restored at .L2.
// The body is nine nested loop levels (.L4, .L7, .L10, .L13, .L16, .L19, .L22, .L25,
// .L28) that all share one shape:
//   - if w1 == 0: set w1 = 1, decrement this level's counter and, if the previous
//     counter value was 0, leave the level; otherwise repeat the level;
//   - if w1 != 0: decrement w1 and descend into the next level.
// Only the innermost level (.L28) contains a real recursive call (bl ack).
// Presumably this is the recursion inlined several levels deep by the optimizer - confirm.
// s8-s15 are only ever touched with fmov here: they carry 32-bit integer values moved
// to and from w registers; no floating-point arithmetic is performed.
ack:
        stp     x29, x30, [sp, -160]!
        add     x29, sp, 0
        stp     d8, d9, [sp,96]
        stp     x19, x20, [sp,16]
        stp     d10, d11, [sp,112]
        stp     x21, x22, [sp,32]
        stp     d12, d13, [sp,128]
        stp     x23, x24, [sp,48]
        stp     d14, d15, [sp,144]
        stp     x25, x26, [sp,64]
        stp     x27, x28, [sp,80]
        cbz     w0, .L2                         // m == 0: return n + 1
        sub     w0, w0, #1
        fmov    s10, w0                         // level-1 counter (m - 1) kept in s10
        b       .L4
.L46:
        fmov    w0, s10
        mov     w1, 1
        sub     w0, w0, #1
        fmov    s10, w0
        fmov    w0, s13
        cbz     w0, .L2
.L4:                                            // level 1: counter in s10, copy in s13
        fmov    s13, s10
        cbz     w1, .L46
        sub     w1, w1, #1
        fmov    s11, s10
        b       .L7
.L48:
        fmov    w0, s11
        mov     w1, 1
        sub     w0, w0, #1
        fmov    s11, w0
        fmov    w0, s14
        cbz     w0, .L47
.L7:                                            // level 2: counter in s11, copy in s14
        fmov    s14, s11
        cbz     w1, .L48
        sub     w1, w1, #1
        fmov    s12, s11
        b       .L10
.L50:
        fmov    w0, s12
        mov     w1, 1
        sub     w0, w0, #1
        fmov    s12, w0
        fmov    w0, s15
        cbz     w0, .L49
.L10:                                           // level 3: counter in s12, copy in s15
        fmov    s15, s12
        cbz     w1, .L50
        sub     w1, w1, #1
        fmov    s8, s12
        b       .L13
.L52:
        fmov    w0, s8
        mov     w1, 1
        sub     w0, w0, #1
        fmov    s8, w0
        fmov    w0, s9
        cbz     w0, .L51
.L13:                                           // level 4: counter in s8, copy in s9
        fmov    s9, s8
        cbz     w1, .L52
        sub     w1, w1, #1
        fmov    w22, s8
        b       .L16
.L54:
        mov     w1, 1
        sub     w22, w22, #1
        cbz     w28, .L53
.L16:                                           // level 5: counter in w22, copy in w28
        mov     w28, w22
        cbz     w1, .L54
        sub     w1, w1, #1
        mov     w21, w22
        b       .L19
.L56:
        mov     w1, 1
        sub     w21, w21, #1
        cbz     w24, .L55
.L19:                                           // level 6: counter in w21, copy in w24
        mov     w24, w21
        cbz     w1, .L56
        sub     w1, w1, #1
        mov     w20, w21
        b       .L22
.L58:
        mov     w1, 1
        sub     w20, w20, #1
        cbz     w25, .L57
.L22:                                           // level 7: counter in w20, copy in w25
        mov     w25, w20
        cbz     w1, .L58
        sub     w1, w1, #1
        mov     w26, w20
        b       .L25
.L60:
        mov     w1, 1
        sub     w26, w26, #1
        cbz     w27, .L59
.L25:                                           // level 8: counter in w26, copy in w27
        mov     w27, w26
        cbz     w1, .L60
        sub     w1, w1, #1
        mov     w19, w26
        b       .L28
.L62:
        mov     w23, w19
        mov     w1, 1
        sub     w19, w19, #1
        cbz     w23, .L61
.L28:                                           // level 9 (innermost): counter in w19, copy in w23
        add     w0, w19, 1                      // first argument for the call below = counter + 1
        cbz     w1, .L62
        sub     w1, w1, #1                      // second argument = w1 - 1
        mov     w23, w19
        sub     w19, w19, #1
        bl      ack                             // the only real recursive call
        mov     w1, w0                          // its result becomes the new w1
        cbnz    w23, .L28
// Unwinding: each label below adds 1 to w1, decrements the enclosing level's counter
// and re-enters that level while its saved copy was non-zero.
.L61:
        add     w1, w1, 1
        sub     w26, w26, #1
        cbnz    w27, .L25
.L59:
        add     w1, w1, 1
        sub     w20, w20, #1
        cbnz    w25, .L22
.L57:
        add     w1, w1, 1
        sub     w21, w21, #1
        cbnz    w24, .L19
.L55:
        add     w1, w1, 1
        sub     w22, w22, #1
        cbnz    w28, .L16
.L53:
        fmov    w0, s8
        add     w1, w1, 1
        sub     w0, w0, #1
        fmov    s8, w0
        fmov    w0, s9
        cbnz    w0, .L13
.L51:
        fmov    w0, s12
        add     w1, w1, 1
        sub     w0, w0, #1
        fmov    s12, w0
        fmov    w0, s15
        cbnz    w0, .L10
.L49:
        fmov    w0, s11
        add     w1, w1, 1
        sub     w0, w0, #1
        fmov    s11, w0
        fmov    w0, s14
        cbnz    w0, .L7
.L47:
        fmov    w0, s10
        add     w1, w1, 1
        sub     w0, w0, #1
        fmov    s10, w0
        fmov    w0, s13
        cbnz    w0, .L4
.L2:                                            // common exit: result = w1 + 1
        add     w0, w1, 1
        ldp     d8, d9, [sp,96]
        ldp     x19, x20, [sp,16]
        ldp     d10, d11, [sp,112]
        ldp     x21, x22, [sp,32]
        ldp     d12, d13, [sp,128]
        ldp     x23, x24, [sp,48]
        ldp     d14, d15, [sp,144]
        ldp     x25, x26, [sp,64]
        ldp     x27, x28, [sp,80]
        ldp     x29, x30, [sp], 160
        ret
100.7.6 Non-optimizing GCC 4.4.5(MIPS)

指令清单100.12 Non-optimizing GCC 4.4.5(MIPS)(IDA)

# f(m, n): m in $a0, n in $a1, result in $v0.
#   m == 0      -> return n + 1
#   n == 0      -> return f(m - 1, 1)
#   otherwise   -> return f(m - 1, f(m, n - 1))
# Frame: 0x28 bytes; $ra, $fp and $s0 are saved at 0x28+var_4 / var_8 / var_C.
# The arguments are spilled to 0x28+arg_0 and 0x28+arg_4 off $fp, i.e. at and just
# above the value $sp had on entry.
# "or $at, $zero" follows every load, branch and jal; presumably it is a no-op
# filling the delay slot - confirm.
f:                                          # CODE XREF: f+64
                                            # f+94 ...

var_C          = -0xC
var_8          = -8
var_4          = -4
arg_0          = 0
arg_4          = 4

               addiu   $sp, -0x28
               sw      $ra, 0x28+var_4($sp)
               sw      $fp, 0x28+var_8($sp)
               sw      $s0, 0x28+var_C($sp)
               move    $fp, $sp
               sw      $a0, 0x28+arg_0($fp)     # spill m
               sw      $a1, 0x28+arg_4($fp)     # spill n
               lw      $v0, 0x28+arg_0($fp)
               or      $at, $zero
               bnez    $v0, loc_40              # m != 0
               or      $at, $zero
               lw      $v0, 0x28+arg_4($fp)
               or      $at, $zero
               addiu   $v0, 1                   # result = n + 1
               b       loc_AC
               or      $at, $zero

loc_40:                                     # CODE XREF: f+24
               lw      $v0, 0x28+arg_4($fp)
               or      $at, $zero
               bnez    $v0, loc_74              # n != 0
               or      $at, $zero
               lw      $v0, 0x28+arg_0($fp)
               or      $at, $zero
               addiu   $v0, -1
               move    $a0, $v0                 # first argument = m - 1
               li      $a1, 1                   # second argument = 1
               jal     f
               or      $at, $zero
               b       loc_AC
               or      $at, $zero

loc_74:                                     # CODE XREF: f+48
               lw      $v0, 0x28+arg_0($fp)
               or      $at, $zero
               addiu   $s0, $v0, -1             # $s0 = m - 1, kept across the inner call
               lw      $v0, 0x28+arg_4($fp)
               or      $at, $zero
               addiu   $v0, -1
               lw      $a0, 0x28+arg_0($fp)     # first argument = m
               move    $a1, $v0                 # second argument = n - 1
               jal     f                        # inner call
               or      $at, $zero
               move    $a0, $s0                 # first argument = m - 1
               move    $a1, $v0                 # second argument = inner result
               jal     f                        # outer call
               or      $at, $zero

loc_AC:                                    # CODE XREF: f+38
                                           # f+6C
               move    $sp, $fp
               lw      $ra, 0x28+var_4($sp)
               lw      $fp, 0x28+var_8($sp)
               lw      $s0, 0x28+var_C($sp)
               addiu   $sp, 0x28
               jr      $ra
               or      $at, $zero

100.8 练习题2.17

下列程序向stdout输出信息,而且每次输出的结果还不一样。请问它输出的是什么信息?

请下载编译后的可执行文件:

可能有个别版本的Windows无法执行这个程序。如果发生这种情况,请下载MSVC 2012 redist.(http://www.microsoft.com/en-us/download/details.aspx?id=30679)。

100.9 练习题2.18

下列程序会验证密码。请找到它的密码。

另外,它可以接受的密码不是唯一的。请尽可能地多列举一些密码。

您还可以对它进行修改,改变程序的密码:

100.10 练习题2.19

这组题目和练习题2.18的练习内容相同:

100.11 练习题2.20

下列程序向stdout输出信息。请问它输出的是什么信息?