计组复习题集

计组复习题集_第三章

首先说一点我自己容易搞混的知识点

leal (%edx), %eax
movl %edx, %eax

效果虽然一样，就是%eax中存储的内容变成了%edx中存储的内容

但是实现过程有点不一样：

书上写的是：

movl 从指定的位置读入数据

leal 将有效地址写入目的操作数

说通俗点就是：

movl (%edx), %eax 类似于 a = *p;（访问内存，读出 p 所指位置的值）

leal (%edx), %eax 类似于 a = &(*p); 也就是 a = p;（只计算有效地址，不访问内存，所以和 movl %edx, %eax 效果相同）

3.18

Starting with C code of the form
int test(int x, int y) {
    int val = ____;
    if (____) {
        if (____)
            val = ____;
        else
            val = ____;
    } else if (____)
        val = ____;
    return val;
}
GCC generates the following assembly code:
; x at %ebp+8, y at %ebp+12
  movl    8(%ebp), %eax
  movl    12(%ebp), %edx
  cmpl    $-3, %eax
  jge     .L2
  cmpl    %edx, %eax
  jle     .L3
  imull   %edx, %eax
  jmp     .L4
.L3:
  leal    (%edx,%eax), %eax
  jmp     .L4
.L2:
  cmpl    $2, %eax
  jg      .L5
  xorl    %edx, %eax
  jmp     .L4
.L5:
  subl    %edx, %eax
.L4:
Fill in the missing expressions in the C code. To make the code fit into the
C code template, you will need to undo some of the reordering of computations
done by GCC.

ANSWER

/**
 * Reconstructed source for problem 3.18.
 *
 * GCC reordered the computations: the initial `val = x ^ y` from the first
 * line of the C code ends up near the bottom of the assembly and is only
 * evaluated on the path that actually returns it (the reordering is otherwise
 * minor).
 * For an `if`, GCC avoids jumping unless it has to: the fall-through path is
 * the "then" branch and the jump goes to the "else" branch, so `jge`/`jle`
 * usually encode the else condition, i.e. the negation of the C condition.
 */
int test(int x, int y) {
    int val = x ^ y;
    if (x < -3) {       /* `jge .L2` jumps when x >= -3; otherwise fall through */
        if (x > y)      /* likewise: `jle .L3` jumps when x <= y */
            val = x * y;
        else            /* .L3 */
            val = x + y;    /* leal (%edx,%eax), %eax computes x + y */
    } else if (x > 2)   /* .L2 */
        /* `cmpl $2, %eax` / `jg .L5` suggests x <= 2 at first glance, but the
         * condition really is x > 2: the fall-through path here is the
         * implicit final else (val keeps x ^ y), so the jump carries the C
         * condition itself rather than its negation. */
        val = x - y;    /* .L5 */
    return val;
}

3.22

A function, fun_a, has the following overall structure:
int fun_a(unsigned x) {
  int val = 0;
  while (_____) {
    _____;
  }
  return _____;
}
The GCC C compiler generates the following assembly code:
;x at %ebp+8
  movl    8(%ebp), %edx
  movl    $0, %eax
  testl   %edx, %edx
  je      .L7
.L10:
  xorl    %edx, %eax
  shrl    %edx ;Shift right by 1
  jne     .L10
.L7:
  andl    $1, %eax
Reverse engineer the operation of this code and then do the following:

A. Use the assembly-code version to fill in the missing parts of the C code.

B. Describe in English what this function computes

ANSWER

/*A*/
/* Reconstructed source for problem 3.22, part A.
 * Returns 1 when x contains an odd number of 1 bits and 0 otherwise. */
int fun_a(unsigned x) {
  int val = 0;
  /* XOR every right-shifted copy of x into val until no bits are left. */
  while (x) {
    val = val ^ x;    /* xorl %edx, %eax */
    x = x >> 1;       /* shrl %edx: logical shift, x is unsigned */
  }
  return val & 0x1;   /* andl $1, %eax: only the lowest bit is returned */
}

/*B*/
/**
 * 由于返回值 val & 1 从意义上来说是只取了val最低位的信息，于是我们只讨论val最低位表达的信息即可
 * 循环的作用就是计算val = (x >> 0) ^ (x >> 1) ^ (x >> 2) ^ ... ^ (x >> k-1) [假设x有k位有效数字]
 * 比如令 x = 1011
 * 那么：
 *   1011
 *   0101
 *   0010
 * ^ 0001
 * ------
 *   1101
 * 可以看出循环结束后val的最低位就是x的从最低位到最高位的每一位取异或，举个例子，假设x=10111，那么val的最低位=1^0^1^1^1。
 * 当有偶数个1时，先把所有的1两两异或转成0，式子中就只剩下0了，式子的结果是0，例如：1^0^0^1 = (1^1)^0^0 = 0^0^0 = 0;
 * 当有奇数个1时，尽可能地把1两两异或转成0，最后式子中只剩下一个1，表达式的值就是1，例如：1^0^1^1 = (1^1)^0^1 = 0^0^1 = 1;
 * 由此val的最低位的最终值反映了x中1的数量的奇偶性，这也是这个函数的作用：计算参数x中1的数量的奇偶性，当x中有奇数个1，返回1；当x中有偶数个1，返回0。
 */

3.29

For a C function switcher with the general structure

int switcher(int a, int b, int c)
{
  int answer;
  switch(a) {
  case _____:         /* Case A */
    c = _____;
    /* Fall through */
  case _____:         /* Case B */
    answer = _____;
    break;
  case _____:         /* Case C */
  case _____:         /* Case D */
    answer = _____;
    break;
  case _____:         /* Case E */
    answer = _____;
    break;
  default:
    answer = _____;
  }
  return answer;
}

GCC generates the assembly code and jump table shown in Figure 3.20.

Fill in the missing parts of the C code. Except for the ordering of case labels C and D, there is only one way to fit the different cases into the template.

;a at %ebp+8, b at %ebp+12, c at %ebp+16
  movl    8(%ebp), %eax
  cmpl    $7, %eax
  ja      .L2
  jmp     *.L7(,%eax,4)
.L2:
  movl    12(%ebp), %eax
  jmp     .L8
.L5:
  movl    $4, %eax
  jmp     .L8
.L6:
  movl    12(%ebp), %eax
  xorl    $15, %eax
  movl    %eax, 16(%ebp)
.L3:
  movl    16(%ebp), %eax
  addl    $112, %eax
  jmp     .L8
.L4:
  movl    16(%ebp), %eax
  addl    12(%ebp), %eax
  sall    $2, %eax
.L8:

.L7:
  .long   .L3
  .long   .L2
  .long   .L4
  .long   .L2
  .long   .L5
  .long   .L6
  .long   .L2
  .long   .L4

ANSWER

/**
 * Reconstructed source for problem 3.29.
 *
 * The target of the `ja` instruction (.L2) is the default case; the
 * jump-table slots that hold .L2 (indices 1, 3 and 6) therefore have no case
 * label of their own.
 */
int switcher(int a, int b, int c)
{
  int answer;
  switch(a) {
  case 5:         /* Case A: .L6 */
    c = b ^ 15;
    /* Fall through */
  case 0:         /* Case B: .L3 */
    answer = c + 112;
    break;
  case 7:         /* Case C: .L4 */
  case 2:         /* Case D: .L4 */
    answer = (c + b) << 2;
    break;
  case 4:         /* Case E: .L5 */
    /* The assembly loads the constant 4 (movl $4, %eax). Writing
     * `answer = a` is equivalent because a == 4 on this path, so the original
     * source may have used either form. */
    answer = 4;
    break;
  default:        /* .L2 */
    answer = b;
  }
  return answer;
}

3.34

For a C function having the general structure
int rfun(unsigned x) {
  if (_____)
    return _____;
  unsigned nx = _____;
  int rv = rfun(nx);
  return _____;
}
GCC generates the following assembly code (with the setup and completion code omitted):
  movl    8(%ebp), %ebx
  movl    $0, %eax
  testl   %ebx, %ebx
  je      .L3
  movl    %ebx, %eax
  shrl    %eax            ;Shift right by 1
  movl    %eax, (%esp)
  call    rfun
  movl    %ebx, %edx
  andl    $1, %edx
  leal    (%edx,%eax), %eax
.L3:
A. What value does rfun store in the callee-save register %ebx?

B. Fill in the missing expressions in the C code shown above.

C. Describe in English what function this code computes.

ANSWER

/*A*/
/**
 * %ebx 存储的是x的值
 */

/*B*/
/* Reconstructed source for problem 3.34, part B.
 * Recursively adds up the bits of x: the low bit of x plus the bit sum of
 * x >> 1. Returns 0 for x == 0. */
int rfun(unsigned x) {
  if (!x)
    return 0;               /* base case: testl %ebx, %ebx / je .L3 with %eax = 0 */
  unsigned nx = x >> 1;     /* shrl %eax: drop the lowest bit */
  int rv = rfun(nx);
  return rv + (x & 1);      /* andl $1, %edx / leal (%edx,%eax), %eax */
}

/*C*/
/**
 * 计算x中位的和：递归地计算除了最低位之外的所有其他位的和，然后加上最低位得到结果
 */

3.37

Consider the following source code, where M and N are constants declared with #define:
/* M and N are #define constants; finding their values is the exercise. */
int mat1[M][N];   /* M rows of N ints each */
int mat2[N][M];   /* N rows of M ints each */

/* Returns mat1[i][j] + mat2[j][i]. Rows are stored contiguously, so the
 * element index of mat1[i][j] is N*i + j and that of mat2[j][i] is M*j + i. */
int sum_element(int i, int j) {
  return mat1[i][j] + mat2[j][i];
}
In compiling this program, GCC generates the following assembly code:
;i at %ebp+8, j at %ebp+12
movl    8(%ebp), %ecx
movl    12(%ebp), %edx
leal    0(,%ecx,8), %eax
subl    %ecx, %eax
addl    %edx, %eax          ;%eax = 7 * i + j
leal    (%edx,%edx,4), %edx
addl    %ecx, %edx          ;%edx = 5 * j + i
movl    mat1(,%eax,4), %eax
addl    mat2(,%edx,4), %eax
Use your reverse engineering skills to determine the values of M and N based on this assembly code.

ANSWER

 
%eax = 7 * i + j
%edx = 5 * j + i
而 &mat1[i][j] = mat1 + (i * N + j) * 4
mat1(,%eax,4) = mat1(,7 * i + j,4) = mat1 + (i * 7 + j) * 4
所以 N = 7
同理，&mat2[j][i] = mat2 + (j * M + i) * 4，而 mat2(,%edx,4) = mat2 + (j * 5 + i) * 4，所以 M = 5

3.39

Consider the following structure declaration:
/* Node type used by sp_init: a pointer, an embedded pair of ints, and a link
 * to another node of the same type. */
struct prob {
  int *p;              /* pointer to an int */
  struct {
    int x;
    int y;
  } s;                 /* anonymous struct embedded by value, not a pointer */
  struct prob *next;   /* pointer to a struct prob (self-referential) */
};
This declaration illustrates that one structure can be embedded within another, just as arrays can be embedded within structures, and arrays can be embedded within arrays.
The following procedure (with some expressions omitted) operates on this structure:
void sp_init(struct prob *sp)
{
  sp->s.x = _____;
  sp->p = _____;
  sp->next = _____;
}
A. What are the offsets (in bytes) of the following fields?
     p: _____
   s.x: _____
   s.y: _____
  next: _____
B. How many total bytes does the structure require?

C.The compiler generates the following assembly code for the body of sp_init:
;sp at %ebp+8
  movl 8(%ebp), %eax  ;%eax = sp (type of sp: pointer)
  movl 8(%eax), %edx  ;%edx = sp->s.y
  movl %edx, 4(%eax)  ;sp->s.x = sp->s.y
  leal 4(%eax), %edx  ;%edx = &(sp->s.x)
  movl %edx, (%eax)   ;sp->p = &(sp->s.x)
  movl %eax, 12(%eax) ;sp->next = sp
On the basis of this information, fill in the missing expressions in the code for sp_init.

ANSWER

/*A*/
     p: 0
   s.x: 4
   s.y: 8
  next: 12
（IA32 下 int 和指针都是 4 字节，这与汇编代码中使用的偏移量 0、4、8、12 一致）

/*B*/
  16 bytes in total.

/*C*/
/*
 * Reconstructed body of sp_init (problem 3.39, part C), read off the
 * assembly with sp held in %eax:
 *   movl 8(%eax), %edx ; movl %edx, 4(%eax)   ->  sp->s.x = sp->s.y
 *   leal 4(%eax), %edx ; movl %edx, (%eax)    ->  sp->p = &(sp->s.x)
 *   movl %eax, 12(%eax)                       ->  sp->next = sp
 * The offsets used by the listing are p = 0, s.x = 4, s.y = 8, next = 12.
 */
void sp_init(struct prob *sp)
{
  sp->s.x = sp->s.y;     /* copy y into x */
  sp->p = &(sp->s.x);    /* p points at the embedded x field */
  sp->next = sp;         /* the node links to itself */
}

3.56

Consider the following assembly code:

;x at %ebp+8, n at %ebp+12
  movl    8(%ebp), %esi
  movl    12(%ebp), %ebx
  movl    $-1, %edi
  movl    $1, %edx
.L2:
  movl    %edx, %eax
  andl    %esi, %eax
  xorl    %eax, %edi
  movl    %ebx, %ecx
  sall    %cl, %edx
  testl   %edx, %edx
  jne     .L2
  movl    %edi, %eax

The preceding code was generated by compiling C code that had the following overall form:
int loop(int x, int n)
{
  int result = _____;
  int mask;
  for (mask = _____; mask _____; mask = _____) {
    result ^= _____;
  }
  return result;
}
Your task is to fill in the missing parts of the C code to get a program equivalent to the generated assembly code. Recall that the result of the function is returned in register %eax. You will find it helpful to examine the assembly code before, during, and after the loop to form a consistent mapping between the registers and the program variables.

A. Which registers hold program values x, n, result, and mask?

B. What are the initial values of result and mask?

C. What is the test condition for mask?

D. How does mask get updated?

E. How does result get updated?

F. Fill in all the missing parts of the C code.

ANSWER

/*A*/
       x : %esi
       n : %ebx
    mask : %edx
  result : %edi

/*B*/
    mask : 1
  result : -1

/*C*/
  mask != 0;

/*D*/
  mask <<= n;

/*E*/
  result ^= x & mask;

/*F*/
/*
 * Reconstructed source for problem 3.56, part F.
 *
 * Starting from result = -1, XORs into result every bit of x selected by
 * mask. mask starts at 1 and is shifted left by n after each iteration; the
 * loop ends once the set bit has been shifted out and mask is 0.
 * Register mapping: x in %esi, n in %ebx, result in %edi, mask in %edx.
 *
 * n must be between 1 and 31: with n == 0 the mask never changes, so the
 * loop does not terminate, and larger shift counts are undefined in C.
 * The shift count is plain `n`, as in answer D and in the compiled
 * `sall %cl, %edx`; no narrowing cast is involved.
 */
int loop(int x, int n)
{
  int result = -1;
  int mask;
  for (mask = 1; mask != 0; mask = mask << n) {
    result ^= x & mask;   /* andl %esi, %eax ; xorl %eax, %edi */
  }
  return result;
}

3.59

This problem will give you a chance to reverse engineer a switch statement from machine code. In the following procedure, the body of the switch statement has been removed:
int switch_prob(int x, int n)
{
  int result = x;
  switch(n) {
    /* Fill in code here */
  }
  return result;
}
Figure 3.44 shows the disassembled machine code for the procedure. We can see in lines 4 and 5 that parameters x and n are loaded into registers %eax and %edx, respectively.

The jump table resides in a different area of memory. We can see from the indirect jump on line 9 that the jump table begins at address 0x80485d0. Using the GDB debugger, we can examine the six 4-byte words of memory comprising the jump table with the command x/6w 0x80485d0. GDB prints the following:
(GDB) x/6w 0x80485d0
0x80485d0: 0x08048438 0x08048448 0x08048438 0x0804843d
0x80485e0: 0x08048442 0x08048445
Fill in the body of the switch statement with C code that will have the same behavior as the machine code.
08048420 <switch_prob>:
8048420: 55                     push %ebp
8048421: 89 e5                  mov %esp,%ebp
8048423: 8b 45 08               mov 0x8(%ebp),%eax
8048426: 8b 55 0c               mov 0xc(%ebp),%edx
8048429: 83 ea 32               sub $0x32,%edx
804842c: 83 fa 05               cmp $0x5,%edx
804842f: 77 17                  ja 8048448 <switch_prob+0x28>
8048431: ff 24 95 d0 85 04 08   jmp *0x80485d0(,%edx,4)
8048438: c1 e0 02               shl $0x2,%eax
804843b: eb 0e                  jmp 804844b <switch_prob+0x2b>
804843d: c1 f8 02               sar $0x2,%eax
8048440: eb 09                  jmp 804844b <switch_prob+0x2b>
8048442: 8d 04 40               lea (%eax,%eax,2),%eax
8048445: 0f af c0               imul %eax,%eax
8048448: 83 c0 0a               add $0xa,%eax
804844b: 5d                     pop %ebp
804844c: c3                     ret

ANSWER

/**
 * Reconstructed source for problem 3.59.
 *
 * The instructions at 8048429 and 804842c subtract 0x32 (decimal 50, not 32)
 * from n and compare the result with 5, so the jump-table index is n - 50
 * and the table covers n = 50..55. Index 1 (n == 51) holds the same address
 * as the `ja` target, 0x8048448, so it belongs to the default case.
 *
 * Jump table -> case mapping:
 *   index 0, 2 -> 0x8048438 (shl $2)           : case 50, case 52
 *   index 3    -> 0x804843d (sar $2)           : case 53
 *   index 4    -> 0x8048442 (lea: result * 3)  : case 54, falls into case 55
 *   index 5    -> 0x8048445 (imul)             : case 55, falls into default
 *   index 1    -> 0x8048448 (add $0xa)         : default
 */
int switch_prob(int x, int n)
{
  int result = x;
  switch(n) {
    case 50:
    case 52:
      result <<= 2;
      break;
    case 53:
      result >>= 2;      /* sar: arithmetic right shift */
      break;
    case 54:
      result *= 3;
      /* Fall through */
    case 55:
      result *= result;
      /* Fall through */
    default:
      result += 10;
  }
  return result;
}

3.64

For this exercise, we will examine the code generated by GCC for functions that have structures as arguments and return values, and from this see how these language features are typically implemented.
The following C code has a function word_sum having structures as argument and return values, and a function prod that calls word_sum:
/* Argument type of word_sum: an int together with a pointer to an int. */
typedef struct {
  int a;
  int *p;
} str1;

/* Return type of word_sum: a sum and a difference. */
typedef struct {
  int sum;
  int diff;
} str2;

/* Returns a str2 with sum = s1.a + *s1.p and diff = s1.a - *s1.p.
 * s1 is taken by value and the result is returned by value; s1.p is
 * dereferenced, so it has to point at a readable int. */
str2 word_sum(str1 s1) {
  str2 result;
  result.sum = s1.a + *s1.p;
  result.diff = s1.a - *s1.p;
  return result;
}

/* Returns (x + y) * (x - y), computed through word_sum. */
int prod(int x, int y)
{
  str1 s1;
  str2 s2;
  s1.a = x;
  s1.p = &y;            /* points at prod's own copy of the argument y */
  s2 = word_sum(s1);    /* s2.sum = x + y, s2.diff = x - y */
  return s2.sum * s2.diff;
}
GCC generates the following code for these two functions:
word_sum:
  pushl %ebp
  movl %esp, %ebp
  pushl %ebx
  movl 8(%ebp), %eax
  movl 12(%ebp), %ebx
  movl 16(%ebp), %edx
  movl (%edx), %edx
  movl %ebx, %ecx
  subl %edx, %ecx
  movl %ecx, 4(%eax)
  addl %ebx, %edx
  movl %edx, (%eax)
  popl %ebx
  popl %ebp
  ret $4
prod:
  pushl %ebp
  movl %esp, %ebp
  subl $20, %esp
  leal 12(%ebp), %edx
  leal -8(%ebp), %ecx
  movl 8(%ebp), %eax
  movl %eax, 4(%esp)
  movl %edx, 8(%esp)
  movl %ecx, (%esp)
  call word_sum
  subl $4, %esp
  movl -4(%ebp), %eax
  imull -8(%ebp), %eax
  leave
  ret
The instruction ret $4 is like a normal return instruction, but it increments the stack pointer by 8 (4 for the return address plus 4 additional), rather than 4.

A. We can see in lines 5–7 of the code for word_sum that it appears as if three
values are being retrieved from the stack, even though the function has only a single argument. Describe what these three values are.

B. We can see in line 4 of the code for prod that 20 bytes are allocated in the stack frame. These get used as five fields of 4 bytes each. Describe how each of these fields gets used.

C. How would you describe the general strategy for passing structures as arguments to a function?

D. How would you describe the general strategy for handling a structure as a return value from a function?

ANSWER

/* A */
/**
 * movl (%edx), %edx  ; 解引用（读内存）操作，对应 *s1.p，而 %edx 来自 16(%ebp)，可知 16(%ebp) 对应 s1.p；根据 str1 的成员顺序，可知 12(%ebp) 对应 s1.a
 * movl %ebx, %ecx    ; 
 * subl %edx, %ecx    ; - 操作，对应 s1.a - *s1.p;
 * movl %ecx, 4(%eax) ; result.diff = s1.a - *s1.p
 * addl %ebx, %edx    ; + 操作，对应 s1.a + *s1.p;
 * movl %edx, (%eax)  ; 根据这几个赋值操作，可以推断出 %eax, 也就是 8(%ebp) 对应 result 的地址, 4(%eax) = result.diff, (%eax) = result.sum
 */
  line 5 ~ line 7 的几个值：
  8(%ebp)  = %eax : &result（调用者为返回的结构体分配的空间的地址，在 prod 中就是 &s2）
  12(%ebp) = %ebx : s1.a
  16(%ebp) = %edx : s1.p

/* B */
/* 不严谨地说，movl就是揭示这个地址对应的意义的操作 */
/**
 * prod:
 *   pushl %ebp
 *   movl %esp, %ebp
 *   subl $20, %esp
 *   leal 12(%ebp), %edx  ; 12(%ebp) 是传入的参数 y，leal 只计算地址并把它存入寄存器 %edx（不访问内存），可推断出是 &y
 *   leal -8(%ebp), %ecx  ; %ecx = %ebp-8，看完后面的代码可知这是 &s2
 *   movl 8(%ebp), %eax   ; 8(%ebp) 是传入的参数 x，movl 读出 8(%ebp) 处内存中的值存入寄存器 %eax，可推断出是 x
 *   movl %eax, 4(%esp)   ; 赋值操作，将 x 赋给 s1.a，故得知 s1.a 存在 %esp+4 = %ebp-16
 *   movl %edx, 8(%esp)   ; 同理，s1.p = &y 存在 %esp+8 = %ebp-12
 *   movl %ecx, (%esp)    ; 把地址 %ebp-8 存到了 (%esp) = %ebp-20 里。看完了代码之后，能知道 %ebp-8 是 s2 的起始地址，所以 %ebp-20 处存的就是 &s2
 *   call word_sum        ;
 *   subl $4, %esp        ; word_sum 用 ret $4 返回时多弹出了 4 个字节，这里把 %esp 调回原来的位置
 *   movl -4(%ebp), %eax  ;
 *   imull -8(%ebp), %eax ; 由上行和这行可以推出 s2 在 -8(%ebp), 且 s2.diff 在 %ebp-4, s2.sum 在 %ebp-8
 *   leave
 *   ret
 */
   -4(%ebp) : s2.diff
   -8(%ebp) : s2.sum
  -12(%ebp) : s1.p（= &y，作为 word_sum 的参数）
  -16(%ebp) : s1.a（= x，作为 word_sum 的参数）
  -20(%ebp) : &s2（传给 word_sum 的返回值存放地址）

/* C */
传入结构体参数时的传入方法：
从word_sum的汇编代码我们可以看到，向函数传入结构体参数的通用策略是：将结构体参数的成员的值分别作为参数传入函数。
通俗地说，就是将结构体参数拆开再传入。

/* D */
函数怎么将一个结构体作为返回值返回：
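首先，ret指令本身并不“返回”值；按照约定，整数或指针类型的返回值放在寄存器 %eax 中。
然后，看word_sum的汇编代码，%eax存储的是调用者传入的、用来存放返回结构体的地址（在 prod 中是 &s2；赋值操作是movl %edx,(%eax), 也就是写到%eax存储的值指向的地址）
所以，通用策略是：调用者在自己的栈帧中为返回的结构体分配空间，并把这块空间的地址作为隐藏的第一个参数传入；被调用函数把结果直接写到这个地址，并在 %eax 中返回该地址。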

3.65

In the following code, A and B are constants defined with #define:
/* First argument type of setVal: an A-by-B array of shorts followed by an int. */
typedef struct {
  short x[A][B]; /* Unknown constants A and B */
  int y;
} str1;

/* Second argument type of setVal; both array members have B elements. */
typedef struct {
  char array[B];
  int t;
  short s[B];
  int u;
} str2;

/* Stores q->t + q->u into p->y. */
void setVal(str1 *p, str2 *q) {
  int v1 = q->t;
  int v2 = q->u;
  p->y = v1+v2;
}
GCC generates the following code for the body of setVal:
movl    12(%ebp), %eax
movl    36(%eax), %edx
addl    12(%eax), %edx
movl    8(%ebp), %eax
movl    %edx, 92(%eax)
What are the values of A and B? (The solution is unique.)

ANSWER

movl    12(%ebp), %eax  ; gets q
movl    36(%eax), %edx  ; 结合下一行，由于数据对齐的原因，只能推出 16+2*B 在 [33,36]范围内都是可能的, 不能直接说 16+2*B = 36。 从此句得知 B ∈ [8.5, 10]
addl    12(%eax), %edx  ; B ∈ [9,12], 结合上一句的推理结果，B = 9 或 10。
movl    8(%ebp), %eax   ; gets p
movl    %edx, 92(%eax)  ; A*B*2 ∈ [89 , 92], A * B ∈ [44.5 , 46]

; 所以 B = 9 或 10， 且需满足 A * B ∈ [44.5 , 46] ∧ A ∈ N+（正整数集）
; 设 B = 9， 则 A ∈ [44.5/9, 46/9]。由于 [44.5/9, 46/9] ∩ N+ = 5，所以 A = 5 是有效的数字。
; 设 B = 10, 则 A ∈ [4.45, 4.6], 这个区间内并没有整数，[4.45, 4.6] ∩ N+ = φ，没有能满足条件的值，因此 B 不能取10。
; 综上所述，B = 9 且 A = 5

A = 5, B = 9
; 这道题中文版和英文版数据不一样，所以结果也不一样
; 中文版的按照上面的流程走一遍，得到答案是: A = 3, B = 7