a[i]==i[a]==*(i+a)==*(a+i)

在C语言中,如果我们要访问一个数组的某个下标对应的元素,通常的写法是a[i]。但无论从C语言标准还是从汇编的角度看,写成i[a]都一点问题没有:C标准把下标表达式E1[E2]定义为(*((E1)+(E2))),而加法满足交换律,所以a[i]与i[a]是同一个表达式的两种写法。

下面通过代码给出证明。

o foo1.c

 1 int main(int argc, char *argv[])
 2 {
 3     unsigned int a[] = {1, 2, 3};
 4     unsigned int n = sizeof (a) / sizeof (int);
 5 
 6     unsigned int sum = 0;
 7     for (unsigned int i = 0; i < n; i++)
 8         sum += a[i];
 9 
10     return sum;
11 }

o foo2.c

 1 int main(int argc, char *argv[])
 2 {
 3     unsigned int a[] = {1, 2, 3};
 4     unsigned int n = sizeof (a) / sizeof (int);
 5 
 6     unsigned int sum = 0;
 7     for (unsigned int i = 0; i < n; i++)
 8         sum += i[a];
 9 
10     return sum;
11 }

o foo3.c

 1 int main(int argc, char *argv[])
 2 {
 3     unsigned int a[] = {1, 2, 3};
 4     unsigned int n = sizeof (a) / sizeof (int);
 5 
 6     unsigned int sum = 0;
 7     for (unsigned int i = 0; i < n; i++)
 8         sum += *(i+a);
 9 
10     return sum;
11 }

o 编译和运行

1 $ gcc -g -Wall -std=gnu99 -m32 -o foo1 foo1.c
2 $ gcc -g -Wall -std=gnu99 -m32 -o foo2 foo2.c
3 $ gcc -g -Wall -std=gnu99 -m32 -o foo3 foo3.c
4 $ ./foo1; echo $?
5 6
6 $ ./foo2; echo $?
7 6
8 $ ./foo3; echo $?
9 6

o 反汇编后diff

1) foo1.gdb.out

 1 (gdb) disas /m main
 2 Dump of assembler code for function main:
 3 2       {
 4    0x080483ed <+0>:     push   ebp
 5    0x080483ee <+1>:     mov    ebp,esp
 6    0x080483f0 <+3>:     sub    esp,0x20
 7 
 8 3               unsigned int a[] = {1, 2, 3};
 9    0x080483f3 <+6>:     mov    DWORD PTR [ebp-0xc],0x1
10    0x080483fa <+13>:    mov    DWORD PTR [ebp-0x8],0x2
11    0x08048401 <+20>:    mov    DWORD PTR [ebp-0x4],0x3
12 
13 4               unsigned int n = sizeof (a) / sizeof (int);
14    0x08048408 <+27>:    mov    DWORD PTR [ebp-0x10],0x3
15 
16 5
17 6               unsigned int sum = 0;
18    0x0804840f <+34>:    mov    DWORD PTR [ebp-0x18],0x0
19 
20 7               for (unsigned int i = 0; i < n; i++)
21    0x08048416 <+41>:    mov    DWORD PTR [ebp-0x14],0x0
22    0x0804841d <+48>:    jmp    0x804842d <main+64>
23    0x08048429 <+60>:    add    DWORD PTR [ebp-0x14],0x1
24    0x0804842d <+64>:    mov    eax,DWORD PTR [ebp-0x14]
25    0x08048430 <+67>:    cmp    eax,DWORD PTR [ebp-0x10]
26    0x08048433 <+70>:    jb     0x804841f <main+50>
27 
28 8                       sum += a[i];
29    0x0804841f <+50>:    mov    eax,DWORD PTR [ebp-0x14]
30    0x08048422 <+53>:    mov    eax,DWORD PTR [ebp+eax*4-0xc]
31    0x08048426 <+57>:    add    DWORD PTR [ebp-0x18],eax
32 
33 9
34 10              return sum;
35    0x08048435 <+72>:    mov    eax,DWORD PTR [ebp-0x18]
36 
37 11      }
38    0x08048438 <+75>:    leave
39    0x08048439 <+76>:    ret
40 
41 End of assembler dump.

2) foo2.gdb.out

 1 (gdb) disas /m main
 2 Dump of assembler code for function main:
 3 2       {
 4    0x080483ed <+0>:     push   ebp
 5    0x080483ee <+1>:     mov    ebp,esp
 6    0x080483f0 <+3>:     sub    esp,0x20
 7 
 8 3               unsigned int a[] = {1, 2, 3};
 9    0x080483f3 <+6>:     mov    DWORD PTR [ebp-0xc],0x1
10    0x080483fa <+13>:    mov    DWORD PTR [ebp-0x8],0x2
11    0x08048401 <+20>:    mov    DWORD PTR [ebp-0x4],0x3
12 
13 4               unsigned int n = sizeof (a) / sizeof (int);
14    0x08048408 <+27>:    mov    DWORD PTR [ebp-0x10],0x3
15 
16 5
17 6               unsigned int sum = 0;
18    0x0804840f <+34>:    mov    DWORD PTR [ebp-0x18],0x0
19 
20 7               for (unsigned int i = 0; i < n; i++)
21    0x08048416 <+41>:    mov    DWORD PTR [ebp-0x14],0x0
22    0x0804841d <+48>:    jmp    0x804842d <main+64>
23    0x08048429 <+60>:    add    DWORD PTR [ebp-0x14],0x1
24    0x0804842d <+64>:    mov    eax,DWORD PTR [ebp-0x14]
25    0x08048430 <+67>:    cmp    eax,DWORD PTR [ebp-0x10]
26    0x08048433 <+70>:    jb     0x804841f <main+50>
27 
28 8                       sum += i[a];
29    0x0804841f <+50>:    mov    eax,DWORD PTR [ebp-0x14]
30    0x08048422 <+53>:    mov    eax,DWORD PTR [ebp+eax*4-0xc]
31    0x08048426 <+57>:    add    DWORD PTR [ebp-0x18],eax
32 
33 9
34 10              return sum;
35    0x08048435 <+72>:    mov    eax,DWORD PTR [ebp-0x18]
36 
37 11      }
38    0x08048438 <+75>:    leave
39    0x08048439 <+76>:    ret
40 
41 End of assembler dump.

3) foo3.gdb.out

 1 (gdb) disas /m main
 2 Dump of assembler code for function main:
 3 2       {
 4    0x080483ed <+0>:     push   ebp
 5    0x080483ee <+1>:     mov    ebp,esp
 6    0x080483f0 <+3>:     sub    esp,0x20
 7 
 8 3               unsigned int a[] = {1, 2, 3};
 9    0x080483f3 <+6>:     mov    DWORD PTR [ebp-0xc],0x1
10    0x080483fa <+13>:    mov    DWORD PTR [ebp-0x8],0x2
11    0x08048401 <+20>:    mov    DWORD PTR [ebp-0x4],0x3
12 
13 4               unsigned int n = sizeof (a) / sizeof (int);
14    0x08048408 <+27>:    mov    DWORD PTR [ebp-0x10],0x3
15 
16 5
17 6               unsigned int sum = 0;
18    0x0804840f <+34>:    mov    DWORD PTR [ebp-0x18],0x0
19 
20 7               for (unsigned int i = 0; i < n; i++)
21    0x08048416 <+41>:    mov    DWORD PTR [ebp-0x14],0x0
22    0x0804841d <+48>:    jmp    0x8048437 <main+74>
23    0x08048433 <+70>:    add    DWORD PTR [ebp-0x14],0x1
24    0x08048437 <+74>:    mov    eax,DWORD PTR [ebp-0x14]
25    0x0804843a <+77>:    cmp    eax,DWORD PTR [ebp-0x10]
26    0x0804843d <+80>:    jb     0x804841f <main+50>
27 
28 8                       sum += *(i+a);
29    0x0804841f <+50>:    mov    eax,DWORD PTR [ebp-0x14]
30    0x08048422 <+53>:    lea    edx,[eax*4+0x0]
31    0x08048429 <+60>:    lea    eax,[ebp-0xc]
32    0x0804842c <+63>:    add    eax,edx
33    0x0804842e <+65>:    mov    eax,DWORD PTR [eax]
34    0x08048430 <+67>:    add    DWORD PTR [ebp-0x18],eax
35 
36 9
37 10              return sum;
38    0x0804843f <+82>:    mov    eax,DWORD PTR [ebp-0x18]
39 
40 11      }
41    0x08048442 <+85>:    leave
42    0x08048443 <+86>:    ret
43 
44 End of assembler dump.

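4) a[i] vs. i[a]

对比foo1.gdb.out与foo2.gdb.out可以看到,除了回显的源码行(sum += a[i]; 与 sum += i[a];)不同之外,两者的每一条指令及其地址都完全一致,即编译器为这两种写法生成了一模一样的代码。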

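5) i[a] vs. *(i+a)

对比foo2.gdb.out与foo3.gdb.out可以看到,在未开启优化(编译时没有加-O选项)的情况下,两者只在取元素地址的方式上略有不同: foo2直接使用带比例变址的寻址模式[ebp+eax*4-0xc]一步取值;foo3则先用lea算出偏移量i*4(存入edx)和数组首地址ebp-0xc(存入eax),相加之后再解引用。两者访问的都是同一个地址(数组首地址 + i*4),语义完全等价。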

结论: a[i]==i[a]==*(i+a)==*(a+i)

分析: 在编译器的眼里,表达式中的数组名a不过是一段连续内存的首地址。获取某个元素a[i]不过是在a对应的首地址上偏移i个元素(即i*sizeof(a[0])个字节),找到对应的内存地址后取出其中的内容即可。由于加法满足交换律,a+i与i+a算出的是同一个地址,所以a[i]、i[a]、*(a+i)、*(i+a)访问的是同一个元素。

PS: 我在面试别人的过程中,如果求职的工程师说他懂汇编,我一般会问这样的问题:"能否在C代码中使用i[a]去访问数组a的第i个元素?"无论对方说能还是不能,我都很乐意进一步问"为什么能/不能?",从而挖掘出其对汇编及编译过程的理解深度。通常,优秀的程序员给出的回答富有计算机思维(即使他判定为不能,比如"我觉得不能,编译器应该不支持这种怪诞的用法..."),而那些机械的程序员一般会选择放弃思考为什么能/不能。

原文地址:https://www.cnblogs.com/idorax/p/6305713.html