understanding assembly code (2)

What is this topic about?

It is a study of the difference between a function returning a vector and a function accepting a reference to a vector as a parameter.

[torstan]$ more vec.cc
#include<cstdio>
#include<vector>
using namespace std;

// Creates an empty vector<int>, appends the single element 2, and returns
// the vector by value. This is the "return a vector" variant whose
// generated code is examined in the disassembly that follows.
vector<int> f()
{
    vector<int> result;
    result.push_back(2);
    return result;
}
/*
g++ -g vec.cc -o vec
c++filt _ZNSaIiEC1Ev

(gdb) disassemble f
Dump of assembler code for function _Z1fv:
0x00000000004007d8 <_Z1fv+0>: push %rbp
0x00000000004007d9 <_Z1fv+1>: mov %rsp,%rbp
0x00000000004007dc <_Z1fv+4>: push %rbx                  ;save rbx, since it will be used in this stack frame
0x00000000004007dd <_Z1fv+5>: sub $0x38,%rsp      ;allocate 56 bytes for local variables
0x00000000004007e1 <_Z1fv+9>: mov %rdi,0xffffffffffffffd0(%rbp)   ;save rdi to [rbp-48] because rdi has stored a THIS pointer to a vector
0x00000000004007e5 <_Z1fv+13>: lea 0xffffffffffffffe0(%rbp),%rdi    ;used to construct an allocator at [rbp-32]. The allocator is used for vector construction
0x00000000004007e9 <_Z1fv+17>: callq 0x400958 <allocator>
0x00000000004007ee <_Z1fv+22>: lea 0xffffffffffffffe0(%rbp),%rsi        ;get the address of allocator, and save it in rsi
0x00000000004007f2 <_Z1fv+26>: mov 0xffffffffffffffd0(%rbp),%rdi      ;get THIS pointer stored in [rbp-48], and save it in rdi
0x00000000004007f6 <_Z1fv+30>: callq 0x400988 <vector>          ;construct a vector using parameters stored in rsi, rdi
0x00000000004007fb <_Z1fv+35>: jmp 0x40081b <_Z1fv+67>         ;jump to 0x000000000040081b
0x00000000004007fd <_Z1fv+37>: mov %rax,0xffffffffffffffc8(%rbp)
0x0000000000400801 <_Z1fv+41>: mov 0xffffffffffffffc8(%rbp),%rbx
0x0000000000400805 <_Z1fv+45>: lea 0xffffffffffffffe0(%rbp),%rdi
0x0000000000400809 <_Z1fv+49>: callq 0x400970 <~allocator>
0x000000000040080e <_Z1fv+54>: mov %rbx,0xffffffffffffffc8(%rbp)
0x0000000000400812 <_Z1fv+58>: mov 0xffffffffffffffc8(%rbp),%rdi
0x0000000000400816 <_Z1fv+62>: callq 0x4006f0
0x000000000040081b <_Z1fv+67>: lea 0xffffffffffffffe0(%rbp),%rdi     ;call destructor for the allocator at [rbp-32]
0x000000000040081f <_Z1fv+71>: callq 0x400970 <~allocator>
0x0000000000400824 <_Z1fv+76>: movl $0x2,0xffffffffffffffdc(%rbp)    ;store 2 in [rbp-36]
0x000000000040082b <_Z1fv+83>: lea 0xffffffffffffffdc(%rbp),%rsi      ;move the address of [rbp-36] to rsi
0x000000000040082f <_Z1fv+87>: mov 0xffffffffffffffd0(%rbp),%rdi      ;move THIS pointer to rdi
0x0000000000400833 <_Z1fv+91>: callq 0x4009fa <_ZNSt6vectorIiSaIiEE9push_backERKi> ;call push_back with the THIS pointer that was saved in [rbp-48]
0x0000000000400838 <_Z1fv+96>: jmp 0x400858 <_Z1fv+128>                                      ;jump to 0x0000000000400858
0x000000000040083a <_Z1fv+98>: mov %rax,0xffffffffffffffc8(%rbp)
0x000000000040083e <_Z1fv+102>: mov 0xffffffffffffffc8(%rbp),%rbx
0x0000000000400842 <_Z1fv+106>: mov 0xffffffffffffffd0(%rbp),%rdi
0x0000000000400846 <_Z1fv+110>: callq 0x4009a8 <~vector>
0x000000000040084b <_Z1fv+115>: mov %rbx,0xffffffffffffffc8(%rbp)
0x000000000040084f <_Z1fv+119>: mov 0xffffffffffffffc8(%rbp),%rdi
0x0000000000400853 <_Z1fv+123>: callq 0x4006f0
0x0000000000400858 <_Z1fv+128>: mov 0xffffffffffffffd0(%rbp),%rax           ;prepare return value, return the address stored in [rbp-48]
0x000000000040085c <_Z1fv+132>: add $0x38,%rsp                                   ;de-allocate 56 bytes on stack
0x0000000000400860 <_Z1fv+136>: pop %rbx                                            ;restore rbx
0x0000000000400861 <_Z1fv+137>: leaveq
0x0000000000400862 <_Z1fv+138>: retq

*/

// Appends the element 2 to the caller's vector through the reference
// parameter and returns 1. This is the "pass a vector by reference"
// variant whose generated code is examined in the disassembly that follows.
int g(vector<int>& v)
{
    v.push_back(2);
    return 1;
}
/*
Dump of assembler code for function _Z1gRSt6vectorIiSaIiEE:
0x0000000000400864 <_Z1gRSt6vectorIiSaIiEE+0>: push %rbp
0x0000000000400865 <_Z1gRSt6vectorIiSaIiEE+1>: mov %rsp,%rbp
0x0000000000400868 <_Z1gRSt6vectorIiSaIiEE+4>: sub $0x10,%rsp          ;allocate 16 bytes on stack for local variables
0x000000000040086c <_Z1gRSt6vectorIiSaIiEE+8>: mov %rdi,0xfffffffffffffff8(%rbp)    ;move THIS pointer of vector to [rbp-8]
0x0000000000400870 <_Z1gRSt6vectorIiSaIiEE+12>: movl $0x2,0xfffffffffffffff4(%rbp) ;move 2 to [rbp-12]
0x0000000000400877 <_Z1gRSt6vectorIiSaIiEE+19>: lea 0xfffffffffffffff4(%rbp),%rsi     ;move the address of [rbp-12] to rsi
0x000000000040087b <_Z1gRSt6vectorIiSaIiEE+23>: mov 0xfffffffffffffff8(%rbp),%rdi   ;move THIS pointer to rdi
0x000000000040087f <_Z1gRSt6vectorIiSaIiEE+27>: callq 0x4009fa <_ZNSt6vectorIiSaIiEE9push_backERKi>  ;call push_back
0x0000000000400884 <_Z1gRSt6vectorIiSaIiEE+32>: mov $0x1,%eax                         ;prepare return value
0x0000000000400889 <_Z1gRSt6vectorIiSaIiEE+37>: leaveq
0x000000000040088a <_Z1gRSt6vectorIiSaIiEE+38>: retq

*/
// Driver that exercises both calling styles so their generated code can be
// compared: va is initialized from the vector returned by f(), while vb is
// constructed here and then filled in place by g(). Prints the size of va
// and returns 0.
int main()
{
    vector<int> va = f();   // vector returned by value
    vector<int> vb;
    g(vb);                  // vector modified through a reference
    // size() yields an unsigned size_type; convert explicitly to int so the
    // argument matches the %d conversion in the format string.
    int sz = static_cast<int>(va.size());
    printf("size of va is %d\n", sz);
    return 0;
}

/*
(gdb) disassemble main
Dump of assembler code for function main:
0x000000000040088c <main+0>: push %rbp
0x000000000040088d <main+1>: mov %rsp,%rbp
0x0000000000400890 <main+4>: push %rbx                ;save rbx in stack, because it will be used in this stack frame
0x0000000000400891 <main+5>: sub $0x68,%rsp            ;allocate memory for local variables
0x0000000000400895 <main+9>: lea 0xffffffffffffffd0(%rbp),%rdi       ;move the address of [rbp-48] to rdi, since va will be stored in [rbp-48]
0x0000000000400899 <main+13>: callq 0x4007d8 <_Z1fv>          ;call function f with parameter stored in rdi
0x000000000040089e <main+18>: lea 0xffffffffffffffa0(%rbp),%rdi     ;an allocator stored in [rbp-96], used for vector construction
0x00000000004008a2 <main+22>: callq 0x400958 <allocator>
0x00000000004008a7 <main+27>: lea 0xffffffffffffffa0(%rbp),%rsi
0x00000000004008ab <main+31>: lea 0xffffffffffffffb0(%rbp),%rdi   ;construct vb in [rbp-80]
0x00000000004008af <main+35>: callq 0x400988 <vector>
0x00000000004008b4 <main+40>: jmp 0x4008cd <main+65>      ;jump to 0x00000000004008cd
0x00000000004008b6 <main+42>: mov %rax,0xffffffffffffff90(%rbp)
0x00000000004008ba <main+46>: mov 0xffffffffffffff90(%rbp),%rbx
0x00000000004008be <main+50>: lea 0xffffffffffffffa0(%rbp),%rdi
0x00000000004008c2 <main+54>: callq 0x400970 <~allocator>
0x00000000004008c7 <main+59>: mov %rbx,0xffffffffffffff90(%rbp)
0x00000000004008cb <main+63>: jmp 0x400933 <main+167>
0x00000000004008cd <main+65>: lea 0xffffffffffffffa0(%rbp),%rdi    ;destruct the allocator stored in [rbp-96]
0x00000000004008d1 <main+69>: callq 0x400970 <~allocator>
0x00000000004008d6 <main+74>: lea 0xffffffffffffffb0(%rbp),%rdi       ;move THIS pointer of vb to rdi
0x00000000004008da <main+78>: callq 0x400864 <_Z1gRSt6vectorIiSaIiEE>   ;call function g with a parameter stored in rdi
0x00000000004008df <main+83>: lea 0xffffffffffffffd0(%rbp),%rdi                    ;move the address of va to rdi, in preparation for calling vector.size()
0x00000000004008e3 <main+87>: callq 0x400a58 <_ZNKSt6vectorIiSaIiEE4sizeEv> ;call vector.size()
0x00000000004008e8 <main+92>: mov %eax,0xffffffffffffff9c(%rbp)                       ;return value is in eax
0x00000000004008eb <main+95>: mov 0xffffffffffffff9c(%rbp),%esi
0x00000000004008ee <main+98>: mov $0x4014fc,%edi
0x00000000004008f3 <main+103>: mov $0x0,%eax
0x00000000004008f8 <main+108>: callq 0x4006d0                                          ;prepare parameters, and call printf
0x00000000004008fd <main+113>: lea 0xffffffffffffffb0(%rbp),%rdi                   ;get the address of vb, save it in rdi
0x0000000000400901 <main+117>: callq 0x4009a8 <~vector>                         ;call destructor for vb
0x0000000000400906 <main+122>: lea 0xffffffffffffffd0(%rbp),%rdi                  ;get the address of va, save it in rdi
0x000000000040090a <main+126>: callq 0x4009a8 <~vector>                        ;call destructor for va
0x000000000040090f <main+131>: movl $0x0,0xffffffffffffff98(%rbp)
0x0000000000400916 <main+138>: jmp 0x40094d <main+193>                     ;jump to 0x000000000040094d
0x0000000000400918 <main+140>: mov %rax,0xffffffffffffff90(%rbp)
0x000000000040091c <main+144>: mov 0xffffffffffffff90(%rbp),%rbx
0x0000000000400920 <main+148>: lea 0xffffffffffffffb0(%rbp),%rdi
0x0000000000400924 <main+152>: callq 0x4009a8 <~vector>
0x0000000000400929 <main+157>: mov %rbx,0xffffffffffffff90(%rbp)
0x000000000040092d <main+161>: jmp 0x400933 <main+167>
0x000000000040092f <main+163>: mov %rax,0xffffffffffffff90(%rbp)
0x0000000000400933 <main+167>: mov 0xffffffffffffff90(%rbp),%rbx
0x0000000000400937 <main+171>: lea 0xffffffffffffffd0(%rbp),%rdi
0x000000000040093b <main+175>: callq 0x4009a8 <~vector>
0x0000000000400940 <main+180>: mov %rbx,0xffffffffffffff90(%rbp)
0x0000000000400944 <main+184>: mov 0xffffffffffffff90(%rbp),%rdi
0x0000000000400948 <main+188>: callq 0x4006f0
0x000000000040094d <main+193>: mov 0xffffffffffffff98(%rbp),%eax               ;prepare return value, 0, in eax
0x0000000000400950 <main+196>: add $0x68,%rsp                                       ;release the 104 bytes of local storage so that rsp points at the saved rbx
0x0000000000400954 <main+200>: pop %rbx                                                ;restore rbx
0x0000000000400955 <main+201>: leaveq
0x0000000000400956 <main+202>: retq

*/

CONCLUSION:
1. Every vector object occupies 24 bytes on the stack in this 64-bit g++ build, i.e. sizeof(vector<T>)=24. The elements themselves live in heap memory that the vector requests as needed.
2. For function f, which returns a vector, storage larger than 24 bytes is reserved in main, and a pointer to this storage (the THIS pointer of the vector) is passed to
function f in register rdi. In function f, an allocator is constructed first. Next, the vector (v in f, which becomes va in main) is constructed directly in that storage, using the THIS pointer and the pointer to the
allocator as arguments. Finally, the THIS pointer is saved in register rax and returned to function main.
3. For function g, which accepts a reference to a vector as a parameter, a vector (vb) is first constructed in function main, and then the THIS pointer of vb is passed to function g
in register rdi.
4. In both cases, function main passes a pointer to the vector's storage to the callee (f or g).

--------------------------------------------------
This conclusion is consistent with what I saw from internet.
If a return value with more than 4 bytes is needed, then the caller passes an extra first argument to the callee. This extra argument is address of the location where the return value should be stored. As an example, for C function call:
x = TestFunc(a, b, c);
Is transformed into the call something like this:
TestFunc(&x, a, b, c);

from http://www.tenouk.com/Bufferoverflowc/Bufferoverflow3.html
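//note: this statement is true on a 32-bit system. On a 64-bit system a register can hold 8 bytes of data, so the size threshold is larger; a class type such as vector, which has a non-trivial copy constructor and destructor, is nevertheless returned through the hidden pointer, as the disassembly above shows. (Torstan)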

Reference

Inside the C++ Object Model

原文地址:https://www.cnblogs.com/Torstan/p/2605488.html