In the dynamic and often adversarial realm of cyber defense, reverse engineering stands as a fundamental skill set, crucial for understanding and mitigating security threats. This article adds a building block to that reverse-engineering knowledge: its purpose is to show how to recognize normal function calls in C/C++ and Rust starting from the assembly of an x64 Linux program. The calling convention involved is called System V x86_64.
The reference platform is x64 (also called x86_64 or amd64); the Rust compiler version is 1.72.0 for Linux; as for C++, the gcc compiler version 11.4.0 was used, also for Linux. The disassembler is IDA Free version 8.3.230608 for Linux x86_64. Both the Rust and the g++ compilations are run with the default options of the respective compilers, namely with no optimization enabled.
In this post the focus is only on function calls on the Linux operating system on a 64-bit platform, so the analysis of the disassembled code covers only the way parameters are passed and the result value is retrieved, and nothing else; the assembly code uses the Intel syntax (so for a two-operand instruction, the first operand is the destination and the second operand is the source). The next post shows the implementation of the Microsoft x64 calling convention used by the Microsoft C/C++ compiler on Windows x64.
C++ code and related assembly code
The following C++ code shows 8 examples of function calls that take long parameters and return a long:
#include <iostream>
// Writes "f1(<a>)" plus a flushed newline to stdout and hands the argument
// back unchanged.
long f1(long a)
{
	std::cout << "f1(";
	std::cout << a;
	std::cout << ")" << std::endl;
	return a;
}
// Writes "f2(<a>, <b>)" plus a flushed newline to stdout, then returns a + b.
long f2(long a, long b)
{
	const char* const sep = ", ";
	std::cout << "f2(" << a << sep << b << ")" << std::endl;
	const long total = a + b;
	return total;
}
// Writes "f3(<a>, <b>, <c>)" plus a flushed newline to stdout, then returns
// the sum of the three arguments.
long f3(long a, long b, long c)
{
	const char* const sep = ", ";
	std::cout << "f3(" << a << sep << b << sep << c << ")" << std::endl;
	const long total = a + b + c;
	return total;
}
// Writes "f4(<a>, <b>, <c>, <d>)" plus a flushed newline to stdout, then
// returns the sum of the four arguments.
long f4(long a, long b, long c, long d)
{
	const char* const sep = ", ";
	std::cout << "f4(" << a << sep << b << sep << c << sep << d << ")" << std::endl;
	const long total = a + b + c + d;
	return total;
}
// Writes "f5(<a>, <b>, <c>, <d>, <e>)" plus a flushed newline to stdout, then
// returns the sum of the five arguments.
long f5(long a, long b, long c, long d, long e)
{
	const char* const sep = ", ";
	std::cout << "f5(" << a << sep << b << sep << c << sep << d << sep << e;
	std::cout << ")" << std::endl;
	const long total = a + b + c + d + e;
	return total;
}
// Writes "f6(<a>, <b>, <c>, <d>, <e>, <f>)" plus a flushed newline to stdout,
// then returns the sum of the six arguments.
long f6(long a, long b, long c, long d, long e, long f)
{
	const char* const sep = ", ";
	std::cout << "f6(" << a << sep << b << sep << c << sep << d << sep << e;
	std::cout << sep << f << ")" << std::endl;
	const long total = a + b + c + d + e + f;
	return total;
}
// Writes "f7(<a>, <b>, <c>, <d>, <e>, <f>, <g>)" plus a flushed newline to
// stdout, then returns the sum of the seven arguments.
long f7(long a, long b, long c, long d, long e, long f, long g)
{
	const char* const sep = ", ";
	std::cout << "f7(" << a << sep << b << sep << c << sep << d << sep << e;
	std::cout << sep << f << sep << g << ")" << std::endl;
	const long total = a + b + c + d + e + f + g;
	return total;
}
// Writes "f8(<a>, <b>, <c>, <d>, <e>, <f>, <g>, <h>)" plus a flushed newline
// to stdout and returns the sum of all eight arguments.
long f8(long a, long b, long c, long d, long e, long f, long g, long h)
{
	std::cout << "f8(" 
		<< a << ", " 
		<< b << ", " 
		<< c << ", " 
		<< d << ", " 
		<< e << ", " 
		<< f << ", " 
		<< g << ", " 
		<< h << ")" 
		<< std::endl;
	// h must be part of the sum: it was printed but left out of the result.
	return a + b + c + d + e + f + g + h;
}
// Calls f1 ... f8 in turn, passing one to eight long arguments, so that the
// argument set-up for every parameter count is visible in the disassembly.
// Each result is stored in z and overwritten by the next call; z is never
// read, and main always returns 0.
int main()
{
	long z;
	// Every constant of the form 0x100000000000000N is wider than 32 bits.
	z = f1(0x1000000000000001);
	z = f2(0x1000000000000001, 0x1000000000000002);
	z = f3(0x1000000000000001, 0x1000000000000002, 0x1000000000000003);
	z = f4(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004);
	z = f5(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005);
	z = f6(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006);
	z = f7(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006, 0x1000000000000007);
	// The eighth argument is the small value 8 (written with 15 hex digits),
	// unlike the other seven.
	z = f8(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006, 0x1000000000000007, 0x000000000000008);
	return 0;
}
The first function f1 takes a long-type parameter as input, the second function f2 takes two long-type parameters, and so on until the last function f8, which takes 8 long-type parameters as input. To see the full code visit my space on GitHub at this address: https://github.com/ettoremessina/reverse-engineering/blob/main/linux/calls/gcc/calls.cc
The corresponding assembly code of the main function (that contains the calls to f1…f8 functions) generated by the compiler is as follows:
; Attributes: bp-based frame
; int __fastcall main(int argc, const char **argv, const char **envp)
public main
main proc near
var_1C= dword ptr -1Ch
var_8= qword ptr -8
; __unwind {
endbr64
push    rbp
mov     rbp, rsp
sub     rsp, 10h
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f1l          ; f1(long)
mov     [rbp+var_8], rax
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f2ll         ; f2(long,long)
mov     [rbp+var_8], rax
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f3lll        ; f3(long,long,long)
mov     [rbp+var_8], rax
mov     rax, 1000000000000004h
mov     rcx, rax        ; __int64
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f4llll       ; f4(long,long,long,long)
mov     [rbp+var_8], rax
mov     r8, 1000000000000005h ; __int64
mov     rax, 1000000000000004h
mov     rcx, rax        ; __int64
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f5lllll      ; f5(long,long,long,long,long)
mov     [rbp+var_8], rax
mov     r9, 1000000000000006h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     rax, 1000000000000004h
mov     rcx, rax        ; __int64
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f6llllll     ; f6(long,long,long,long,long,long)
mov     [rbp+var_8], rax
sub     rsp, 8
push    7               ; __int64
mov     [rsp+20h+var_1C], 10000000h
mov     r9, 1000000000000006h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     rax, 1000000000000004h
mov     rcx, rax        ; __int64
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f7lllllll    ; f7(long,long,long,long,long,long,long)
add     rsp, 10h
mov     [rbp+var_8], rax
push    8               ; __int64
push    7               ; __int64
mov     [rsp+20h+var_1C], 10000000h
mov     r9, 1000000000000006h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     rax, 1000000000000004h
mov     rcx, rax        ; __int64
mov     rax, 1000000000000003h
mov     rdx, rax        ; __int64
mov     rax, 1000000000000002h
mov     rsi, rax        ; __int64
mov     rax, 1000000000000001h
mov     rdi, rax        ; __int64
call    _Z2f8llllllll   ; f8(long,long,long,long,long,long,long,long)
add     rsp, 10h
mov     [rbp+var_8], rax
mov     eax, 0
leave
retn
; } // starts at 1A62
main endp
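By disassembling we can clearly see that the first parameter is passed in the rdi register, the second parameter in the rsi register, the third in rdx, the fourth in rcx, the fifth in r8, and the sixth in r9. From the seventh parameter onward the stack is used, from right to left, so the seventh is the last one to be pushed. Note that push only accepts a 32-bit immediate, which is why the listing pushes 7 and then uses mov [rsp+20h+var_1C], 10000000h to write the upper 32 bits of that stack slot, producing the value 1000000000000007h. The return value is always passed via the rax register.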
Note: Be aware that compilers may not always employ push instructions to transfer these arguments onto the stack. It’s a typical approach to reserve sufficient space on the stack for all of a function’s outgoing arguments during the function prologue, followed by utilizing mov instructions to position arguments on the stack when needed.
Rust code and related assembly code
The equivalent Rust code is:
/// Prints `f1(<a>)` on stdout and returns `a` unchanged.
fn f1(a: i64) -> i64 {
	println!("f1({})", a);
	a
}
/// Prints `f2(<a>, <b>)` on stdout, then returns `a + b`.
fn f2(a: i64, b: i64) -> i64 {
	println!("f2({}, {})", a, b);
	a + b
}
/// Prints `f3(<a>, <b>, <c>)` on stdout, then returns the sum of the three arguments.
fn f3(a: i64, b: i64, c: i64) -> i64 {
	println!("f3({}, {}, {})", a, b, c);
	a + b + c
}
/// Prints `f4(<a>, <b>, <c>, <d>)` on stdout, then returns the sum of the four arguments.
fn f4(a: i64, b: i64, c: i64, d: i64) -> i64 {
	println!("f4({}, {}, {}, {})", a, b, c, d);
	a + b + c + d
}
/// Prints `f5(<a>, <b>, <c>, <d>, <e>)` on stdout, then returns the sum of the five arguments.
fn f5(a: i64, b: i64, c: i64, d: i64, e: i64) -> i64 {
	println!("f5({}, {}, {}, {}, {})", a, b, c, d, e);
	a + b + c + d + e
}
/// Prints `f6(<a>, <b>, <c>, <d>, <e>, <f>)` on stdout, then returns the sum of the six arguments.
fn f6(a: i64, b: i64, c: i64, d: i64, e: i64, f: i64) -> i64 {
	println!("f6({}, {}, {}, {}, {}, {})", a, b, c, d, e, f);
	a + b + c + d + e + f
}
/// Prints `f7(<a>, ..., <g>)` on stdout, then returns the sum of the seven arguments.
fn f7(a: i64, b: i64, c: i64, d: i64, e: i64, f: i64, g: i64) -> i64 {
	println!("f7({}, {}, {}, {}, {}, {}, {})", a, b, c, d, e, f, g);
	a + b + c + d + e + f + g
}
/// Prints `f8(<a>, ..., <h>)` on stdout, then returns the sum of the eight arguments.
fn f8(a: i64, b: i64, c: i64, d: i64, e: i64, f: i64, g: i64, h: i64) -> i64 {
	println!("f8({}, {}, {}, {}, {}, {}, {}, {})", a, b, c, d, e, f, g, h);
	a + b + c + d + e + f + g + h
}
/// Calls `f1` ... `f8` in turn, passing one to eight `i64` arguments, so that
/// the argument set-up for every parameter count is visible in the
/// disassembly. Each result is stored in `_z` and overwritten by the next call.
fn main() {
	let mut _z : i64;
	_z = f1(0x1000000000000001);
	_z = f2(0x1000000000000001, 0x1000000000000002);
	_z = f3(0x1000000000000001, 0x1000000000000002, 0x1000000000000003);
	_z = f4(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004);
	// The fifth argument used to be the decimal literal 1000000000000000005,
	// a typo for the hex value used everywhere else. The disassembly listing
	// of main still loads r8 with 0DE0B6B3A7640005h (that decimal value) for
	// this call; a rebuild is expected to show 1000000000000005h instead.
	_z = f5(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	        0x1000000000000005);
	_z = f6(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	        0x1000000000000005, 0x1000000000000006);
	_z = f7(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	        0x1000000000000005, 0x1000000000000006, 0x1000000000000007);
	// The eighth argument is the small value 8, unlike the other seven.
	_z = f8(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	        0x1000000000000005, 0x1000000000000006, 0x1000000000000007, 0x0000000000000008);
}
To see the full code visit my space on GitHub at this address: https://github.com/ettoremessina/reverse-engineering/blob/main/linux/calls/rust/calls.rs
The corresponding assembly code of the main function (that contains the calls to f1…f8 functions) generated by the compiler is as follows:
; calls::main::h677c54317e5b6682
_ZN5calls4main17h677c54317e5b6682E proc near
var_18= qword ptr -18h
var_10= qword ptr -10h
var_8= qword ptr -8
; __unwind {
sub     rsp, 18h
mov     rdi, 1000000000000001h
call    _ZN5calls2f117h60db23666fd8beddE ; calls::f1::h60db23666fd8bedd
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
call    _ZN5calls2f217h22534f4d994e9616E ; calls::f2::h22534f4d994e9616
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
call    _ZN5calls2f317hf7e5d2968f7df140E ; calls::f3::hf7e5d2968f7df140
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
mov     rcx, 1000000000000004h
call    _ZN5calls2f417h7fbd89c2fb397772E ; calls::f4::h7fbd89c2fb397772
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
mov     rcx, 1000000000000004h
mov     r8, 0DE0B6B3A7640005h
call    _ZN5calls2f517h674c4925b32f785aE ; calls::f5::h674c4925b32f785a
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
mov     rcx, 1000000000000004h
mov     r8, 1000000000000005h
mov     r9, 1000000000000006h
call    _ZN5calls2f617hec146649b00f86adE ; calls::f6::hec146649b00f86ad
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
mov     rcx, 1000000000000004h
mov     r8, 1000000000000005h
mov     r9, 1000000000000006h
mov     rax, 1000000000000007h
mov     [rsp+18h+var_18], rax
call    _ZN5calls2f717h0ff7e68a5bde8bccE ; calls::f7::h0ff7e68a5bde8bcc
mov     [rsp+18h+var_8], rax
mov     rdi, 1000000000000001h
mov     rsi, 1000000000000002h
mov     rdx, 1000000000000003h
mov     rcx, 1000000000000004h
mov     r8, 1000000000000005h
mov     r9, 1000000000000006h
mov     rax, 1000000000000007h
mov     [rsp+18h+var_18], rax
mov     [rsp+18h+var_10], 8
call    _ZN5calls2f817hb07c2e835b671928E ; calls::f8::hb07c2e835b671928
mov     [rsp+18h+var_8], rax
add     rsp, 18h
retn
; } // starts at 9B10
_ZN5calls4main17h677c54317e5b6682E endp
It is observed that the way of passing parameters and retrieving return values in Rust on 64-bit Linux is exactly the same as in C/C++ (also on 64-bit Linux), namely the CPU registers utilized, when needed, are as follows, in this order: rdi, rsi, rdx, rcx, r8 and r9. From the seventh parameter onward the stack is used (from right to left). The return value is always passed via the rax register.
Notes on compilers
Compilers, in preparing the call, may use only the lower 32-bit part of the registers when it is not necessary to involve the more significant half directly. For example, if we pass small positive values to the function f6 in C++ code:
z = f6(1, 2, 3, 4, 5, 6);
the compiler will generate the following code:
mov     r9d, 6          ; __int64
mov     r8d, 5          ; __int64
mov     ecx, 4          ; __int64
mov     edx, 3          ; __int64
mov     esi, 2          ; __int64
mov     edi, 1          ; __int64
call    _Z2f6llllll     ; f6(long,long,long,long,long,long)
mov     [rbp+var_8], rax
As you can see, the code involves the registers edi, esi, edx, ecx, r8d and r9d (which are the least-significant halves of the rdi, rsi, rdx, rcx, r8 and r9 registers). It is important to remember that a mov to the least-significant 32-bit part of a register implicitly writes 0 to all bits of the most-significant half of the 64-bit register, so the convention described above does not change: the full 64-bit registers are still used; in this case they are simply loaded differently.
The example shown above is in C++; the behavior is the same in the case of the Rust compiler. This calling convention is also used by Apple clang++ on 64-bit (x86_64) Mac OS X.

 
            
 
            
 
            
 
            
