In the ever-changing and often competitive realm of cybersecurity, mastering reverse engineering is a crucial skill, indispensable for understanding and mitigating security threats. This article adds one more piece to that body of knowledge: it shows how to recognize standard C/C++ function calls in the disassembly of an Apple Mac OS X application built for 64-bit Intel CPUs.
The reference platform is 64-bit Intel; the C++ compiler is Apple clang++ version 15.0.0 (clang-1500.1.0.2.5) for Mac OS X on 64-bit Intel. The disassembler is IDA Free version 8.3.230608 for Mac OS X x86_64. The code is compiled with the compiler's default options, i.e. with no optimization enabled.
This post focuses only on function calls on the Mac OS X operating system on the 64-bit Intel platform, so the analysis of the disassembled code covers only how parameters are passed and how the result value is returned, and nothing else. The assembly code uses Intel syntax (so for a two-operand instruction, the first operand is the destination and the second operand is the source). This post shows that the clang++ calling convention on Mac OS X on x86_64 is the same as that of g++ (and Rust) on the Linux x86_64 platform (while it differs from the Microsoft x64 calling convention on 64-bit Windows).
C++ code and related assembly code
The following C++ code shows 8 examples of function calls that take long parameters and return a long:
#include <iostream>
// Logs the call as "f1(<a>)" on standard output (newline + flush) and
// hands the single argument back to the caller unchanged.
long f1(long a)
{
	std::cout << "f1(";
	std::cout << a;
	std::cout << ")" << std::endl;
	return a;
}
// Logs the call as "f2(<a>, <b>)" on standard output (newline + flush)
// and returns the sum of the two arguments.
long f2(long a, long b)
{
	const long sum = a + b;
	std::cout << "f2(" << a;
	std::cout << ", " << b;
	std::cout << ")" << std::endl;
	return sum;
}
// Logs the call as "f3(<a>, <b>, <c>)" on standard output (newline +
// flush) and returns the sum of the three arguments.
long f3(long a, long b, long c)
{
	const long args[] = {a, b, c};
	const int count = sizeof(args) / sizeof(args[0]);
	long total = 0;
	for (int i = 0; i < count; ++i) {
		// The first value follows the opening "f3(", every later one a separator.
		std::cout << (i == 0 ? "f3(" : ", ") << args[i];
		total += args[i];
	}
	std::cout << ")" << std::endl;
	return total;
}
// Logs the call as "f4(<a>, <b>, <c>, <d>)" on standard output (newline +
// flush) and returns the sum of the four arguments.
long f4(long a, long b, long c, long d)
{
	const long args[] = {a, b, c, d};
	const int count = sizeof(args) / sizeof(args[0]);
	long total = 0;
	for (int i = 0; i < count; ++i) {
		// The first value follows the opening "f4(", every later one a separator.
		std::cout << (i == 0 ? "f4(" : ", ") << args[i];
		total += args[i];
	}
	std::cout << ")" << std::endl;
	return total;
}
// Logs the call as "f5(<a>, ..., <e>)" on standard output (newline +
// flush) and returns the sum of the five arguments.
long f5(long a, long b, long c, long d, long e)
{
	const long args[] = {a, b, c, d, e};
	const int count = sizeof(args) / sizeof(args[0]);
	long total = 0;
	for (int i = 0; i < count; ++i) {
		// The first value follows the opening "f5(", every later one a separator.
		std::cout << (i == 0 ? "f5(" : ", ") << args[i];
		total += args[i];
	}
	std::cout << ")" << std::endl;
	return total;
}
// Logs the call as "f6(<a>, ..., <f>)" on standard output (newline +
// flush) and returns the sum of the six arguments.
long f6(long a, long b, long c, long d, long e, long f)
{
	const long args[] = {a, b, c, d, e, f};
	const int count = sizeof(args) / sizeof(args[0]);
	long total = 0;
	for (int i = 0; i < count; ++i) {
		// The first value follows the opening "f6(", every later one a separator.
		std::cout << (i == 0 ? "f6(" : ", ") << args[i];
		total += args[i];
	}
	std::cout << ")" << std::endl;
	return total;
}
// Logs the call as "f7(<a>, ..., <g>)" on standard output (newline +
// flush) and returns the sum of the seven arguments.
long f7(long a, long b, long c, long d, long e, long f, long g)
{
	const long args[] = {a, b, c, d, e, f, g};
	const int count = sizeof(args) / sizeof(args[0]);
	long total = 0;
	for (int i = 0; i < count; ++i) {
		// The first value follows the opening "f7(", every later one a separator.
		std::cout << (i == 0 ? "f7(" : ", ") << args[i];
		total += args[i];
	}
	std::cout << ")" << std::endl;
	return total;
}
// Logs the call as "f8(<a>, ..., <h>)" on standard output (newline +
// flush) and returns the sum of all eight arguments.
long f8(long a, long b, long c, long d, long e, long f, long g, long h)
{
	std::cout << "f8(" 
		<< a << ", " 
		<< b << ", " 
		<< c << ", " 
		<< d << ", " 
		<< e << ", " 
		<< f << ", " 
		<< g << ", " 
		<< h << ")" 
		<< std::endl;
	// h must be part of the result, like every parameter in f1..f7;
	// the sum previously stopped at g.
	return a + b + c + d + e + f + g + h;
}
// Calls f1 through f8 in turn, passing one to eight long arguments, and
// stores the value returned by each call in z.
int main()
{
	// Receives the result of every call; it is only written, never read.
	long z;
	z = f1(0x1000000000000001);
	z = f2(0x1000000000000001, 0x1000000000000002);
	z = f3(0x1000000000000001, 0x1000000000000002, 0x1000000000000003);
	z = f4(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004);
	z = f5(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005);
	z = f6(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006);
	z = f7(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006, 0x1000000000000007);
	// The eighth argument is the plain value 8, not 0x1000000000000008 like
	// the pattern used for the other arguments.
	z = f8(0x1000000000000001, 0x1000000000000002, 0x1000000000000003, 0x1000000000000004,
	       0x1000000000000005, 0x1000000000000006, 0x1000000000000007, 0x000000000000008);
	return 0;
}The first function f1 takes a long-type parameter as input, the second function f2 takes two long-type parameters, and so on until the last function f8, which takes 8 long-type parameters as input. To see the full code visit my space on GitHub at this address: https://github.com/ettoremessina/reverse-engineering/tree/main/macosx-intel/calls/clang
The corresponding assembly code of the main function (that contains the calls to f1…f8 functions) generated by the compiler is as follows:
; Disassembly of main: eight calls, f1 .. f8, with one to eight 64-bit
; integer arguments. Arguments 1-6 are loaded into rdi, rsi, rdx, rcx, r8, r9;
; arguments 7 and 8 are stored on the stack. After every call rax is saved
; into the local var_10.
; Attributes: bp-based frame
; int __fastcall main(int argc, const char *argv, const char *envp)
public _main
_main proc near
var_20= qword ptr -20h
var_18= qword ptr -18h
var_10= qword ptr -10h
var_4= dword ptr -4
; Prologue: set up the rbp frame and reserve 20h bytes of stack.
push    rbp
mov     rbp, rsp
sub     rsp, 20h
mov     [rbp+var_4], 0
; f1: one argument, in rdi.
mov     rdi, 1000000000000001h ; __int64
call    __Z2f1l         ; f1(long)
mov     [rbp+var_10], rax
; f2: arguments in rdi, rsi.
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
call    __Z2f2ll        ; f2(long,long)
mov     [rbp+var_10], rax
; f3: arguments in rdi, rsi, rdx.
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
call    __Z2f3lll       ; f3(long,long,long)
mov     [rbp+var_10], rax
; f4: arguments in rdi, rsi, rdx, rcx.
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
mov     rcx, 1000000000000004h ; __int64
call    __Z2f4llll      ; f4(long,long,long,long)
mov     [rbp+var_10], rax
; f5: arguments in rdi, rsi, rdx, rcx, r8.
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
mov     rcx, 1000000000000004h ; __int64
mov     r8, 1000000000000005h ; __int64
call    __Z2f5lllll     ; f5(long,long,long,long,long)
mov     [rbp+var_10], rax
; f6: arguments in rdi, rsi, rdx, rcx, r8, r9.
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
mov     rcx, 1000000000000004h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     r9, 1000000000000006h ; __int64
call    __Z2f6llllll    ; f6(long,long,long,long,long,long)
mov     [rbp+var_10], rax
; f7: six register arguments; the seventh is staged in rax and stored at
; [rsp+20h+var_20], i.e. at [rsp] (var_20 = -20h).
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
mov     rcx, 1000000000000004h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     r9, 1000000000000006h ; __int64
mov     rax, 1000000000000007h
mov     [rsp+20h+var_20], rax ; __int64
call    __Z2f7lllllll   ; f7(long,long,long,long,long,long,long)
mov     [rbp+var_10], rax
; f8: as for f7, plus the eighth argument (immediate 8) stored at
; [rsp+20h+var_18], i.e. at [rsp+8] (var_18 = -18h).
mov     rdi, 1000000000000001h ; __int64
mov     rsi, 1000000000000002h ; __int64
mov     rdx, 1000000000000003h ; __int64
mov     rcx, 1000000000000004h ; __int64
mov     r8, 1000000000000005h ; __int64
mov     r9, 1000000000000006h ; __int64
mov     rax, 1000000000000007h
mov     [rsp+20h+var_20], rax ; __int64
mov     [rsp+20h+var_18], 8 ; __int64
call    __Z2f8llllllll  ; f8(long,long,long,long,long,long,long,long)
mov     [rbp+var_10], rax
; Epilogue: eax = 0 is main's return value; release the stack and return.
xor     eax, eax
add     rsp, 20h
pop     rbp
retn
_main endp
The disassembly clearly shows that the first parameter is passed in the rdi register, the second in rsi, the third in rdx, the fourth in rcx, the fifth in r8, and the sixth in r9. From the seventh parameter onward the stack is used, from right to left, so the seventh is the last one to be placed and ends up at the lowest address: in the listing above the seventh argument is stored at [rsp] and the eighth at [rsp+8]. The return value is always passed back in the rax register. This way of passing parameters is the same one used by C/C++ (with the g++ compiler) and Rust on 64-bit Linux.
Note: 
Compilers do not always use push instructions to place these arguments on the stack. A common method is to allocate enough space on the stack for all of a function’s outgoing arguments in the function prologue, and then use mov instructions to store the arguments on the stack as required. This is what the listing above does: the prologue reserves the space with sub rsp, 20h, and the seventh and eighth arguments are written with mov.

 
            
 
            
 
            
 
            
