@John Bollinger
It also matters a great deal for structures and arrays. On Windows there is a programming model called COM (Component Object Model), and the well-known graphics API Direct3D (part of DirectX) is built on it. Methods of a COM object are generally called through the object's vtable, like this:
/*
 * Minimal COM-style object: the object stores a pointer to a table of
 * function pointers (its "vtable"), and every method call goes through
 * that table.
 */
typedef struct {
    void (*func1)(void);
    void (*func2)(void);
    void (*func3)(void);
    void (*func4)(void);
    void (*func5)(void);
} i_ibject_vtable;

typedef struct {
    i_ibject_vtable *vtable;
} i_object;

/*
 * Defined in another translation unit. Hands the object back through the
 * out-parameter and returns an int (presumably a status code; this
 * snippet never inspects it).
 */
int object_create(i_object **out_object);

/*
 * Obtains an object from object_create() and calls func1..func5 through
 * its vtable, spelling out p_object->vtable for every call.
 *
 * Returns 0 unconditionally.
 *
 * NOTE(review): the result of object_create() is ignored, so p_object is
 * used without knowing whether it was set. It is left that way here so
 * the code keeps matching the assembly listing that follows it; real code
 * must check the result first.
 */
int entry(void)
{
    i_object *p_object;

    object_create(&p_object);

    /*
     * The address of p_object was handed to object_create(), so the
     * compiler has to assume any of the indirect calls below may change
     * p_object or p_object->vtable; the listing shows both being loaded
     * again before each call.
     */
    p_object->vtable->func1();
    p_object->vtable->func2();
    p_object->vtable->func3();
    p_object->vtable->func4();
    p_object->vtable->func5();

    return 0;
}
.file "example.c" # GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu) # compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 # options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables .text .p2align 4 .globl entry .type entry, @function entry: subq $24, %rsp #, # /app/example.c:18: object_create(&p_object); leaq 8(%rsp), %rdi #, tmp114 call object_create # # /app/example.c:20: p_object->vtable->func1(); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:20: p_object->vtable->func1(); movq (%rax), %rax # p_object.0_1->vtable, p_object.0_1->vtable # /app/example.c:20: p_object->vtable->func1(); call *(%rax) # _2->func1 # /app/example.c:21: p_object->vtable->func2(); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:21: p_object->vtable->func2(); movq (%rax), %rax # p_object.1_4->vtable, p_object.1_4->vtable # /app/example.c:21: p_object->vtable->func2(); call *8(%rax) # _5->func2 # /app/example.c:22: p_object->vtable->func3(); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:22: p_object->vtable->func3(); movq (%rax), %rax # p_object.2_7->vtable, p_object.2_7->vtable # /app/example.c:22: p_object->vtable->func3(); call *16(%rax) # _8->func3 # /app/example.c:23: p_object->vtable->func4(); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:23: p_object->vtable->func4(); movq (%rax), %rax # p_object.3_10->vtable, p_object.3_10->vtable # /app/example.c:23: p_object->vtable->func4(); call *24(%rax) # _11->func4 # /app/example.c:24: p_object->vtable->func5(); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:24: p_object->vtable->func5(); movq (%rax), %rax # p_object.4_13->vtable, p_object.4_13->vtable # /app/example.c:24: p_object->vtable->func5(); call *32(%rax) # _14->func5 # /app/example.c:27: } xorl %eax, %eax # addq $24, %rsp 
#, ret .size entry, .-entry .ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0" .section .note.GNU-stack,"",@progbits
Manually saving these function pointers in a local copy actually makes the generated code worse (a pessimization rather than an optimization):
/*
 * Minimal COM-style object: the object stores a pointer to a table of
 * function pointers (its "vtable").
 */
typedef struct {
    void (*func1)(void);
    void (*func2)(void);
    void (*func3)(void);
    void (*func4)(void);
    void (*func5)(void);
} i_ibject_vtable;

typedef struct {
    i_ibject_vtable *vtable;
} i_object;

/*
 * Defined in another translation unit. Hands the object back through the
 * out-parameter and returns an int (presumably a status code; this
 * snippet never inspects it).
 */
int object_create(i_object **out_object);

/*
 * Variant that copies the whole vtable into a local struct once and then
 * calls func1..func5 through that local copy instead of through the
 * object.
 *
 * Returns 0 unconditionally.
 *
 * NOTE(review): the result of object_create() is ignored, so p_object is
 * used without knowing whether it was set. It is left that way here so
 * the code keeps matching the assembly listing that follows it; real code
 * must check the result first.
 */
int entry(void)
{
    i_object *p_object;

    object_create(&p_object);

    /*
     * Snapshot of all five slots. The listing that follows shows this
     * copy being written to the stack and four of the five calls going
     * through those stack slots.
     */
    i_ibject_vtable vtable;
    __builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable));

    vtable.func1();
    vtable.func2();
    vtable.func3();
    vtable.func4();
    vtable.func5();

    return 0;
}
.file "example.c" # GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu) # compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 # options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables .text .p2align 4 .globl entry .type entry, @function entry: subq $72, %rsp #, # /app/example.c:18: object_create(&p_object); leaq 8(%rsp), %rdi #, tmp106 call object_create # # /app/example.c:21: __builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable)); movq 8(%rsp), %rax # p_object, p_object # /app/example.c:21: __builtin_memcpy(&vtable, p_object->vtable, sizeof(vtable)); movq (%rax), %rax # p_object.0_1->vtable, p_object.0_1->vtable movdqu (%rax), %xmm0 # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2] movq %xmm0, %rdx # MEM <char[1:40]> [(void *)_2], tmp119 movaps %xmm0, 16(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable] movdqu 16(%rax), %xmm0 # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2] movq 32(%rax), %rax # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)_2] movaps %xmm0, 32(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable] movq %rax, 48(%rsp) # MEM <char[1:40]> [(void *)_2], MEM <char[1:40]> [(void *)&vtable] # /app/example.c:23: vtable.func1(); call *%rdx # tmp119 # /app/example.c:24: vtable.func2(); call *24(%rsp) # vtable.func2 # /app/example.c:25: vtable.func3(); call *32(%rsp) # vtable.func3 # /app/example.c:26: vtable.func4(); call *40(%rsp) # vtable.func4 # /app/example.c:27: vtable.func5(); call *48(%rsp) # vtable.func5 # /app/example.c:30: } xorl %eax, %eax # addq $72, %rsp #, ret .size entry, .-entry .ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0" .section .note.GNU-stack,"",@progbits
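Doing this by hand for every object is not practical either, because it is tedious. Once the compiler knows that the object's `i_object_vtable` pointer (spelled `i_ibject_vtable` in the snippets) cannot change between the calls, it can do the optimization itself: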
/*
 * Minimal COM-style object: the object stores a pointer to a table of
 * function pointers (its "vtable").
 */
typedef struct {
    void (*func1)(void);
    void (*func2)(void);
    void (*func3)(void);
    void (*func4)(void);
    void (*func5)(void);
} i_ibject_vtable;

typedef struct {
    i_ibject_vtable *vtable;
} i_object;

/*
 * Defined in another translation unit; returns the object directly
 * instead of through an out-parameter, which is what allows the GCC
 * `malloc` attribute to be attached.
 *
 * NOTE(review): GCC documents `malloc` as promising both that the
 * returned pointer aliases no other live pointer and that the returned
 * storage contains no pointers to valid objects. An object that already
 * holds a vtable pointer may not satisfy the second part -- confirm
 * before relying on this outside a demonstration.
 */
__attribute__((malloc)) i_object *object_create(void);

/*
 * Obtains an object from object_create() and calls func1..func5 through
 * its vtable, followed by three more calls to func1.
 *
 * Returns 0 unconditionally. The pointer returned by object_create() is
 * used without a null check.
 */
int entry(void)
{
    i_object *p_object;

    p_object = object_create();

    p_object->vtable->func1();
    p_object->vtable->func2();
    p_object->vtable->func3();
    p_object->vtable->func4();
    p_object->vtable->func5();

    /*
     * The vtable pointer has been saved in a register: in the listing
     * that follows it is loaded once after object_create() and reused for
     * every call, including the repeated ones below. Only the individual
     * slots are still read from the table.
     */
    p_object->vtable->func1();
    p_object->vtable->func1();
    p_object->vtable->func1();

    return 0;
}
.file "example.c" # GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu) # compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP # GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 # options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables .text .p2align 4 .globl entry .type entry, @function entry: pushq %rbx # # /app/example.c:18: p_object = object_create(); call object_create # # /app/example.c:20: p_object->vtable->func1(); movq (%rax), %rbx # p_object_12->vtable, _1 # /app/example.c:20: p_object->vtable->func1(); call *(%rbx) # _1->func1 # /app/example.c:21: p_object->vtable->func2(); call *8(%rbx) # _1->func2 # /app/example.c:22: p_object->vtable->func3(); call *16(%rbx) # _1->func3 # /app/example.c:23: p_object->vtable->func4(); call *24(%rbx) # _1->func4 # /app/example.c:24: p_object->vtable->func5(); call *32(%rbx) # _1->func5 # /app/example.c:27: p_object->vtable->func1(); call *(%rbx) # _1->func1 # /app/example.c:28: p_object->vtable->func1(); call *(%rbx) # _1->func1 # /app/example.c:29: p_object->vtable->func1(); call *(%rbx) # _1->func1 # /app/example.c:32: } xorl %eax, %eax # popq %rbx # ret .size entry, .-entry .ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0" .section .note.GNU-stack,"",@progbits
Unfortunately, most APIs return an error code and hand the object back through an out-parameter instead of returning the pointer itself, which makes it impossible to apply `__attribute__((malloc))` to them.