You should mark the functions static so that the compiler knows they are local to that translation unit.
Without static, the compiler cannot assume (barring LTO / WPA) that the function is only called once, so it is less likely to inline it.
Demonstration using the LLVM Try Out page.
That said, code for readability first; micro-optimizations (and such tweaking is a micro-optimization) should only come after performance measurements.
Example:
#include <cstdio>

// Helper with a single call site in this snippet (the loop in main).
// Declared static, so it has internal linkage: it is not visible outside
// this translation unit.
static void foo(int i) {
  int m = i % 3;  // remainder of i divided by 3
  printf("%d %d", i, m);
}

// Calls foo once for every index in [0, argc).
int main(int argc, char* argv[]) {
  for (int i = 0; i != argc; ++i) {
    foo(i);
  }
}
Produces with static:
; ModuleID = '/tmp/webcompile/_27689_0.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @.str = private constant [6 x i8] c"%d %d\00" ; <[6 x i8]*> [#uses=1] define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { entry: %cmp4 = icmp eq i32 %argc, 0 ; <i1> [#uses=1] br i1 %cmp4, label %for.end, label %for.body for.body: ; preds = %for.body, %entry %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3] %rem.i = srem i32 %0, 3 ; <i32> [#uses=1] %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0] %inc = add nsw i32 %0, 1 ; <i32> [#uses=2] %exitcond = icmp eq i32 %inc, %argc ; <i1> [#uses=1] br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry ret i32 0 } declare i32 @printf(i8* nocapture, ...) nounwind
Without static:
; ModuleID = '/tmp/webcompile/_27859_0.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @.str = private constant [6 x i8] c"%d %d\00" ; <[6 x i8]*> [#uses=1] define void @foo(int)(i32 %i) nounwind { entry: %rem = srem i32 %i, 3 ; <i32> [#uses=1] %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %i, i32 %rem) ; <i32> [#uses=0] ret void } declare i32 @printf(i8* nocapture, ...) nounwind define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { entry: %cmp4 = icmp eq i32 %argc, 0 ; <i1> [#uses=1] br i1 %cmp4, label %for.end, label %for.body for.body: ; preds = %for.body, %entry %0 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; <i32> [#uses=3] %rem.i = srem i32 %0, 3 ; <i32> [#uses=1] %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %0, i32 %rem.i) nounwind ; <i32> [#uses=0] %inc = add nsw i32 %0, 1 ; <i32> [#uses=2] %exitcond = icmp eq i32 %inc, %argc ; <i1> [#uses=1] br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry ret i32 0 }
Declare them static to give them file scope. They may be inlined even if you don't do this. But if they are not static, they will have to be exported, which means a non-inlined version will have to be generated even if it's never used.