I would normalize the string and then use the standard functions to convert it to float/double. It is not an easy task, but it is doable. A value written in parentheses, e.g. (xxx.xxx), is treated as a negative number.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h>

/*
 * Selects which of '.' and ',' in the input is treated as the decimal
 * separator. Whichever character is NOT selected is dropped as grouping.
 */
typedef enum
{
    /* Both present: the kind whose last occurrence is further right wins.
       Only one kind present: it is treated as grouping when exactly three
       digits follow its last occurrence and at least one digit precedes it;
       otherwise it is the decimal separator. */
    NF_DECIMAL_AUTO = 0,
    /* '.' is the decimal separator (if present); ',' is grouping. */
    NF_DECIMAL_FORCE_DOT,
    /* ',' is the decimal separator (if present); '.' is grouping. */
    NF_DECIMAL_FORCE_COMMA,
    /* The kind whose last occurrence is further right is the decimal
       separator; a lone kind is always the decimal separator. */
    NF_DECIMAL_RIGHTMOST
} nf_decimal_mode_t;

/* True for space, tab, and byte 0xA0 (NBSP in Latin-1 style encodings). */
static int nf_is_space_like(unsigned char c)
{
    return (c == ' ' || c == '\t' || c == 0xA0);
}

/* True for characters that are silently dropped as digit grouping:
   space-like characters, apostrophe and underscore. */
static int nf_is_grouping(unsigned char c)
{
    return nf_is_space_like(c) || c == '\'' || c == '_';
}

/*
 * Decide which source character ('.' or ',') acts as the decimal separator
 * in src[0 .. body_end), according to `mode`.
 * Returns '.' or ',', or 0 when no character should act as decimal.
 */
static char nf_choose_decimal(const char *src, size_t body_end, nf_decimal_mode_t mode)
{
    const size_t none = (size_t)-1;
    size_t last_dot = none;
    size_t last_comma = none;

    for(size_t j = 0; j < body_end; ++j)
    {
        if(src[j] == '.')
        {
            last_dot = j;
        }
        else if(src[j] == ',')
        {
            last_comma = j;
        }
    }

    const int has_dot = (last_dot != none);
    const int has_comma = (last_comma != none);

    if(mode == NF_DECIMAL_FORCE_DOT)
    {
        return has_dot ? '.' : 0;
    }
    if(mode == NF_DECIMAL_FORCE_COMMA)
    {
        return has_comma ? ',' : 0;
    }

    /* RIGHTMOST and AUTO agree when both kinds or neither kind is present. */
    if(has_dot && has_comma)
    {
        return (last_dot > last_comma) ? '.' : ',';
    }
    if(!has_dot && !has_comma)
    {
        return 0;
    }

    /* Exactly one kind of separator is present. */
    const char sep_char = has_dot ? '.' : ',';
    if(mode == NF_DECIMAL_RIGHTMOST)
    {
        return sep_char;
    }

    /* AUTO: "d,ddd" style (exactly three digits on the right, at least one
       digit on the left) looks like thousands grouping, not a decimal. */
    const size_t sep_pos = has_dot ? last_dot : last_comma;

    size_t digits_right = 0;
    for(size_t k = sep_pos + 1; k < body_end && isdigit((unsigned char)src[k]); ++k)
    {
        ++digits_right;
    }

    size_t digits_left = 0;
    for(size_t k = sep_pos; k > 0;)
    {
        --k;
        const unsigned char c = (unsigned char)src[k];
        if(isdigit(c))
        {
            ++digits_left;
        }
        else if(c != '.' && c != ',' && !nf_is_grouping(c))
        {
            break; /* stop counting at the first non-number character */
        }
    }

    return (digits_right == 3 && digits_left > 0) ? 0 : sep_char;
}

/*
 * Normalize a "locale-ish" number into ASCII
 * "[-][digits][.digits][e[+-]digits]" suitable for strtod() in the C locale.
 *
 * Handling, in order:
 *   - leading space/tab/0xA0 is skipped;
 *   - an optional '+' is dropped, '-' or an opening '(' (accounting style)
 *     produces a leading '-';
 *   - digits are copied; the FIRST occurrence of the chosen decimal
 *     character becomes '.', every other '.', ',' and any other non-letter
 *     character is dropped; the first letter ends the mantissa;
 *   - a trailing '.' with no fraction digits is removed;
 *   - the exponent starts at the first 'e'/'E' that follows a digit; it is
 *     written as 'e', optional sign, digits (space-like characters between
 *     them are skipped) and is dropped entirely when it has no digits;
 *   - when no mantissa digit was copied the result is "0" (after any sign)
 *     and the exponent is ignored.
 *
 * src      NUL-terminated input string.
 * dst      output buffer, always NUL-terminated on return when it is
 *          non-NULL and dst_cap > 0.
 * dst_cap  size of dst in bytes, including room for the NUL.
 * mode     how to pick the decimal separator, see nf_decimal_mode_t.
 *
 * Returns the number of bytes written (excluding the NUL), which is at
 * least 1 on success, or 0 on error (NULL src/dst, dst_cap == 0, or the
 * result does not fit). On error dst is set to "" when that is possible.
 */
size_t normalize_float_string_mode(const char *src, char *dst, size_t dst_cap, nf_decimal_mode_t mode)
{
    const size_t none = (size_t)-1;
    size_t out = 0;

    if(src == NULL || dst == NULL || dst_cap == 0)
    {
        /* Only touch dst when it is actually writable (it may be NULL). */
        if(dst != NULL && dst_cap > 0)
        {
            dst[0] = '\0';
        }
        return 0;
    }

/* Append one character, always keeping one byte free for the final NUL. */
#define NF_PUTC(ch)                      \
    do                                   \
    {                                    \
        if(out + 1 >= dst_cap)           \
        {                                \
            goto overflow;               \
        }                                \
        dst[out++] = (char)(ch);         \
    } while(0)

    /* Find exponent start: first e/E that comes after at least one digit. */
    size_t first_digit_pos = none;
    size_t exp_pos = none;
    size_t i = 0;
    for(; src[i] != '\0'; ++i)
    {
        const char c = src[i];
        if(first_digit_pos == none && isdigit((unsigned char)c))
        {
            first_digit_pos = i;
        }
        if((c == 'e' || c == 'E') && first_digit_pos != none)
        {
            exp_pos = i;
            break;
        }
    }
    const size_t body_end = (exp_pos != none) ? exp_pos : i;

    const char decimal_src = nf_choose_decimal(src, body_end, mode);

    /* Skip leading spaces/tabs/NBSP. */
    size_t pos = 0;
    while(nf_is_space_like((unsigned char)src[pos]))
    {
        ++pos;
    }

    /* Optional sign or accounting parentheses. */
    if(src[pos] == '+')
    {
        ++pos;
    }
    else if(src[pos] == '-')
    {
        NF_PUTC('-');
        ++pos;
    }
    else if(src[pos] == '(')
    {
        NF_PUTC('-');
        ++pos;
        while(nf_is_space_like((unsigned char)src[pos]))
        {
            ++pos;
        }
    }

    /* Copy the mantissa. */
    int wrote_digit = 0;
    int wrote_decimal = 0;
    for(size_t j = pos; j < body_end; ++j)
    {
        const unsigned char c = (unsigned char)src[j];

        if(isalpha(c))
        {
            break; /* letters never belong to the mantissa */
        }
        if(isdigit(c))
        {
            NF_PUTC(c);
            wrote_digit = 1;
            continue;
        }
        if(decimal_src != 0 && c == (unsigned char)decimal_src && !wrote_decimal)
        {
            NF_PUTC('.');
            wrote_decimal = 1;
        }
        /* Anything else (grouping, other separator, ')', junk) is dropped. */
    }

    /* Remove trailing '.' if no fraction followed. */
    if(wrote_decimal && out > 0 && dst[out - 1] == '.')
    {
        --out;
    }

    if(!wrote_digit)
    {
        /* No mantissa at all (e.g. "x1e5"): the value is zero. The exponent
           is skipped on purpose; writing it first and the placeholder
           afterwards would yield a malformed string such as "e50". */
        NF_PUTC('0');
    }
    else if(exp_pos != none)
    {
        const size_t out_before_exp = out; /* roll back whole exponent if bad */
        size_t j = exp_pos + 1;
        int exp_has_digit = 0;

        NF_PUTC('e');
        if(src[j] == '+' || src[j] == '-')
        {
            NF_PUTC(src[j]);
            ++j;
        }
        for(; src[j] != '\0'; ++j)
        {
            const unsigned char c = (unsigned char)src[j];
            if(isdigit(c))
            {
                NF_PUTC(c);
                exp_has_digit = 1;
            }
            else if(!nf_is_space_like(c))
            {
                break; /* spaces inside the exponent are allowed and skipped */
            }
        }
        if(!exp_has_digit)
        {
            out = out_before_exp; /* drop 'e' and optional sign */
        }
    }

    /* NF_PUTC guarantees out < dst_cap here, so the NUL always fits. */
    dst[out] = '\0';
    return out;

overflow:
    dst[0] = '\0';
    return 0;

#undef NF_PUTC
}

/* Convenience wrapper: normalize with NF_DECIMAL_AUTO separator detection. */
size_t normalize_float_string(const char *src, char *dst, size_t dst_cap)
{
    return normalize_float_string_mode(src, dst, dst_cap, NF_DECIMAL_AUTO);
}

/*
 * Built-in self-test driver. It is compiled by default; define
 * NF_NO_SELFTEST_MAIN when embedding the normalizer into a program that
 * provides its own main().
 */
#ifndef NF_NO_SELFTEST_MAIN

/* One self-test row: input string, separator mode, expected normalized text. */
typedef struct
{
    const char *input;
    nf_decimal_mode_t mode;
    const char *expected;
} TestCase;

/* Printable name of a decimal mode, "?" for an unknown value. */
static const char *mode_name(nf_decimal_mode_t m)
{
    switch(m)
    {
        case NF_DECIMAL_AUTO:
            return "AUTO";
        case NF_DECIMAL_FORCE_DOT:
            return "FORCE_DOT";
        case NF_DECIMAL_FORCE_COMMA:
            return "FORCE_COMMA";
        case NF_DECIMAL_RIGHTMOST:
            return "RIGHTMOST";
        default:
            return "?";
    }
}

/* Runs every table row through the normalizer and prints PASS/FAIL per row.
   Exit status is 0 only when all rows pass. */
int main(void)
{
    const TestCase tests[] = {
        { "1,234.56", NF_DECIMAL_AUTO, "1234.56" },
        { "1.234,56", NF_DECIMAL_AUTO, "1234.56" },
        { "1 234,56", NF_DECIMAL_AUTO, "1234.56" },
        { "12'345", NF_DECIMAL_AUTO, "12345" },
        { "(1,234.56)", NF_DECIMAL_AUTO, "-1234.56" },
        { "1,234e+3", NF_DECIMAL_FORCE_DOT, "1234e+3" },
        { "1,234e+3", NF_DECIMAL_FORCE_COMMA, "1.234e+3" },
        { "1,234e+3", NF_DECIMAL_RIGHTMOST, "1.234e+3" },
        { "1,234e+3", NF_DECIMAL_AUTO, "1234e+3" },
        { "1234", NF_DECIMAL_AUTO, "1234" },
        { " +1,234 ", NF_DECIMAL_FORCE_DOT, "1234" },
        { " ( 1 234 ) ", NF_DECIMAL_AUTO, "-1234" },
        { "1.234", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1.234", NF_DECIMAL_AUTO, "1234" },
        { "1,234", NF_DECIMAL_AUTO, "1234" },
        { "1.234", NF_DECIMAL_RIGHTMOST, "1.234" },
        { "1,234,567.89", NF_DECIMAL_AUTO, "1234567.89" },
        { "1.234.567,89", NF_DECIMAL_AUTO, "1234567.89" },
        { "1_234_567,89", NF_DECIMAL_FORCE_COMMA, "1234567.89" },
        { "1\t234,56", NF_DECIMAL_FORCE_COMMA, "1234.56" },
        { "1\x00A0 234,56", NF_DECIMAL_FORCE_COMMA, "1234.56" },
        { "00123,4500", NF_DECIMAL_FORCE_COMMA, "00123.4500" },
        { ".5", NF_DECIMAL_FORCE_DOT, ".5" },
        { ",5", NF_DECIMAL_FORCE_COMMA, ".5" },
        { "5,", NF_DECIMAL_FORCE_COMMA, "5" },
        { "5.", NF_DECIMAL_FORCE_DOT, "5" },
        { "1,234E-02", NF_DECIMAL_FORCE_DOT, "1234e-02" },
        { "1.234E+02", NF_DECIMAL_FORCE_DOT, "1.234e+02" },
        { "1.234E", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1,234E+", NF_DECIMAL_FORCE_DOT, "1234" }, /* fixed: no trailing 'e' */
        { "(1,23)", NF_DECIMAL_FORCE_COMMA, "-1.23" },
        { " +\t1'234'567,0 ", NF_DECIMAL_FORCE_COMMA, "1234567.0" },
        { "1,2,3,4", NF_DECIMAL_FORCE_COMMA, "1.234" },
        { "1.2.3.4", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1,23,456", NF_DECIMAL_AUTO, "123456" },
        { "000", NF_DECIMAL_AUTO, "000" },
        { "", NF_DECIMAL_AUTO, "0" },
        { " ", NF_DECIMAL_AUTO, "0" },
        { "e10", NF_DECIMAL_AUTO, "0" },      /* fixed: abort on alpha before number */
        { ".e10", NF_DECIMAL_FORCE_DOT, "0" }, /* fixed: same */
        { "1,234e- 3", NF_DECIMAL_FORCE_DOT, "1234e-3" },
        { "1,234e -3", NF_DECIMAL_FORCE_DOT, "1234" },
        { "1.234,567", NF_DECIMAL_RIGHTMOST, "1234.567" },
        { "1,234.567", NF_DECIMAL_RIGHTMOST, "1234.567" },
        { "1 234 567", NF_DECIMAL_AUTO, "1234567" },
        { "1_234_567", NF_DECIMAL_AUTO, "1234567" },
        { "1'234'567", NF_DECIMAL_AUTO, "1234567" },
        { " ( 14,50 ) ", NF_DECIMAL_FORCE_COMMA, "-14.50" }, /* fixed expected */
        { "999,999,999,999,999.999999", NF_DECIMAL_AUTO, "999999999999999.999999" },
        { "( .5 )", NF_DECIMAL_FORCE_DOT, "-.5" },
        { "x1e5", NF_DECIMAL_AUTO, "0" } /* no mantissa: exponent must not leak into output */
    };
    const size_t N = sizeof(tests) / sizeof(tests[0]);
    char out[256];
    size_t passCount = 0;

    for(size_t i = 0; i < N; ++i)
    {
        const TestCase *t = &tests[i];
        size_t n = normalize_float_string_mode(t->input, out, sizeof(out), t->mode);
        int ok = (n == strlen(t->expected)) && (strcmp(out, t->expected) == 0);
        if(ok)
        {
            ++passCount;
            printf("%2zu. PASS mode=%-10s in=\"%s\" out=\"%s\"\n", i + 1, mode_name(t->mode), t->input, out);
        }
        else
        {
            printf("%2zu. FAIL mode=%-10s in=\"%s\"\n"
                   " exp=\"%s\"\n"
                   " got=\"%s\" (len %zu)\n",
                   i + 1, mode_name(t->mode), t->input, t->expected, out, n);
        }
    }

    printf("\nSummary: %zu / %zu tests passed\n", passCount, N);
    return (passCount == N) ? 0 : 1;
}

#endif /* NF_NO_SELFTEST_MAIN */
https://godbolt.org/z/GWvneKoTq
strtod_l. '.' versus ',' (which could be handled with a string substitution), what other numeric locale concerns do you have? #define _GNU_SOURCE and then it became available! setlocale(LC_NUMERIC, "C"). That would preserve i18n in other aspects, although it would indeed affect numeric handling everywhere, not just in your config file parser.