This program is an expansion of the sorting utility built in previous chapters of K&R's 2nd edition of ANSI-C Programming. It parses command-line arguments to determine which fields to sort by and what options to apply to each field. For example, a command like ./sort -k2n -k1dr would mean "sort primarily by the 2nd field numerically, and for lines where the 2nd field is equal, sort by the 1st field in reverse directory order."
I'm a beginner working through K&R to improve my C programming skills and could use a review of my overall approach and implementation. Specifically, I have a few areas I'd like feedback on:
Argument Parsing: Is my approach to parsing the -k options in main() robust and easy to understand? It feels a bit complex, and I'm wondering if there's a cleaner way to handle it.
Field Extraction and Comparison: The core logic is in master_compare and extract_field. I'm temporarily modifying the input strings by inserting a '\0' to delimit a field for comparison, and then restoring the original character. Is this a safe and efficient practice, or is there a better way to handle field comparisons without modifying the strings?
Overall Structure: Does the program flow make sense? Are there any obvious bugs, edge cases, or violations of C best practices that I've missed?
Readability and Style: Is the code clear and maintainable? Any suggestions for improving variable names, comments, or function design would be very helpful.
Here is the code:
#include <ctype.h> #include <inttypes.h> #include <stdint.h> #include <stdio.h> #include <string.h> #include <stdlib.h> /* Exercise 5-17: Add a field-searching capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. */ #define MAXLINES 5000 /* max #lines to be sorted */ #define MAXLEN 1000 /* max length of any input line */ #define ALLOCSIZE 10000 /* size of available space */ #define OPT_REVERSE (1 << 0) // 0b0001 #define OPT_NUMERIC (1 << 1) // 0b0010 #define OPT_DIRECTORY (1 << 2) // 0b0100 #define OPT_FOLD (1 << 3) // 0b1000 #define MAX_FIELDS 1024 // max number of possible fields to add sorting conditions to #define MAX_SORT_FIELDS 10 // max number of sorting rules we can handle char *lineptr[MAXLINES]; /* pointers to text lines */ typedef struct { int field_num; uint8_t options; } FIeldSort; FIeldSort field_sorts[MAX_FIELDS] = {0}; int field_index = 0; int field_count = 0; void my_qsort(void *lineptr[], int left, int right, int (*comp)(void *, void *)); void swap(void *v[], int i, int j); int readlines(char *lineptr[], char *buf); void writelines(char *lineptr[], int nlines); int getline(char *, int); int my_strcmp(char *s, char *t); int strcmp_insensitive(char *s, char *t); int strcmp_reverse(char *s, char *t); int numcmp(char *, char *); int numcmp_reverse(char *s, char *t); int dir_cmp(char *s, char *t); int dircmp_insensitive(char *s, char *t); int dir_cmp_helper(char *s, char *t, int sensitive); int master_compare(char *lineA, char *lineB); void extract_field(char **start, char **end, int n); /* sort input lines */ int main(int argc, char *argv[]) { int nlines; /* number of input lines read */ char buf[ALLOCSIZE] = {'\0'}; int order = 1; // holds options for each field uint8_t field_options = 0; /* COMMAND LINE ARG PARSING */ if (argc == 0) printf("no arguments... basic line sort...?\n"); else if (argc > 1) { /* parse command line for field options */ while (++argv && *argv != NULL) { if (strncmp(*argv, "-k",2) == 0) { int temp = 0; char *p = *argv + 2; while (isdigit(*p)) { temp = (temp * 10) + (*p - '0'); p++; } field_sorts[field_index].field_num = temp; int max_fields = 4; while (*p != '\0' && isalpha(*p) && max_fields-- > 0) { switch (*p) { case 'r': field_sorts[field_index].options |= OPT_REVERSE; break; case 'n': field_sorts[field_index].options |= OPT_NUMERIC; break; case 'd': field_sorts[field_index].options |= OPT_DIRECTORY; break; case 'f': field_sorts[field_index].options |= OPT_FOLD; break; default: printf("field option not recognized... expected 'r', 'n', 'd', or 'f'...\n"); return 1; } p++; } field_index++; } else { printf("incorrect format... expected '-k'...\n"); return 1; } } } int (*comparison_function)(void *, void*) = (int (*)(void*, void*))master_compare; if ((nlines = readlines(lineptr, buf)) > 0) { my_qsort((void**) lineptr, 0, nlines-1, comparison_function); writelines(lineptr, nlines); return 0; } else { printf("error: no lines read\n"); return 1; } } /* qsort: sort v[left]....v[right] into increasing order */ void my_qsort(void *v[], int left, int right, int (*comp)(void *, void *)) { if (left >= right) /* do nothing if the array contains nothing */ return; swap(v, left, (left+right)/2); int last = left; for (int i = left+1; i <= right; i++) if ((*comp)(v[i], v[left]) < 0) swap(v, ++last, i); swap(v, left, last); my_qsort(v, left, last-1, comp); my_qsort(v, last+1, right, comp); } /* numcmp: compare s1 and s2 numerically */ int numcmp(char *s1, char *s2) { double v1, v2; v1 = atof(s1); v2 = atof(s2); if (v1 < v2) return -1; else if (v1 > v2) return 1; else return 0; } void swap(void *v[], int i, int j) { void *temp; temp = v[i]; v[i] = v[j]; v[j] = temp; } /* readlines: read input lines */ int readlines(char *lineptr[], char *buf) { int len, nlines, nchars; char *b = buf; char *bufmax = buf + ALLOCSIZE, line[MAXLEN]; nlines = 0; nchars = 0; while ((len = getline(line, MAXLEN)) > 0) { line[len-1] = '\0'; /* delete newline */ strcpy(buf, line); lineptr[nlines++] = buf; buf+= len; if (nlines >= MAXLINES) return -1; } return nlines; } /* writelines: write output lines */ void writelines(char *lineptr[], int nlines) { while (nlines-- > 0) { printf("%s\n", *lineptr++); fflush(stdout); } } int getline(char *s, int lim) { int c, i; i = 0; while (--lim > 0 && (c=getchar()) != EOF && c != '\n') *(s + i++) = c; if (c == '\n') *(s+i++) = c; *(s+i) = '\0'; return i; } /* strcmp: return <0 if s<t, 0 if s==t, >0 if s>t */ int my_strcmp(char *s, char *t) { for ( ; *s == *t; s++, t++) if (*s == '\0') return 0; return *s - *t; } int master_compare(char *lineA, char *lineB) { int result; for (int i = 0; i < field_index; i++) { /* pointers are reset to the start of the line for each new field rule */ char *fieldA = lineA, *fieldA_end, *fieldB = lineB, *fieldB_end; /* pass the ADDRESS of the pointers so the function can change them */ extract_field(&fieldA, &fieldA_end, i); extract_field(&fieldB, &fieldB_end, i); /* temporarily terminate the fields to treat the as separate strings */ char A_original = *fieldA_end; char B_original = *fieldB_end; *fieldA_end = '\0'; *fieldB_end = '\0'; // !! ready to do comparison !! uint8_t current_options = field_sorts[i].options; int numeric = 0, order = 1, fold = 0, directory = 0; if (current_options & OPT_FOLD) fold = 1; if (current_options & OPT_NUMERIC) numeric = 1; if (current_options & OPT_DIRECTORY) directory = 1; if (current_options & OPT_REVERSE) order = -1; if (numeric) result = numcmp(fieldA, fieldB); else if (directory & fold) result = dircmp_insensitive(fieldA, fieldB); else if (directory) result = dir_cmp(fieldA, fieldB); else if (fold) result = strcmp_insensitive(fieldA, fieldB); else result = my_strcmp(fieldA, fieldB); /* remove temporary null terminator */ *fieldA_end = A_original; *fieldB_end = B_original; /* if a difference was found, return the result immediately */ if (result != 0) return order*result; } /* if all fields were equal, return 0 */ return 0; } void extract_field(char **start, char **end, int n) { /* create a local copy to work with */ char *p = *start; int field_no = 0; while (*p != '\0') { /* move to the start of the next field */ while (isblank(*p)) p++; /* if we're at the end of the string, stop */ if (*p == '\0') break; field_no++; /* if we found our target field, break the search */ if (field_no == field_sorts[n].field_num) break; /* otherwise, skip this field and continue searching */ while (!isblank(*p) && *p != '\0') p++; } // update the caller's pointers with the results *start = p; *end = p; while (!isblank(**end) && **end != '\0') (*end)++; } int strcmp_insensitive(char *s, char *t) { for ( ; tolower(*s) == tolower(*t); s++, t++) if (tolower(*s) == '\0') return 0; return tolower(*s) - tolower(*t); } int dir_cmp(char *s, char *t) { return dir_cmp_helper(s, t, 0); } int dircmp_insensitive(char *s, char *t) { return dir_cmp_helper(s, t, 1); } int dir_cmp_helper(char *s, char *t, int sensitive) { /* SKIP TO ALPHANUMERIC CHARACTER */ while(*s != '\0' && !isalnum(*s) && *s != ' ') s++; while(*t != '\0' && !isalnum(*t) && *t != ' ') t++; /* COMPARE CHARACTER BY CHARACTER */ while (*s != '\0' && *t != '\0') { char x, y; if (sensitive) { x = tolower(*s); y = tolower(*t); if(x != y) return x - y; /* RETURN FINAL COMPARISON */ } else { if(*s != *t) return *s - *t; /* RETURN FINAL COMPARISON */ } s++; t++; /* CONTINUE COMPARISON, SKIP TO ALPHANUMERIC CHAR */ while(*s != '\0' && !isalnum(*s) && *s != ' ') s++; while(*t != '\0' && !isalnum(*t) && *t != ' ') t++; } /* REACHED END OF AT LEAST ONE STRING */ if (*s == '\0' && *t == '\0') return 0; else if (*s == '\0') return -1; else return 1; }
sort -d. See e.g. man7 (with blank possibly meaning whitespace). \$\endgroup\$