1) if you need to detect flags in multiple of 8bit, you had better to use characters, so when you detect the byte 0x7e, then eliminate it. This is applicable when you are using the flat character in asynchronous lines, that transmit only 8bit bytes. (Your sample data has only seven bits of data between flags, so I'll assume this is not the case.)
2) if you need to detect the flag on a per bit basis, then you face the HDLC framing problem, that has two parts: a) the flag is detected when you detect 6 joint 1 bits, and b) transmitter inserts a 0 bits if more than 5 1 bits are in sequence (to ensure transparency). For this second part you need also to delete the 6th 0 bit after a sequence of 5 1 bits also. I don't know if you want this, but your flag seems extremely similar to an HDLC flag to assume you are not actually studying this protocol.
This problem can be faced with a finite automaton. The NFA equivalent, to detect this is:
0,1 ,--. | / | / |v 1 1 1 1 1 0 (0)--->(1P)--->(2P)--->(3P)--->(4P)--->(5)--->(6D) | | _ . . / \ 1 \ \ 1 | V \0 `-->(7B) \ \ 1 1 1 1 1 0 (8)--->(9)--->(10)--->(11)--->(12)--->(13)--->(14F)
Where
([1-4]P) Accepting state, print the number of 1 chars shown in the state number, (e.g. for state (3P) print 111.) (6D) Accepting state, print 11111. (7B) Accepting state. BREAK condition detected. (14F) Accepting state. Flag detected. All others are not accepting states. The automaton has to continue matching states until no more accepting states can be reached, to output the string of the last accepting state visited as output (e.g. if the last accepting state was 6D, then it has to output '11111'. In case the last state visited includes several accepting states, the priority order is:
14F, 7B, 6D, 4P, 3P, 2P, 1P
This NFA recognises 8 kinds of strings (I'll express them as regexps), namely: 0, 01111110, 1, 11, 111, 1111, 111110, and 11111111*. If a flag is accepted, you have to check also if the number of bits is a multiple of 8, as you can have arbitrary bit sequences with this protocol. Probably you don't, as the sample data you posted includes only seven bits. It could be represented by the following lex(1) pattern:
%% 0 ECHO; 01111110 putchar('F'); 1111111* putchar('B'); 1 | 11 | 111 | 1111 ECHO; 111110 fputs("11111", stdout); . ; %%
NOTE
Below is a complete DFA that converts the input bits into the following alphabet (for your problem, it only needs to substitute the "F" and "B" strings with empty ones ""):
/* * DFA to implement. */ #include <stdio.h> #include <string.h> #define F(x) __FILE__":%d:%s: " x, __LINE__, __func__ #ifndef DEBUG #define DEBUG 0 #endif #if DEBUG #define D(...) fprintf(stderr, __VA_ARGS__) #else #define D(...) #endif #define E(...) fprintf(stderr, __VA_ARGS__) char *names[] = { "START" , "ST0" , "ST_1" , "ST01" , "ST_11" , "ST011" , "ST_111" , "ST0111" , "ST_1111" , "ST01111" , "ST_11111" , "ST011111" , "ST1111111", "ST" , "ST0111111", "ST1" , "ST11" , "ST111" , "ST1111" , "ST11111" , }; static enum st { START , ST0 , ST_1 , ST01 , ST_11 , ST011 , ST_111 , ST0111 , ST_1111 , ST01111 , ST_11111 , ST011111 , ST1111111, ST , ST0111111, ST1 , ST11 , ST111 , ST1111 , ST11111 , } state = START; char flag[] = "F"; char brk[] = "B"; struct tab_entry { enum st t_next; char *t_insert; } tab[][2] = { /* INPUT */ /* STATE 0/OUTPUT | 1/OUTPUT */ /* ========================+================ */ /* START */ ST0, 0, ST_1, 0, /* ST0 */ ST0, "0", ST01, 0, /* ST_1 */ ST0, 0, ST_11, 0, /* ST01 */ ST0, "01", ST011, 0, /* ST_11 */ ST0, 0, ST_111, 0, /* ST011 */ ST0, "011", ST0111, 0, /* ST_111 */ ST0, 0, ST_1111, 0, /* ST0111 */ ST0, "0111", ST01111, 0, /* ST_1111 */ ST0, 0, ST_11111, 0, /* ST01111 */ ST0, "01111", ST011111, 0, /* ST_11111 */ ST0, 0, ST1111111, 0, /* break */ /* ST011111 */ ST, "011111", ST0111111, 0, /* ST1111111 */ ST0, brk, ST1111111, 0, /* ST */ ST0, 0, ST1, 0, /* ST0111111 */ ST , flag, ST1111111, 0, /* break */ /* ST1 */ ST0, "1", ST11, 0, /* ST11 */ ST0, "11", ST111, 0, /* ST111 */ ST0, "111", ST1111, 0, /* ST1111 */ ST0, "1111", ST11111, 0, /* ST11111 */ ST, "11111", ST1111111, 0, /* break */ }; void hdlc_rst() { state = START; } char *hdlc_dec(char *out, int outsz, char *in) { char *res = out; int c; while ((c = *in++) != 0) { switch(c) { case '0': case '1':; struct tab_entry *entry = &tab[state][c - '0']; if (state != START) fputs("", stderr); D(F("State: %s(%d), Input: '%c'\n"), names[state], state, c); if (entry->t_insert) { int n = snprintf(out, outsz, "%s", entry->t_insert); D(F("Append \"%s\"\n"), entry->t_insert); out += n; outsz -= n; } D(F("State %s(%d) -> %s(%d)\n"), names[state], state, names[entry->t_next], entry->t_next); state = entry->t_next; break; default: E(F("Invalid character '%c' in input string\n"), c); break; } /* switch */ } /* while */ char *p = tab[state][0].t_insert; snprintf(out, outsz, "%s", p ? p : ""); return res; } /* hdlc_dec */ char *hdlc_enc(char *out, int outsz, char *in) { char *res = out; char c; int n_ones = 0; while ((c = *in++) != 0) { int n = 0; switch(c) { case 'F': case 'f': n = snprintf(out, outsz, "01111110"); n_ones = 0; break; case 'B': case 'b': n = snprintf(out, outsz, "1111111"); n_ones = 0; break; case '0': n = snprintf(out, outsz, "0"); n_ones = 0; break; case '1': n = snprintf(out, outsz, "1"); n_ones++; if (n_ones == 5) { n += snprintf(out+1, outsz-1, "0"); n_ones = 0; } break; default: D(F("Invalid character '%c' in input string\n"), c); break; } /* switch */ outsz -= n; out += n; } /* while */ return res; } /* hdlc_enc */ int main(int argc, char **argv) { static char linea[10240], out[10240]; char *(*hdlc)(char *out, int outsz, char *in) = hdlc_dec; if (strcmp(argv[0], "hdlc_dec") == 0) hdlc = hdlc_dec; else if (strcmp(argv[0], "hdlc_enc") == 0) hdlc = hdlc_enc; while(fgets(linea, sizeof linea, stdin)) { int l = strlen(linea); if (linea[l-1] == '\n') linea[--l] = '\0'; hdlc(out, sizeof out, linea); D(F("%s ==> %s\n"), linea, out); printf("%s\n", out); } /* while */ } /* main */
To compile, use the following command:
cc -o hdlc_enc hdlc.c ln hdlc_enc hdlc_dec
or, if you want debug traces of how the automaton works, use
cc -o hdlc_enc -DDEBUG=1 hdlc.c ln hdlc_enc hdlc_dec
and you'll have both commands hdlc_enc to encode standard input, and hdlc_dec to decode it. To encode, use F for flag, B for BREAK, 0 and 1 (no restrictions) It will make 0 insertions at the proper places. To decode, just feed a sequence of bits, and it will transform flags into F, BREAKs into B and eliminate spureous 0s inserted by encoder. (after five 1 bits a 0 is inserted, to ensure transparency)
I have compiled the automaton by hand, so probably it's not optimum in the number of states, but 18 states is not too much.
The states names stand for the prefix ST followed by what has been read too far and not yet output because we cannot decide if it is a flag, break or a simple data bit sequence. States anchored to the stream beginning are prefixed by ST_.
NOTE 2
A DFA that, in addition recognises only multiple of 8 bit patterns between flags is far out of the scope of this answer. You can count the bits between flags and see if they are multiple of eight bits on flag recognition, as the DFA should have N1xN2 states (around 80 states) and this makes it no worth to implement.