// Process a triple quoted string, the leading """ of which has been seen, but // not consumed static token_t* triple_string(lexer_t* lexer) { consume_chars(lexer, 3); // Leading """ while(true) { if(is_eof(lexer)) return literal_doesnt_terminate(lexer); char c = look(lexer); if((c == '\"') && (lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"')) { consume_chars(lexer, 3); // Triple strings can end with 3 or more "s. If there are more than 3 // the extra ones are part of the string contents while(look(lexer) == '\"') { append_to_token(lexer, '\"'); consume_chars(lexer, 1); } normalise_string(lexer); return make_token_with_text(lexer, TK_STRING); } consume_chars(lexer, 1); append_to_token(lexer, c); } }
// Append the given value to the current token text, UTF-8 encoded static void append_utf8(lexer_t* lexer, int value) { assert(value >= 0 && value <= 0x10FFFF); if(value <= 0x7F) { append_to_token(lexer, (char)(value & 0x7F)); } else if(value <= 0x7FF) { append_to_token(lexer, (char)(0xC0 | (value >> 6))); append_to_token(lexer, (char)(0x80 | (value & 0x3F))); }
// Make a token with the specified ID and current token text static token_t* make_token_with_text(lexer_t* lexer, token_id id) { token_t* t = make_token(lexer, id); append_to_token(lexer, '\0'); token_set_string(t, stringtab(lexer->buffer)); return t; }
/*
 * Feed one received character into the NMEA sentence handling.
 *
 *   '$'   starts a sentence and restarts checksum calculation
 *   ','   finishes the current token
 *   '*'   finishes the current token; what follows is the checksum
 *   '\r'  ignored
 *   '\n'  finishes the current token and then the sentence
 *   other appended to the current token
 *
 * While the checksum state is CS_CALC, every character except '$' is also
 * passed to add_to_checksum().
 */
void nmea_process_character(char c)
{
    if (c == '$') {
        /* a new sentence is starting */
        sentence_started();
        /* reset and enable checksum calculation */
        checksum = 0;
        checksum_state = CS_CALC;
    } else if (c == ',') {
        token_finished();
    } else if (c == '*') {
        /* checksum is following */
        token_finished();
        checksum_state = CS_READ;
    } else if (c == '\r') {
        /* \n is following soon, we ignore this */
    } else if (c == '\n') {
        token_finished();
        ATOMIC(ATOMIC_FORCEON) {
            sentence_finished();
        }
        checksum_state = CS_UNKNOWN;
    } else {
        append_to_token(c);
    }

    /* Evaluated after the state updates above, so '*' and '\n' are excluded */
    if (c != '$' && checksum_state == CS_CALC) {
        add_to_checksum(c);
    }
}
// Process a triple quoted string, the leading """ of which has been seen, but // not consumed static token_t* triple_string(lexer_t* lexer) { consume_chars(lexer, 3); // Leading """ while(true) { if(is_eof(lexer)) return literal_doesnt_terminate(lexer); char c = look(lexer); if((c == '\"') && (lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"')) { consume_chars(lexer, 3); normalise_string(lexer); return make_token_with_text(lexer, TK_STRING); } consume_chars(lexer, 1); append_to_token(lexer, c); } }
/* Parse a field into tokens as defined by rfc822.

   HDR is the first line of the header field; lines reachable through
   hdr->next whose `cont' flag is set are treated as continuation lines
   of the same field.

   Returns the head of a linked list of tokens (chained through ->next)
   covering everything after the first ':' of the field.  Returns NULL
   if HDR is NULL, if the line contains no ':', if the field name is
   empty, or if new_token/append_to_token returns NULL; in the last
   case the tokens built so far are released and errno is left as it
   was at the point of failure.  errno is cleared on entry.  */
static TOKEN
parse_field (HDR_LINE hdr)
{
  /* `delimiters' terminates an atom; `delimiters2' lists the characters
     that become a tSPECIAL token on their own.  The t* variants are
     selected for the header names listed in tspecial_header.  */
  static const char specials[] = "<>@.,;:\\[]\"()";
  static const char specials2[] = "<>@.,;:";
  static const char tspecials[] = "/?=<>@,;:\\[]\"()";
  static const char tspecials2[] = "/?=<>@.,;:";	/* FIXME: really
							   include '.'?*/
  static struct
  {
    const unsigned char *name;
    size_t namelen;
  } tspecial_header[] = {
    { "Content-Type", 12},
    { "Content-Transfer-Encoding", 25},
    { "Content-Disposition", 19},
    { NULL, 0}
  };
  const char *delimiters;
  const char *delimiters2;
  const unsigned char *line, *s, *s2;
  size_t n;
  int i, invalid = 0;
  TOKEN t, tok, *tok_tail;

  errno = 0;
  if (!hdr)
    return NULL;

  /* tok_tail always points at the link where the next token goes.  */
  tok = NULL;
  tok_tail = &tok;

  line = hdr->line;
  if (!(s = strchr (line, ':')))
    return NULL; /* oops */

  n = s - line;
  if (!n)
    return NULL; /* oops: invalid name */

  /* Pick the delimiter sets; the name comparison is an exact,
     case-sensitive match on the n bytes before the colon.  */
  delimiters = specials;
  delimiters2 = specials2;
  for (i = 0; tspecial_header[i].name; i++)
    {
      if (n == tspecial_header[i].namelen
	  && !memcmp (line, tspecial_header[i].name, n))
	{
	  delimiters = tspecials;
	  delimiters2 = tspecials2;
	  break;
	}
    }

  s++; /* Move over the colon. */
  for (;;)
    {
      /* At the end of a line: move on to continuation lines (skipping
	 empty ones) or finish.  */
      while (!*s)
	{
	  if (!hdr->next || !hdr->next->cont)
	    return tok; /* Ready.  */

	  /* Next item is a header continuation line.  */
	  hdr = hdr->next;
	  s = hdr->line;
	}

      if (*s == '(')
	{
	  /* A comment: skipped without producing a token.  Parentheses
	     nest, and parentheses inside double quotes do not count.  */
	  int level = 1;
	  int in_quote = 0;

	  invalid = 0;
	  for (s++;; s++)
	    {
	      while (!*s)
		{
		  if (!hdr->next || !hdr->next->cont)
		    goto oparen_out;
		  /* Next item is a header continuation line.  */
		  hdr = hdr->next;
		  s = hdr->line;
		}

	      if (in_quote)
		{
		  if (*s == '\"')
		    in_quote = 0;
		  else if (*s == '\\' && s[1])	/* what about continuation? */
		    s++;
		}
	      else if (*s == ')')
		{
		  if (!--level)
		    break;
		}
	      else if (*s == '(')
		level++;
	      else if (*s == '\"')
		in_quote = 1;
	    }
	oparen_out:
	  if (!*s)
	    ; /* Actually this is an error, but we don't care about it. */
	  else
	    s++;
	}
      else if (*s == '\"' || *s == '[')
	{
	  /* We do not check for non-allowed nesting of domainliterals */
	  int term = *s == '\"' ? '\"' : ']';

	  invalid = 0;
	  s++;
	  t = NULL;

	  /* Collect the text up to the terminator into one token,
	     extending it with each continuation line that is needed.  */
	  for (;;)
	    {
	      for (s2 = s; *s2; s2++)
		{
		  if (*s2 == term)
		    break;
		  else if (*s2 == '\\' && s2[1]) /* what about continuation? */
		    s2++;
		}

	      t = (t
		   ? append_to_token (t, s, s2 - s)
		   : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
	      if (!t)
		goto failure;

	      /* Stop on the terminator or when no continuation follows.  */
	      if (*s2 || !hdr->next || !hdr->next->cont)
		break;
	      /* Next item is a header continuation line.  */
	      hdr = hdr->next;
	      s = hdr->line;
	    }
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s = s2;
	  if (*s)
	    s++; /* skip the delimiter */
	}
      else if ((s2 = strchr (delimiters2, *s)))
	{
	  /* Special characters which are not handled above. */
	  invalid = 0;
	  t = new_token (tSPECIAL, s, 1);
	  if (!t)
	    goto failure;
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s++;
	}
      else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
	{
	  invalid = 0;
	  s = skip_ws (s + 1);
	}
      else if (*s > 0x20 && !(*s & 128))
	{
	  /* Atom: characters above 0x20 without the high bit set, up to
	     the next character from `delimiters'.  */
	  invalid = 0;
	  for (s2 = s + 1; *s2 > 0x20
		 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
	    ;
	  t = new_token (tATOM, s, s2 - s);
	  if (!t)
	    goto failure;
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s = s2;
	}
      else
	{
	  /* Invalid character: a run of them yields a single tSPACE
	     token; the `invalid' flag is reset by every other branch.  */
	  if (!invalid)
	    {
	      /* For parsing we assume only one space. */
	      t = new_token (tSPACE, NULL, 0);
	      if (!t)
		goto failure;
	      *tok_tail = t;
	      tok_tail = &t->next;
	      invalid = 1;
	    }
	  s++;
	}
    }
  /*NOTREACHED*/

 failure:
  {
    /* Keep the errno of the failing call across the cleanup.  */
    int save = errno;
    release_token_list (tok);
    errno = save;
  }
  return NULL;
}
/****************
 * Parse a field into tokens as defined by rfc822.
 *
 * HDR is the first line of the header field; lines reachable through
 * hdr->next whose `cont' flag is set are treated as continuation lines.
 *
 * Returns the head of a linked list of tokens (chained through ->next)
 * for everything after the first ':' of the field.  Returns NULL if HDR
 * is NULL, if the line has no ':' or if the field name is empty.  A field
 * with nothing after the colon also yields NULL (an empty list).
 *
 * Empty continuation lines are skipped.  Each line end is checked again
 * after moving to a continuation line, so the scan never steps over a
 * terminating NUL.
 *
 * NOTE(review): the results of new_token() and append_to_token() are used
 * without a NULL check, as before.  If those helpers can fail, the caller
 * contract needs a failure path - confirm against their definitions.
 */
static TOKEN
parse_field( HDR_LINE hdr )
{
    /* `delimiters' ends an atom; `delimiters2' lists the characters that
     * become a tSPECIAL token on their own.  The t* sets are used for the
     * header names listed in tspecial_header. */
    static const char specials[]   = "<>@.,;:\\[]\"()";
    static const char specials2[]  = "<>@.,;:";
    static const char tspecials[]  = "/?=<>@,;:\\[]\"()";
    static const char tspecials2[] = "/?=<>@.,;:";
    static struct {
        const char *name;
        size_t namelen;
    } tspecial_header[] = {
        { "Content-Type", 12 },
        { "Content-Transfer-Encoding", 25 },
        { NULL, 0 }
    };
    const char *delimiters;
    const char *delimiters2;
    const char *line, *s, *s2;
    size_t n;
    int i, invalid = 0;
    TOKEN t, tok, *tok_tail;

    if( !hdr )
        return NULL;

    /* tok_tail always points at the link where the next token goes */
    tok = NULL;
    tok_tail = &tok;

    line = hdr->line;
    if( !(s = strchr( line, ':' )) )
        return NULL; /* oops */

    n = s - line;
    if( !n )
        return NULL; /* oops: invalid name */

    delimiters = specials;
    delimiters2 = specials2;
    for( i = 0; tspecial_header[i].name; i++ ) {
        if( n == tspecial_header[i].namelen
            && !memicmp( line, tspecial_header[i].name, n ) ) {
            delimiters = tspecials;
            delimiters2 = tspecials2;
            break;
        }
    }

    /* At this point we could store the fieldname in the parsing structure.
     * If we decide to do this, we should lowercase the name except for the
     * first character which should be uppercased.  This way we don't
     * need to apply the case insensitive compare in the future. */

    s++; /* move over the colon */
    for(;;) {
        /* Step over the end of a line, including empty continuation
         * lines, and stop when there is nothing left to parse. */
        while( !*s && hdr->next && hdr->next->cont ) {
            hdr = hdr->next;
            s = hdr->line;
        }
        if( !*s )
            break;

        if( *s == '(' ) {
            /* Comment: skipped without producing a token.  Parentheses
             * nest; those inside double quotes do not count. */
            int level = 1;
            int in_quote = 0;

            invalid = 0;
            for( s++; ; s++ ) {
                while( !*s && hdr->next && hdr->next->cont ) {
                    hdr = hdr->next;
                    s = hdr->line;
                }
                if( !*s )
                    break; /* unterminated comment */

                if( in_quote ) {
                    if( *s == '\"' )
                        in_quote = 0;
                    else if( *s == '\\' && s[1] ) /* what about continuation?*/
                        s++;
                }
                else if( *s == ')' ) {
                    if( !--level )
                        break;
                }
                else if( *s == '(' )
                    level++;
                else if( *s == '\"' )
                    in_quote = 1;
            }
            /* A missing ')' is actually an error, but we don't care
             * about it; only step over a closing parenthesis. */
            if( *s )
                s++;
        }
        else if( *s == '\"' || *s == '[' ) {
            /* We do not check for non-allowed nesting of domainliterals */
            int term = *s == '\"' ? '\"' : ']';

            invalid = 0;
            s++;
            t = NULL;

            /* Collect the text up to the terminator into one token,
             * extending it with each continuation line that is needed. */
            for(;;) {
                for( s2 = s; *s2; s2++ ) {
                    if( *s2 == term )
                        break;
                    else if( *s2 == '\\' && s2[1] ) /* what about continuation?*/
                        s2++;
                }

                t = t ? append_to_token( t, s, s2-s )
                      : new_token( term == '\"' ? tQUOTED : tDOMAINLIT,
                                   s, s2-s );

                if( *s2 || !hdr->next || !hdr->next->cont )
                    break;
                hdr = hdr->next;
                s = hdr->line;
            }
            *tok_tail = t;
            tok_tail = &t->next;
            s = s2;
            if( *s )
                s++; /* skip the delimiter */
        }
        else if( (s2 = strchr( delimiters2, *s )) ) {
            /* special characters which are not handled above */
            invalid = 0;
            t = new_token( tSPECIAL, s, 1 );
            *tok_tail = t;
            tok_tail = &t->next;
            s++;
        }
        else if( *s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' ) {
            invalid = 0;
            s = skip_ws( s+1 );
        }
        else if( *s > 0x20 && !(*s & 128) ) {
            /* atom: runs up to the next character from `delimiters' */
            invalid = 0;
            for( s2 = s+1; *s2 > 0x20 && !(*s2 & 128)
                           && !strchr( delimiters, *s2 ); s2++ )
                ;
            t = new_token( tATOM, s, s2-s );
            *tok_tail = t;
            tok_tail = &t->next;
            s = s2;
        }
        else {
            /* invalid character: a run of them yields one tSPACE token */
            if( !invalid ) {
                /* for parsing we assume only one space */
                t = new_token( tSPACE, NULL, 0 );
                *tok_tail = t;
                tok_tail = &t->next;
                invalid = 1;
            }
            s++;
        }
    }

    return tok;
}
/**
 * Removes longest common prefix indentation from every line in a triple
 * quoted string. If the string begins with an empty line, that line is removed
 * entirely.
 *
 * Works in place on lexer->buffer / lexer->buflen. Does nothing when the
 * buffer is empty. A terminator is appended before any other processing; a
 * string without a '\n' is otherwise left untouched.
 *
 * The common indentation is the smallest count of leading spaces/tabs over
 * all lines that contain something other than whitespace. Only spaces and
 * tabs are ever removed, so a blank or whitespace-only line that is shorter
 * than the common indentation keeps its line terminator. lexer->buflen is
 * updated to the number of bytes that remain.
 */
static void normalise_string(lexer_t* lexer)
{
  if(lexer->buflen == 0)
    return;

  // Make sure we have a null terminated string.
  append_to_token(lexer, '\0');

  // If we aren't multiline, do nothing.
  if(memchr(lexer->buffer, '\n', lexer->buflen) == NULL)
    return;

  // Calculate leading whitespace.
  size_t ws = lexer->buflen;
  size_t ws_this_line = 0;
  bool in_leading_ws = true;

  for(size_t i = 0; i < lexer->buflen; i++)
  {
    char c = lexer->buffer[i];

    if(in_leading_ws)
    {
      if(c == ' ' || c == '\t')
      {
        ws_this_line++;
      }
      else if((c != '\r') && (c != '\n'))
      {
        // First content character of this line: its indentation counts
        if(ws_this_line < ws)
          ws = ws_this_line;

        in_leading_ws = false;
      }
    }

    if(c == '\n')
    {
      ws_this_line = 0;
      in_leading_ws = true;
    }
  }

  // Trim leading whitespace on each line.
  if(ws > 0)
  {
    char* line_start = lexer->buffer;
    char* compacted = lexer->buffer;
    size_t rem = lexer->buflen;

    while(rem > 0)
    {
      // Bounded search, consistent with the length-based scan above
      char* line_end = (char*)memchr(line_start, '\n', rem);
      size_t line_len =
        (line_end == NULL) ? rem : (size_t)(line_end - line_start + 1);

      // Drop at most ws characters, and only spaces and tabs, so the line
      // terminator of a short whitespace-only line survives
      size_t trim = 0;

      while((trim < ws) && (trim < line_len) &&
        ((line_start[trim] == ' ') || (line_start[trim] == '\t')))
      {
        trim++;
      }

      memmove(compacted, line_start + trim, line_len - trim);
      compacted += line_len - trim;

      line_start += line_len;
      rem -= line_len;
    }

    // Record the shortened length so no stale bytes remain inside it
    lexer->buflen = (size_t)(compacted - lexer->buffer);
  }

  // Trim a leading newline if there is one.
  char* buf = lexer->buffer;

  if((buf[0] == '\r') && (buf[1] == '\n'))
  {
    lexer->buflen -= 2;
    memmove(&buf[0], &buf[2], lexer->buflen);
  }
  else if(buf[0] == '\n')
  {
    lexer->buflen--;
    memmove(&buf[0], &buf[1], lexer->buflen);
  }
}