//Builds tl->txt: a copy of tl->orig with backslash line breaks spliced out.
//
// tl->orig is walked one physical line at a time. A line whose last
// non-blank character is a backslash, and which is followed by a line
// terminator, is copied without the backslash, without any blanks after
// it, and without the terminator. Every other line (including a final
// line that ends in a backslash but has no terminator) is copied verbatim,
// terminator included.
//
// Results stored in tl:
//   txt,    txt_size     the spliced text; a 0 byte is stored just past txt_size
//   olines, olines_size  start of every physical line inside tl->orig
//   tlines, tlines_size  position inside txt where that line's text begins
//
// mq is not named in the body; presumably tok_msg_warn picks it up -- confirm.
static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_queue *mq) {
	const char *cur = tl->orig;
	const char *end = cur + tl->orig_size;
	darray_char *txt = talloc_darray(tl);
	darray(const char*) *olines = talloc_darray(tl);
	darray(const char*) *tlines = talloc_darray(tl);
	size_t k;

	do {
		const char *line_start = cur;
		const char *line_end;
		const char *content_end; //one past the last non-blank character
		size_t txt_offset = txt->size;
		int continued;

		//advance to the line terminator (or the end of the input)
		for (; cur < end; cur++) {
			if (creturn(*cur))
				break;
		}
		line_end = cur;

		//back up over blanks that trail the line's content
		content_end = line_end;
		while (content_end > line_start && cspace(content_end[-1]))
			--content_end;

		//step over the terminator; a two-character terminator is the
		//  *other* return character ("\r\n" or "\n\r"), never a repeat
		if (cur < end && creturn(*cur)) {
			char first = *cur++;
			if (cur < end && *cur == '\n' + '\r' - first)
				cur++;
		}

		//a continuation needs a trailing backslash AND a terminator after it
		continued = content_end > line_start
		         && content_end[-1] == '\\'
		         && line_end < end;

		if (continued) {
			//copy everything before the backslash, dropping the rest of the line
			darray_append_items(*txt, line_start, content_end - 1 - line_start);
			if (content_end < end && cspace(*content_end)) {
				tok_msg_warn(spaces_after_backslash_break, content_end,
					"Trailing spaces after backslash-broken line");
			}
		} else {
			//ordinary line: copy it whole, terminator included
			darray_append_items(*txt, line_start, cur - line_start);
		}

		//record where this line starts in both buffers; the txt side is
		//  kept as an offset for now because txt may move as it grows
		darray_append(*olines, line_start);
		darray_append(*tlines, (const char*)txt_offset);
	} while (cur < end);

	//null-terminate the text without counting the terminator in its size
	darray_realloc(*txt, txt->size + 1);
	txt->item[txt->size] = 0;

	//txt is final now, so the stored offsets can become real pointers
	for (k = 0; k < tlines->size; k++)
		tlines->item[k] = txt->item + (size_t)tlines->item[k];

	tl->txt = txt->item;
	tl->txt_size = txt->size;
	tl->olines = olines->item;
	tl->olines_size = olines->size;
	tl->tlines = tlines->item;
	tl->tlines_size = tlines->size;
}
//Reads a C string starting at s until quoteChar is found or e is reached // Returns the pointer to the terminating quote character or e if none was found char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) { const char * const tokstart = s; const char *p; int has_endquote=0, has_newlines=0; //tok_msg_debug(called, s, "Called read_cstring on `%s`", s); #define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr)) #define append_char(theChar) array_append(*out, theChar) #define append_zero() do {array_append(*out, 0); out->size--;} while(0) p = s; while (p<e) { char c = *p++; if (c == '\\') { append(s, p-1); s = p; if (p >= e) { append_char('\\'); tok_msg_error(ended_in_backslash, p-1, "read_cstring input ended in backslash"); break; } c = *p++; if (c>='0' && c<='9') { unsigned int octal = c-'0'; size_t digit_count = 0; while (p<e && *p>='0' && *p<='9') { octal <<= 3; octal += (*p++) - '0'; if (++digit_count >= 2) break; } if (p<e && *p>='0' && *p<='9') { tok_msg_info(ambiguous_octal, s-2, "Octal followed by digit"); } if (octal > 0xFF) { tok_msg_warn(octal_overflow, s-2, "Octal out of range"); } c = octal; } else { switch (c) { case 'x': { size_t digit_count = 0; size_t zero_count = 0; unsigned int hex = 0; while (p<e && *p=='0') p++, zero_count++; for (;p<e;digit_count++) { c = *p++; if (c>='0' && c<='9') c -= '0'; else if (c>='A' && c<='F') c -= 'A'-10; else if (c>='a' && c<='f') c -= 'a'-10; else { p--; break; } hex <<= 4; hex += c; } if (zero_count+digit_count > 2) { char *hex_string = strdup_rng(s-2, p); tok_msg_warn(ambiguous_hex, s-2, "Hex escape '%s' is ambiguous", hex_string); if (digit_count > 2) tok_msg_warn(hex_overflow, s-2, "Hex escape '%s' out of range", hex_string); free(hex_string); } c = hex & 0xFF; } break; case 'a': c=0x7; break; case 'b': c=0x8; break; case 'e': c=0x1B; break; case 'f': c=0xC; break; case 'n': c=0xA; break; case 'r': c=0xD; break; case 't': c=0x9; 
break; case 'v': c=0xB; break; case '\\': break; default: if (c == quoteChar) break; if (c=='\'' && quoteChar=='"') { /* tok_msg_info(escaped_single_quote, s-2, "Single quote characters need not be escaped within double quotes"); */ break; } if (c=='"' && quoteChar=='\'') { /* tok_msg_info(escaped_double_quote, s-2, "Double quote characters need not be escaped within single quotes"); */ break; } if (c=='?') // \? is needed in some situations to avoid building a trigraph break; tok_msg_warn(unknown_escape, s-2, "Unknown escape sequence '\\%c'", c); break; } } s = p; append_char(c); } else if (c == quoteChar) { p--; has_endquote = 1; break; } else if (creturn(c)) { has_newlines = 1; } } append(s, p); append_zero(); if (!has_endquote) { tok_msg_error(missing_endquote, tokstart, "Missing endquote on %s literal", quoteChar=='\'' ? "character" : "string"); } else if (has_newlines) { tok_msg_warn(quote_newlines, tokstart, "%s literal contains newline character(s)", quoteChar=='\'' ? "Character" : "String"); } return (char*)p; #undef append #undef append_char #undef append_zero }