//Builds tl->txt: a null-terminated copy of tl->orig in which every line that
//  ends in a backslash is joined to the line after it (the backslash, any
//  whitespace following it, and the line terminator are all left out).
//
//Also records one entry per original line in two parallel tables:
//  tl->olines[i] points at the start of line i within tl->orig, and
//  tl->tlines[i] points at the matching position within tl->txt.
//
//mq is not named in this body; presumably tok_msg_warn reports through it.
static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_queue *mq) {
	const char *cur = tl->orig;
	const char * const end = cur + tl->orig_size;
	darray_char         *txt    = talloc_darray(tl);
	darray(const char*) *olines = talloc_darray(tl);
	darray(const char*) *tlines = talloc_darray(tl);
	size_t k;
	
	//The body always runs at least once, so even empty input yields one line.
	for (;;) {
		const char * const line_begin = cur;
		const size_t txt_offset = txt->size;
		const char *content_end;  //where the line terminator (or the input) begins
		const char *trimmed_end;  //one past the last non-white character
		int continued;
		
		//advance to the line terminator, or to the end of the input
		while (cur<end && !creturn(*cur))
			cur++;
		content_end = cur;
		
		//back up over any whitespace at the end of the line
		for (trimmed_end = content_end;
		     trimmed_end>line_begin && cspace(trimmed_end[-1]);
		     trimmed_end--)
			;
		
		//step over the terminator, treating a two-character terminator
		//  made of both return characters ("\r\n" or "\n\r") as one
		if (cur<end && creturn(*cur)) {
			const char first = *cur++;
			if (cur<end && *cur=='\n'+'\r'-first)
				cur++;
		}
		
		//A line is continued when its last non-white character is a
		//  backslash and a line terminator actually follows it.
		continued = trimmed_end>line_begin
		         && trimmed_end[-1]=='\\'
		         && content_end<end;
		
		if (!continued) {
			//ordinary line:  copy it verbatim, terminator included
			darray_append_items(*txt, line_begin, cur-line_begin);
		} else {
			//continued line:  copy only what precedes the backslash
			darray_append_items(*txt, line_begin, trimmed_end-1-line_begin);
			if (trimmed_end<end && cspace(*trimmed_end)) {
				tok_msg_warn(spaces_after_backslash_break, trimmed_end,
					"Trailing spaces after backslash-broken line");
			}
		}
		
		//Record where this line starts in both buffers.  txt may move
		//  whenever it grows, so its side is kept as an offset disguised
		//  as a pointer until the buffer is complete.
		darray_append(*olines, line_begin);
		darray_append(*tlines, (const char*)txt_offset);
		
		if (cur>=end)
			break;
	}
	
	//null-terminate the text without counting the terminator in its size
	darray_realloc(*txt, txt->size+1);
	txt->item[txt->size] = 0;
	
	//txt no longer moves, so turn the stored offsets into real pointers
	for (k=0; k<tlines->size; k++)
		tlines->item[k] = txt->item + (size_t)tlines->item[k];
	
	tl->txt = txt->item;
	tl->txt_size = txt->size;
	tl->olines = olines->item;
	tl->olines_size = olines->size;
	tl->tlines = tlines->item;
	tl->tlines_size = tlines->size;
}
//Example #2 (0)
//Reads a C string starting at s until quoteChar is found or e is reached,
//  decoding backslash escape sequences along the way.
//
//  out        Receives the decoded characters.  A zero terminator is written
//             after them but is not counted in out->size.
//  s          Start of the text to read (callers presumably pass the
//             character just after the opening quote).
//  e          End of the input; nothing at or beyond e is read.
//  quoteChar  The delimiter that ends the literal.  '\'' selects the
//             "character" wording in diagnostics, anything else "string".
//  mq         Not named in this body; presumably the tok_msg_* macros
//             report through it.
//
//  Returns the pointer to the terminating quote character or e if none was found
//
//  Recognized escapes:  one to three octal digits (0-7), \x followed by hex
//  digits, \a \b \e \f \n \r \t \v, and \\ \' \" \? which stand for
//  themselves.  Any other escaped character produces a warning and stands for
//  itself; that includes \8 and \9, which are not octal digits.
char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
	const char * const tokstart = s;
	const char *p;
	int has_endquote=0, has_newlines=0;
	
	#define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
	#define append_char(theChar) array_append(*out, theChar)
	#define append_zero() do {array_append(*out, 0); out->size--;} while(0)
	
	//s trails behind p, marking the start of the run of ordinary characters
	//  that has been scanned but not yet copied to out.
	p = s;
	while (p<e) {
		char c = *p++;
		if (c == '\\') {
			//The backslash that introduces this escape.  Diagnostics
			//  about the escape are located here, and it is never
			//  before the start of the input.
			const char * const esc = p-1;
			
			//flush the ordinary characters that preceded the escape
			append(s, esc);
			s = p;
			if (p >= e) {
				append_char('\\');
				tok_msg_error(ended_in_backslash, esc,
					"read_cstring input ended in backslash");
				break;
			}
			c = *p++;
			if (c>='0' && c<='7') {
				//octal escape:  at most three digits, each 0-7
				unsigned int octal = c-'0';
				size_t digit_count = 0;
				while (p<e && *p>='0' && *p<='7') {
					octal <<= 3;
					octal += (*p++) - '0';
					if (++digit_count >= 2)
						break;
				}
				//Any decimal digit right after the escape (including an
				//  8 or 9 that ended it) is easy to misread as part of it.
				if (p<e && *p>='0' && *p<='9') {
					tok_msg_info(ambiguous_octal, esc,
						"Octal followed by digit");
				}
				if (octal > 0xFF) {
					tok_msg_warn(octal_overflow, esc,
						"Octal out of range");
				}
				c = octal;
			} else {
				switch (c) {
					case 'x': {
						size_t digit_count = 0; //hex digits after the leading zeros
						size_t zero_count = 0;  //leading zeros
						unsigned int hex = 0;
						while (p<e && *p=='0') p++, zero_count++;
						for (;p<e;digit_count++) {
							c = *p++;
							if (c>='0' && c<='9')
								c -= '0';
							else if (c>='A' && c<='F')
								c -= 'A'-10;
							else if (c>='a' && c<='f')
								c -= 'a'-10;
							else {
								//not a hex digit:  give it back
								p--;
								break;
							}
							hex <<= 4;
							hex += c;
						}
						if (zero_count+digit_count > 2) {
							//the escape text, backslash included
							char *hex_string = strdup_rng(esc, p);
							tok_msg_warn(ambiguous_hex, esc,
								"Hex escape '%s' is ambiguous", hex_string);
							if (digit_count > 2)
								tok_msg_warn(hex_overflow, esc,
									"Hex escape '%s' out of range", hex_string);
							free(hex_string);
						}
						c = hex & 0xFF;
					}	break;
					case 'a':
						c=0x7;
						break;
					case 'b':
						c=0x8;
						break;
					case 'e':
						c=0x1B;
						break;
					case 'f':
						c=0xC;
						break;
					case 'n':
						c=0xA;
						break;
					case 'r':
						c=0xD;
						break;
					case 't':
						c=0x9;
						break;
					case 'v':
						c=0xB;
						break;
					case '\\':
						break;
					default:
						if (c == quoteChar)
							break;
						//An escaped quote of the other kind is unnecessary
						//  but harmless, so it is accepted silently.
						if (c=='\'' && quoteChar=='"')
							break;
						if (c=='"' && quoteChar=='\'')
							break;
						if (c=='?') // \? is needed in some situations to avoid building a trigraph
							break;
						tok_msg_warn(unknown_escape, esc,
							"Unknown escape sequence '\\%c'", c);
						break;
				}
			}
			//the next run of ordinary characters starts after the escape
			s = p;
			append_char(c);
		} else if (c == quoteChar) {
			//leave p on the closing quote so that it is what gets returned
			p--;
			has_endquote = 1;
			break;
		} else if (creturn(c)) {
			has_newlines = 1;
		}
	}
	//flush whatever ordinary characters remain, then terminate the output
	append(s, p);
	append_zero();
	if (!has_endquote) {
		tok_msg_error(missing_endquote, tokstart,
			"Missing endquote on %s literal",
			quoteChar=='\'' ? "character" : "string");
	} else if (has_newlines) {
		tok_msg_warn(quote_newlines, tokstart,
			"%s literal contains newline character(s)",
			quoteChar=='\'' ? "Character" : "String");
	}
	return (char*)p;
	
	#undef append
	#undef append_char
	#undef append_zero
}