/**
   Measures the run of plain characters at the start of buffer.

   The first character is counted without being examined (it is only
   checked against the terminating '\0'). The run is then extended one
   character at a time until the next character is a splitter, a special
   character, a quote character, the start of a comment, or the end of
   the string.

   Returns 0 for an empty buffer, otherwise a value >= 1.
*/
static int length_of_normal_non_splitters( const char * buffer, const basic_parser_type * parser) {
  const char * next;
  int length = 0;

  if (buffer[0] == '\0')
    return 0;

  do {
    length++;
    next = &buffer[length];

    /* Any of these ends the run; the character at 'next' is not included. */
    if (is_splitters( *next , parser ))
      break;
    if (is_special( *next , parser ))
      break;
    if (is_in_quoters( *next , parser ))
      break;
    if (length_of_comment( next , parser ) > 0)
      break;
  } while (*next != '\0');

  return length;
}
/**
   Rewrites *__buffer with comments and delete-set characters removed.

   A new buffer is allocated, the surviving characters of the original are
   copied into it, the original is released with free() and *__buffer is
   pointed at the new buffer. Quotations are copied verbatim, i.e. comment
   and delete markers inside a quotation are left untouched.

   @param parser    Parser configuration handed to the is_xxx / length_of_xxx
                    helpers.
   @param __buffer  In/out: address of a '\0'-terminated string which this
                    function free()s and replaces.
*/
void parser_strip_buffer(const parser_type * parser , char ** __buffer) {
  char * src = *__buffer;
  /* src is never written to below, so its length is measured exactly once. */
  const int src_length = (int) strlen( src );
  char * target = util_calloc( src_length + 1 , sizeof * target );
  int src_position = 0;
  int target_position = 0;

  while (src_position < src_length) {
    const char * cursor = &src[src_position];
    int comment_length;
    int delete_length;

    /* Skip comments. */
    comment_length = length_of_comment( cursor , parser );
    if (comment_length > 0) {
      src_position += comment_length;
      continue;
    }

    /* Skip characters which are just deleted. */
    delete_length = length_of_delete( cursor , parser );
    if (delete_length > 0) {
      src_position += delete_length;
      continue;
    }

    /* Quotations are copied through unchanged. */
    if (is_in_quoters( src[src_position] , parser )) {
      int length = length_of_quotation( cursor );
      const int remaining = src_length - src_position;

      /*
         length_of_quotation() is defined elsewhere. Clamping to the
         remaining input keeps both the copy and the final terminator
         inside target even if the reported span reaches past the end of
         src (e.g. for an unterminated quotation).
      */
      if (length > remaining)
        length = remaining;

      memcpy( &target[target_position] , cursor , length );
      src_position    += length;
      target_position += length;
      continue;
    }

    /* An ordinary character - copy it straight over. */
    target[target_position] = src[src_position];
    src_position    += 1;
    target_position += 1;
  }

  target[target_position] = '\0';
  /* Shrink the allocation to what was actually written (plus terminator). */
  target = util_realloc( target , sizeof * target * (target_position + 1) );

  free( src );
  *__buffer = target;
}
/** Allocates a new stringlist. */ stringlist_type * parser_tokenize_buffer( const parser_type * parser, const char * buffer, bool strip_quote_marks) { int position = 0; int buffer_size = strlen(buffer); int splitters_length = 0; int comment_length = 0; int delete_length = 0; stringlist_type * tokens = stringlist_alloc_new(); while( position < buffer_size ) { /** Skip initial splitters. */ splitters_length = length_of_initial_splitters( &buffer[position], parser ); if(splitters_length > 0) { position += splitters_length; continue; } /** Skip comments. */ comment_length = length_of_comment( &buffer[position], parser); if(comment_length > 0) { position += comment_length; continue; } /** Skip characters which are just deleted. */ delete_length = length_of_delete( &buffer[position] , parser ); if (delete_length > 0) { position += delete_length; continue; } /** Copy the character if it is in the special set, */ if( is_special( buffer[position], parser ) ) { char key[2]; key[0] = buffer[position]; key[1] = '\0'; stringlist_append_copy( tokens, key ); position += 1; continue; } /** If the character is a quotation start, we copy the whole quotation. */ if( is_in_quoters( buffer[position], parser ) ) { int length = length_of_quotation( &buffer[position] ); char * token = alloc_quoted_token( &buffer[position], length, strip_quote_marks ); stringlist_append_owned_ref( tokens, token ); position += length; continue; } /** If we are here, we are guaranteed that that buffer[position] is not: 1. Whitespace. 2. The start of a comment. 3. A special character. 4. The start of a quotation. 5. Something to delete. In other words, it is the start of plain non-splitters. Now we need to find the length of the non-splitters until: 1. Whitespace starts. 2. A comment starts. 3. A special character occur. 4. A quotation starts. 
*/ { int length = length_of_normal_non_splitters( &buffer[position], parser ); char * token = util_calloc( (length + 1) , sizeof * token); int token_length; if (parser->delete_set == NULL) { token_length = length; memcpy( token , &buffer[position] , length * sizeof * token ); } else { int i; token_length = 0; for (i = 0; i < length; i++) { char c = buffer[position + i]; if ( !is_in_delete_set( c , parser)) { token[token_length] = c; token_length++; } } } if (token_length > 0) { /* We do not insert empty tokens. */ token[token_length] = '\0'; stringlist_append_owned_ref( tokens, token ); } else free( token ); /* The whole thing is discarded. */ position += length; continue; } } return tokens; }