/**
   Measures the run of plain token characters that begins at buffer[0].

   The first character is counted without being classified; it is only
   compared against '\0'. Each following character ends the run (and is
   NOT counted) when any of these holds, tested in this order:

     1. is_splitters() accepts it.
     2. is_special() accepts it.
     3. is_in_quoters() accepts it.
     4. length_of_comment() reports a comment starting at it.

   The run also ends at the terminating '\0'.

   @param buffer  NUL terminated text; scanning starts at buffer[0].
   @param parser  Parser configuration handed on to the classifiers.
   @return        Number of characters in the run; 0 for an empty buffer.
*/
static int length_of_normal_non_splitters( const char * buffer, const basic_parser_type * parser) {
  int length = 0;
  char current = buffer[0];

  while (current != '\0') {
    length++;
    current = buffer[length];

    /* Short-circuit evaluation keeps the classifier calls in their original order. */
    if (is_splitters( current , parser ) ||
        is_special( current , parser ) ||
        is_in_quoters( current , parser ) ||
        length_of_comment( &buffer[length] , parser ) > 0)
      break;
  }

  return length;
}
/**
   Replaces *__buffer with a stripped copy of itself.

   The input is walked once from left to right:

     - a span reported by length_of_comment() is dropped,
     - a span reported by length_of_delete() is dropped,
     - a quotation (first character accepted by is_in_quoters()) is
       copied verbatim, using the length from length_of_quotation(),
     - every other character is copied as is.

   The original buffer is released with free() and *__buffer is set to
   the new, NUL terminated storage, shrunk to fit with util_realloc().

   @param parser    Parser configuration handed on to the helpers.
   @param __buffer  In/out: address of a heap allocated, NUL terminated
                    string. The caller owns the string stored on return.
*/
void parser_strip_buffer(const parser_type * parser , char ** __buffer) {
  char * src = *__buffer;
  /* src is never written to below, so its length is measured exactly
     once instead of on every pass through the loop condition. */
  const size_t src_length = strlen( src );
  char * target = util_calloc( src_length + 1 , sizeof * target );
  int src_position = 0;
  int target_position = 0;

  while ((size_t) src_position < src_length) {
    int comment_length;
    int delete_length;

    /** Skip comments. */
    comment_length = length_of_comment( &src[src_position], parser);
    if (comment_length > 0) {
      src_position += comment_length;
      continue;
    }

    /** Skip characters which are just deleted. */
    delete_length = length_of_delete( &src[src_position] , parser );
    if (delete_length > 0) {
      src_position += delete_length;
      continue;
    }

    /* Quotations are copied through untouched, quote marks included. */
    if (is_in_quoters( src[src_position], parser )) {
      int length = length_of_quotation( &src[src_position] );
      /* NOTE(review): this token is never read before being freed. The
         call is kept because alloc_quoted_token() is defined outside
         this block; if it has no side effects both lines can go. */
      char * token = alloc_quoted_token( &src[src_position], length, false );
      memcpy( &target[target_position] , &src[src_position] , length);
      free( token );
      src_position += length;
      target_position += length;
      continue;
    }

    /* An ordinary character - copy it straight over. */
    target[target_position] = src[src_position];
    src_position += 1;
    target_position += 1;
  }

  target[target_position] = '\0';
  /* Give back the space that the skipped spans no longer need. */
  target = util_realloc( target , sizeof * target * (target_position + 1) );

  free( src );
  *__buffer = target;
}
bool parser_fseek_string(const parser_type * parser , FILE * stream , const char * __string , bool skip_string, bool case_sensitive) { bool string_found = false; char * string = util_alloc_string_copy( __string ); if (!case_sensitive) util_strupr( string ); { long int initial_pos = ftell( stream ); /* Store the inital position. */ bool cont = true; if (strstr( string , parser->comment_start ) != NULL) util_abort("%s: sorry the string contains a comment start - will never find it ... \n"); /* A bit harsh ?? */ do { int c = fgetc( stream ); if (!case_sensitive) c = toupper( c ); /* Special treatment of quoters - does not properly handle escaping of the quoters. */ if (is_in_quoters( c , parser )) { long int quote_start_pos = ftell(stream); if (!fseek_quote_end( c , stream )) { fseek( stream , quote_start_pos , SEEK_SET); fprintf(stderr,"Warning: unterminated quotation starting at line: %d \n",util_get_current_linenr( stream )); fseek(stream , 0 , SEEK_END); } /* Now we are either at the first character following a terminated quotation, or at EOF. */ continue; } /* Special treatment of comments: */ if (c == parser->comment_start[0]) { /* OK - this might be the start of a comment - let us check further. */ bool comment_start = fgetc_while_equal( stream , &parser->comment_start[1] , false); if (comment_start) { long int comment_start_pos = ftell(stream) - strlen( parser->comment_start ); /* Start seeking for comment_end */ if (!util_fseek_string(stream , parser->comment_end , true , true)) { /* No end comment end was found - what to do about that?? The file is just positioned at the end - and the routine will exit at the next step - with a Warning. */ fseek( stream , comment_start_pos , SEEK_SET); fprintf(stderr,"Warning: unterminated comment starting at line: %d \n",util_get_current_linenr( stream )); fseek(stream , 0 , SEEK_END); } continue; /* Now we are at the character following a comment end - or at EOF. 
*/ } } /*****************************************************************/ /* Now c is a regular character - and we can start looking for our string. */ if (c == string[0]) { /* OK - we got the first character right - lets try in more detail: */ bool equal = fgetc_while_equal( stream , &string[1] , case_sensitive); if (equal) { string_found = true; cont = false; } } if (c == EOF) cont = false; } while (cont); if (string_found) { if (!skip_string) fseek(stream , -strlen(string) , SEEK_CUR); /* Reposition to the beginning of 'string' */ } else fseek(stream , initial_pos , SEEK_SET); /* Could not find the string reposition at initial position. */ } free( string ); return string_found; }
/** Allocates a new stringlist. */ stringlist_type * parser_tokenize_buffer( const parser_type * parser, const char * buffer, bool strip_quote_marks) { int position = 0; int buffer_size = strlen(buffer); int splitters_length = 0; int comment_length = 0; int delete_length = 0; stringlist_type * tokens = stringlist_alloc_new(); while( position < buffer_size ) { /** Skip initial splitters. */ splitters_length = length_of_initial_splitters( &buffer[position], parser ); if(splitters_length > 0) { position += splitters_length; continue; } /** Skip comments. */ comment_length = length_of_comment( &buffer[position], parser); if(comment_length > 0) { position += comment_length; continue; } /** Skip characters which are just deleted. */ delete_length = length_of_delete( &buffer[position] , parser ); if (delete_length > 0) { position += delete_length; continue; } /** Copy the character if it is in the special set, */ if( is_special( buffer[position], parser ) ) { char key[2]; key[0] = buffer[position]; key[1] = '\0'; stringlist_append_copy( tokens, key ); position += 1; continue; } /** If the character is a quotation start, we copy the whole quotation. */ if( is_in_quoters( buffer[position], parser ) ) { int length = length_of_quotation( &buffer[position] ); char * token = alloc_quoted_token( &buffer[position], length, strip_quote_marks ); stringlist_append_owned_ref( tokens, token ); position += length; continue; } /** If we are here, we are guaranteed that that buffer[position] is not: 1. Whitespace. 2. The start of a comment. 3. A special character. 4. The start of a quotation. 5. Something to delete. In other words, it is the start of plain non-splitters. Now we need to find the length of the non-splitters until: 1. Whitespace starts. 2. A comment starts. 3. A special character occur. 4. A quotation starts. 
*/ { int length = length_of_normal_non_splitters( &buffer[position], parser ); char * token = util_calloc( (length + 1) , sizeof * token); int token_length; if (parser->delete_set == NULL) { token_length = length; memcpy( token , &buffer[position] , length * sizeof * token ); } else { int i; token_length = 0; for (i = 0; i < length; i++) { char c = buffer[position + i]; if ( !is_in_delete_set( c , parser)) { token[token_length] = c; token_length++; } } } if (token_length > 0) { /* We do not insert empty tokens. */ token[token_length] = '\0'; stringlist_append_owned_ref( tokens, token ); } else free( token ); /* The whole thing is discarded. */ position += length; continue; } } return tokens; }