/**
   Measures the run of "normal" characters at the start of buffer.

   The first character is always counted when the buffer is non-empty; it is
   not tested against any of the stop conditions. Every following character
   is inspected in turn, and the scan stops at the first one which either

     1. is a splitter,
     2. is a special character,
     3. is a quoter,
     4. starts a comment (length_of_comment() > 0), or
     5. is the terminating '\0'.

   Returns the number of characters in front of the stop position; 0 for an
   empty buffer.
*/
static int length_of_normal_non_splitters( const char * buffer, const basic_parser_type * parser) {
  int length = 0;

  if (buffer[0] == '\0')
    return 0;

  for (;;) {
    char next;

    length++;
    next = buffer[length];

    /* The stop conditions are probed in a fixed order, first hit wins. */
    if ( is_splitters( next, parser )                      ||
         is_special( next, parser )                        ||
         is_in_quoters( next, parser )                     ||
         length_of_comment( &buffer[length], parser ) > 0 )
      break;

    /* Reached the end of the string without hitting a stop character. */
    if (next == '\0')
      break;
  }

  return length;
}
Beispiel #2
0
/**
   Replaces *__buffer with a stripped copy of itself.

   The input string is scanned from left to right. At every position:

     1. If length_of_comment() reports a comment, the comment is skipped.
     2. If length_of_delete() reports characters to delete, they are skipped.
     3. If the character is a quoter, the whole quotation - as measured by
        length_of_quotation() - is copied verbatim.
     4. Otherwise the single character is copied.

   On return the original string has been free'd, and *__buffer points to a
   newly allocated '\0' terminated string which has been resized to hold
   exactly the stripped content.

   @param parser    Parser configuration; only passed on to the helpers.
   @param __buffer  In/out: address of a heap allocated string. The function
                    takes ownership of the old string and stores the new one.
*/
void parser_strip_buffer(const parser_type * parser , char ** __buffer) {
  char * src              = *__buffer;
  /* src is never written to below, so its length is measured exactly once. */
  const size_t src_length = strlen( src );
  /* The stripped string can never be longer than the input. */
  char * target           = util_calloc( src_length + 1 , sizeof * target );

  size_t src_position    = 0;
  size_t target_position = 0;
  while (src_position < src_length) {
    int comment_length;
    int delete_length;

    /**
      Skip comments.
    */
    comment_length = length_of_comment( &src[src_position], parser);
    if (comment_length > 0) {
      src_position += (size_t) comment_length;
      continue;
    }

    /**
       Skip characters which are just deleted.
    */
    delete_length = length_of_delete( &src[src_position] , parser );
    if (delete_length > 0) {
      src_position += (size_t) delete_length;
      continue;
    }

    /**
       Quotations are copied straight from src, quote marks included.
    */
    if (is_in_quoters( src[src_position], parser )) {
      int length = length_of_quotation( &src[src_position] );
      memcpy( &target[target_position] , &src[src_position] , (size_t) length );
      src_position    += (size_t) length;
      target_position += (size_t) length;
      continue;
    }

    /**
       A plain character - copy it straight over.
    */
    target[target_position] = src[src_position];
    src_position    += 1;
    target_position += 1;
  }
  target[target_position] = '\0';
  /* Shrink the allocation to the size actually used. */
  target = util_realloc( target , sizeof * target * (target_position + 1) );

  free( src );
  *__buffer = target;
}
Beispiel #3
0
/**
   Splits buffer into tokens and returns them in a newly allocated
   stringlist.

   The buffer is walked from left to right. At every position the following
   is tried, in this order:

     1. Initial splitters are skipped.
     2. Comments are skipped.
     3. Characters which are just deleted are skipped.
     4. A special character becomes a token of its own.
     5. A quotation becomes one token; strip_quote_marks is passed on to
        alloc_quoted_token().
     6. Anything else is the start of a run of plain non-splitters, which
        becomes one token.
*/
stringlist_type * parser_tokenize_buffer(
  const parser_type    * parser,
  const char           * buffer,
  bool                   strip_quote_marks)
{
  const int buffer_size    = strlen(buffer);
  stringlist_type * tokens = stringlist_alloc_new();
  int position             = 0;

  while (position < buffer_size) {
    const char * cursor = &buffer[position];
    int skip_length;

    /**
       Things which never end up in a token: initial splitters, comments
       and characters to delete. The first probe reporting a positive
       length decides how far we jump; the later ones are then not
       consulted.
    */
    skip_length = length_of_initial_splitters( cursor, parser );
    if (skip_length <= 0)
      skip_length = length_of_comment( cursor, parser );
    if (skip_length <= 0)
      skip_length = length_of_delete( cursor , parser );

    if (skip_length > 0) {
      position += skip_length;
      continue;
    }

    /**
       A character in the special set is a token by itself.
    */
    if (is_special( *cursor, parser )) {
      char key[2];
      key[0] = *cursor;
      key[1] = '\0';
      stringlist_append_copy( tokens, key );
      position++;
      continue;
    }

    /**
       A quotation start: the whole quotation goes into one token.
    */
    if (is_in_quoters( *cursor, parser )) {
      int quote_length = length_of_quotation( cursor );
      char * quoted    = alloc_quoted_token( cursor, quote_length, strip_quote_marks );
      stringlist_append_owned_ref( tokens, quoted );
      position += quote_length;
      continue;
    }

    /**
       At this point the current character is neither whitespace, the
       start of a comment, something to delete, a special character nor
       the start of a quotation. It is therefore the first character of a
       run of plain non-splitters, and that run lasts until:

       1. Whitespace starts.
       2. A comment starts.
       3. A special character occurs.
       4. A quotation starts.
    */
    {
      int word_length = length_of_normal_non_splitters( cursor, parser );
      char * word     = util_calloc( (word_length + 1) , sizeof * word);
      int kept        = 0;

      if (parser->delete_set != NULL) {
        /* Copy character by character, dropping those in the delete set. */
        int offset;
        for (offset = 0; offset < word_length; offset++) {
          char c = cursor[offset];
          if (!is_in_delete_set( c , parser))
            word[kept++] = c;
        }
      } else {
        /* No delete set - the run is taken over unchanged. */
        memcpy( word , cursor , word_length * sizeof * word );
        kept = word_length;
      }

      if (kept == 0)
        free( word );             /* Empty tokens are not inserted. */
      else {
        word[kept] = '\0';
        stringlist_append_owned_ref( tokens, word );
      }

      position += word_length;
    }
  }

  return tokens;
}