Example #1
0
/*
 * Tells whether c is accepted inside a name after its first character.
 * That is anything is_name_first_char() accepts, plus the decimal digits,
 * '-' and '.'.
 * Returns TRUE or FALSE.
 */
static int is_name_char(char c){
  if (is_name_first_char(c) == TRUE)
    return TRUE;

  /* the extra characters that are allowed only after the first one */
  if (c >= '0' && c <= '9')
    return TRUE;
  if (c == '-' || c == '.')
    return TRUE;

  return FALSE;
}
Example #2
0
/*
 * Parse successive blocks of XML data, generating events for the
 * handlers/callbacks as we go. State is maintained in the
 * simple_xml_parser object, so the state reached at the end of one call
 * is where the next call resumes.
 *
 * formal_parser  parser handle; it is cast to simple_xml_parser here.
 * s              characters to parse in this call.
 * size           number of characters of s to examine.
 * read           written only when the top level element is closed during
 *                this call; it then receives i + 1, one past the character
 *                that closed it. It is left untouched on every other
 *                return path, including when the input simply runs out.
 *
 * Returns HCERR_OK when the top level element closes or when all size
 * characters were consumed without error, and an HCERR_XML_* code as soon
 * as a malformed construct is met. Calls wrapped in require_ok()
 * presumably make this function return their error code on failure (the
 * macro is not visible here -- confirm).
 */
hcerr_t xml_parse(xml_parser *formal_parser, char s[], hc_long_t size, hc_long_t *read){
  simple_xml_parser *parser = (simple_xml_parser *)  formal_parser;

  int i = 0;

  if (DEBUG == TRUE){
    print_state(parser->state, parser->depth);
    printf ("in parser with " LL_FORMAT " %s\n", size, s);
    fflush(stdout);
  }

  /* One character is examined per iteration; the state decides its meaning. */
  while (i < size){

    switch(parser->state){

    case OUTSIDE_ELEMENT:
      if (is_white_space(s[i])){
        /*skip_white_space */
        break;
      }
      if (s[i] == '<'){
        /* Assume a start tag until a '/' says otherwise. */
        parser->start_tag = TRUE;
        change_state(&parser->state, EXPECTING_OPEN_OR_CLOSE_TAG, parser->depth);
      }
      else {
        HC_ERR_LOG(("Expected '<', read %c at %d %s\n", s[i], i, s));
        return HCERR_XML_EXPECTED_LT;
      }
      break;


    case DOCUMENT_ELEMENT:
      /* discard document element: everything up to the next '>' is dropped */
      if (s[i] != '>'){
        if (DEBUG == TRUE)
          printf("discarding %c", s[i]);
        break;
      }
      else{
        parser->state = OUTSIDE_ELEMENT;
        break;
      }

    case EXPECTING_OPEN_OR_CLOSE_TAG:
      if (is_white_space(s[i])){
        /*skip_white_space */
        break;
      }
      if (s[i] == '/'){
        /* "</" : this is an end tag; the name is read on later characters. */
        if (DEBUG)
          printf("parser->start_tag = FALSE\n");
        parser->start_tag = FALSE;
        break;
      }
      /* FALLTHROUGH: any other character is handled as the start of a name. */

    case EXPECTING_TAG:

      if (is_name_first_char(s[i]) == TRUE){
        change_state(&parser->state, SCANNING_TAG, parser->depth);
        require_ok(token_append(&parser->buffer_list, s[i]));
        break;
      }

      /* Discard document element */
      else if (s[i] == '?' && parser->depth == 0){
        parser->state = DOCUMENT_ELEMENT;
        break;
      }
      else{
        HC_ERR_LOG(("Invalid first character for element name : %c %d %s\n", s[i], i, s));
        return HCERR_XML_INVALID_ELEMENT_TAG;
      }

      /* Not reached: every branch above breaks or returns, so there is
         no fall through from EXPECTING_TAG into SCANNING_TAG. */

      /* Start tag is terminated by whitespace, /, or >
         End tag is terminated by whitespace or >
      */
    case SCANNING_TAG:
      /* Still reading token */
      if (is_name_char(s[i]) == TRUE){
        require_ok(token_append(&parser->buffer_list, s[i]));
        break;
      }
      else if (is_white_space(s[i]) == TRUE) {
        /* Name complete; what may follow depends on the kind of tag. */
        parser->current_tag = token_finish(&parser->buffer_list);
        if (parser->start_tag == TRUE){
          /*printf("Start element: %s\n", parser->current_tag);*/
          change_state(&parser->state, SCANNING_ATTRIBUTES, parser->depth);
          break;
        }
        else{
          change_state(&parser->state, SCANNING_CLOSE_TAG, parser->depth);
          break;
        }
      }
      else if (s[i] == '>') {
        if (DEBUG == TRUE)
          printf("parser->depth: %d\n", parser->depth);
        /* close_tag() is handed &i, so it may move the position. */
        require_ok(close_tag(&i, parser));
        if (DEBUG == TRUE)
          printf("parser->depth: %d\n", parser->depth);
        if (parser->depth == 0){
          *read = i + 1;
          return HCERR_OK;
        }
      }
      /* <element/> */
      else if (s[i] == '/' && parser->start_tag == TRUE) {
        /* NOTE(review): unlike the '/' branch of SCANNING_ATTRIBUTES, this
           path neither calls token_finish() nor fires the start/end
           callbacks, and current_tag printed below was not set from the
           name just scanned -- confirm whether that is intended. */
        if (DEBUG == TRUE){
          printf("Start element: %s\n", parser->current_tag);
          printf("End element: %s\n", parser->current_tag);
        }
        change_state(&parser->state, EXPECTING_RIGHT_BRACKET, parser->depth);
        break;
      }
      else {
        HC_ERR_LOG(("Invalid character '%c' in tag. %i %s\n", s[i], i, s));
        return HCERR_XML_INVALID_ELEMENT_TAG;
      }
      break;

    case EXPECTING_RIGHT_BRACKET:
      /* After the '/' of an empty element only '>' is acceptable. */
      if (s[i] != '>') {
        HC_ERR_LOG(("Unexpected character %c after close element. %d %s", s[i], i, s));
        return HCERR_XML_MALFORMED_START_ELEMENT;
      }
      if (parser->depth == 0){
        *read = i + 1;
        return HCERR_OK;
      }
      change_state(&parser->state, OUTSIDE_ELEMENT, parser->depth);
      break;


    case SCANNING_CLOSE_TAG:
      /* Whitespace may separate the end tag name from its '>'. */
      if (is_white_space(s[i])) {
        break;
      }
      if (DEBUG == TRUE)
        fprintf(stdout, "End element: %s\n", parser->current_tag);
      if (s[i] != '>') {
        HC_ERR_LOG(("Unexpected character %c after close element. %d %s", s[i], i, s));
        return HCERR_XML_MALFORMED_END_ELEMENT;
      }
      require_ok((*parser->end_element_callback)(parser->current_tag, parser->data));
      parser->depth--;
      if (parser->depth == 0){
        *read = i + 1;
        return HCERR_OK;
      }

      change_state(&parser->state, OUTSIDE_ELEMENT, parser->depth);
      break;


      /* Expected tokens:
       *   attribute_name
       *   '/'
       *   >
       */
    case SCANNING_ATTRIBUTES:
      if (is_white_space(s[i])){
        /*skip_white_space */
        break;
      }
      if (is_name_first_char(s[i]) == TRUE) {
        change_state(&parser->state, SCANNING_ATTRIBUTE_NAME, parser->depth);
        require_ok(token_append(&parser->buffer_list, s[i]));
      }
      else if (s[i] == '/' && parser->start_tag == TRUE) {
        /* Empty element: report start and end back to back; depth is
           left unchanged. */

        if (DEBUG == TRUE){
          int j = 0;
          printf("SA Start element: %s\n", parser->current_tag);
          fprintf(stdout, "Start  element: %s %d\n", parser->current_tag, parser->current_attribute);
          for (j = 0; j < parser->current_attribute; j++){
            printf(" %s=\"%s\"", *(parser->attribute_names + j), *(parser->attribute_values + j));
          }
          fprintf(stdout, "End  element: %s\n", parser->current_tag);
          fflush(stdout);
        }
        require_ok((*parser->start_element_callback)(parser->current_tag,
                                                     parser->data,
                                                     parser->attribute_names,
                                                     parser->attribute_values,
                                                     parser->current_attribute));
        require_ok((*parser->end_element_callback)(parser->current_tag, parser->data));

        /* Attribute slots are reused by the next element. */
        parser->current_attribute = 0;
        change_state(&parser->state, EXPECTING_RIGHT_BRACKET, parser->depth);
      }
      else  if (s[i] == '>') {
        /* End of a start tag: report it and descend one level. */
        if (DEBUG == TRUE){
          int j = 0;
          fprintf(stdout, "Start  element event: %s %d\n", parser->current_tag, parser->current_attribute);
          for (j = 0; j < parser->current_attribute; j++){
            printf(" %s=\"%s\"", *(parser->attribute_names + j), *(parser->attribute_values + j));
          }
        }
        require_ok((*parser->start_element_callback)(parser->current_tag,
                                                     parser->data,
                                                     parser->attribute_names,
                                                     parser->attribute_values,
                                                     parser->current_attribute));

        parser->current_attribute = 0;
        parser->depth++;
        change_state(&parser->state, OUTSIDE_ELEMENT, parser->depth);
      }
      else{
        HC_ERR_LOG(("Unexpected character %c after close element. %d %s", s[i], i, s));
        return HCERR_XML_MALFORMED_START_ELEMENT;
      }
      break;

    case SCANNING_ATTRIBUTE_NAME:
      if (s[i] == '='){
        /* Make room before storing the name at index current_attribute. */
        if (parser->current_attribute == parser->attribute_arrays_size){
          require_ok(grow_attribute_arrays(parser));
        }
        parser->attribute_names[parser->current_attribute] = token_finish(&parser->buffer_list);

        change_state(&parser->state, SCANNING_START_ATTRIBUTE_VALUE, parser->depth);
      }
      else if (is_name_char(s[i]) == TRUE) {
        require_ok(token_append(&parser->buffer_list, s[i]));
      }
      else{
        HC_ERR_LOG(("Illegal char %c in attribute name. %i <<%s>>\n", s[i], i, s));
        return HCERR_XML_BAD_ATTRIBUTE_NAME;
      }
      break;

    case SCANNING_START_ATTRIBUTE_VALUE:
      /* Only whitespace may sit between '=' and the opening quote. */
      if (is_white_space(s[i])){
        break;
      }
      else if (s[i] != '"'){
        HC_ERR_LOG(("Attribute value does not begin with quote: '%c'. %i %s\n", s[i], i, s));
        return HCERR_XML_BAD_ATTRIBUTE_NAME;
      }
      change_state(&parser->state, SCANNING_ATTRIBUTE_VALUE, parser->depth);
      break;


    case SCANNING_ATTRIBUTE_VALUE:
      /* parser->backslash is TRUE while the previous character was an
         unescaped backslash. Backslashes and escaped characters are both
         stored in the value as they appear. */
      if (s[i] == '\\') {
        if (parser->backslash == TRUE){
          parser->backslash = FALSE;
        }
        else{
          parser->backslash = TRUE;
        }
      }
      else if (s[i] == '"' && parser->backslash == FALSE) {
        /* Unescaped quote: the value is complete. */
        parser->attribute_values[parser->current_attribute++] = token_finish(&parser->buffer_list);
        change_state(&parser->state, SCANNING_ATTRIBUTES, parser->depth);
        break;
      }
      else {
        /* The escape covers one character only. Without this reset the
           flag stayed TRUE after e.g. \" and every later quote was
           treated as escaped, so the value never terminated. */
        parser->backslash = FALSE;
      }
      require_ok(token_append(&parser->buffer_list, s[i]));

      break;
    }
    i++;
  }
  /* Input exhausted before the top level element closed; *read is not set. */
  return HCERR_OK;
}
Example #3
0
/*
 * Walks the buffer str[0 .. left) and reports every token found to the
 * output callback, along with the line and column (both counted from 1)
 * where the token begins.
 *
 * Comments ('#' up to the next newline) and single whitespace characters
 * are consumed without being reported. Name tokens carry a malloc'd,
 * NUL-terminated copy of the name; string tokens carry the decoded bytes
 * obtained from an ExpString. Neither value is freed here -- presumably
 * the callback takes ownership (confirm against the callback contract).
 *
 * Tokenizing stops when the callback returns zero for a token, or on the
 * first error, which is reported as NCD_ERROR. If the whole buffer is
 * consumed, a final NCD_EOF is reported.
 */
void NCDConfigTokenizer_Tokenize (char *str, size_t left, NCDConfigTokenizer_output output, void *user)
{
    // Fixed spellings, probed in this order with the first hit winning;
    // "include_guard" therefore has to precede its own prefix "include".
    const struct {
        const char *text;
        int token;
    } fixed_tokens[] = {
        { "{", NCD_TOKEN_CURLY_OPEN },
        { "}", NCD_TOKEN_CURLY_CLOSE },
        { "(", NCD_TOKEN_ROUND_OPEN },
        { ")", NCD_TOKEN_ROUND_CLOSE },
        { ";", NCD_TOKEN_SEMICOLON },
        { ".", NCD_TOKEN_DOT },
        { ",", NCD_TOKEN_COMMA },
        { ":", NCD_TOKEN_COLON },
        { "[", NCD_TOKEN_BRACKET_OPEN },
        { "]", NCD_TOKEN_BRACKET_CLOSE },
        { "->", NCD_TOKEN_ARROW },
        { "If", NCD_TOKEN_IF },
        { "Elif", NCD_TOKEN_ELIF },
        { "elif", NCD_TOKEN_ELIF },
        { "Else", NCD_TOKEN_ELSE },
        { "else", NCD_TOKEN_ELSE },
        { "Foreach", NCD_TOKEN_FOREACH },
        { "As", NCD_TOKEN_AS },
        { "include_guard", NCD_TOKEN_INCLUDE_GUARD },
        { "include", NCD_TOKEN_INCLUDE },
    };
    const size_t num_fixed_tokens = sizeof(fixed_tokens) / sizeof(fixed_tokens[0]);

    size_t cur_line = 1;
    size_t cur_col = 1;

    while (left > 0) {
        size_t consumed = 0;    // input characters taken by this token
        int failed = 0;
        int token;              // 0 means "nothing to report"
        void *value = NULL;
        size_t value_len = 0;
        int is_fixed = 0;

        // comments are recognised before anything else, so the table is
        // only consulted when the input does not start one
        if (*str != '#') {
            for (size_t k = 0; k < num_fixed_tokens; k++) {
                consumed = data_begins_with(str, left, fixed_tokens[k].text);
                if (consumed) {
                    token = fixed_tokens[k].token;
                    is_fixed = 1;
                    break;
                }
            }
        }

        if (*str == '#') {
            // comment: extends up to, but not including, the next newline
            consumed = 1;
            while (consumed < left && str[consumed] != '\n') {
                consumed++;
            }
            token = 0;
        }
        else if (is_fixed) {
            // token and consumed were filled in by the table scan above
        }
        else if (is_name_first_char(*str)) {
            // measure the name
            consumed = 1;
            while (consumed < left && is_name_char(str[consumed])) {
                consumed++;
            }

            // room for the name and its terminator, with overflow checking
            bsize_t bufsize = bsize_add(bsize_fromsize(consumed), bsize_fromint(1));
            char *name;
            if (bufsize.is_overflow || !(name = malloc(bufsize.value))) {
                BLog(BLOG_ERROR, "malloc failed");
                failed = 1;
                goto out;
            }

            memcpy(name, str, consumed);
            name[consumed] = '\0';

            // two whole-word keywords are carved out of the name space;
            // they carry no value, so the copy is released right away
            if (!strcmp(name, "process")) {
                token = NCD_TOKEN_PROCESS;
                free(name);
            }
            else if (!strcmp(name, "template")) {
                token = NCD_TOKEN_TEMPLATE;
                free(name);
            }
            else {
                token = NCD_TOKEN_NAME;
                value = name;
                value_len = consumed;
            }
        }
        else if (*str == '"') do {
            // accumulator for the decoded contents
            ExpString estr;
            if (!ExpString_Init(&estr)) {
                BLog(BLOG_ERROR, "ExpString_Init failed");
                goto string_fail0;
            }

            // step over the opening quote
            consumed = 1;

            while (consumed < left) {
                uint8_t ch;

                // closing quote ends the contents
                if (str[consumed] == '"') {
                    break;
                }

                if (str[consumed] != '\\') {
                    // ordinary character, taken verbatim
                    ch = str[consumed];
                    consumed++;
                }
                else {
                    if (left - consumed < 2) {
                        BLog(BLOG_ERROR, "escape character found in string but nothing follows");
                        goto string_fail1;
                    }

                    const char esc = str[consumed + 1];
                    size_t esc_len = 2;    // backslash plus selector

                    switch (esc) {
                        // characters that stand for themselves
                        case '\'':
                        case '\"':
                        case '\\':
                        case '\?':
                            ch = esc;
                            break;

                        // C-style control characters
                        case 'a':
                            ch = '\a';
                            break;
                        case 'b':
                            ch = '\b';
                            break;
                        case 'f':
                            ch = '\f';
                            break;
                        case 'n':
                            ch = '\n';
                            break;
                        case 'r':
                            ch = '\r';
                            break;
                        case 't':
                            ch = '\t';
                            break;
                        case 'v':
                            ch = '\v';
                            break;

                        case '0':
                            ch = 0;
                            break;

                        // \xHH: exactly two hex digits
                        case 'x': {
                            if (left - consumed < 4) {
                                BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");
                                goto string_fail1;
                            }

                            uintmax_t hex_val;
                            if (!parse_unsigned_hex_integer_bin(&str[consumed + 2], 2, &hex_val)) {
                                BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");
                                goto string_fail1;
                            }

                            ch = hex_val;
                            esc_len = 4;
                        } break;

                        default:
                            BLog(BLOG_ERROR, "bad escape sequence in string");
                            goto string_fail1;
                    }

                    consumed += esc_len;
                }

                if (!ExpString_AppendByte(&estr, ch)) {
                    BLog(BLOG_ERROR, "ExpString_AppendChar failed");
                    goto string_fail1;
                }
            }

            // running out of input means the closing quote never came
            if (consumed == left) {
                BLog(BLOG_ERROR, "missing ending quote for string");
                goto string_fail1;
            }

            // step over the closing quote
            consumed++;

            token = NCD_TOKEN_STRING;
            value = ExpString_Get(&estr);
            value_len = ExpString_Length(&estr);
            break;

        string_fail1:
            ExpString_Free(&estr);
        string_fail0:
            failed = 1;
        } while (0);
        else if (is_space_char(*str)) {
            // whitespace is dropped one character at a time
            token = 0;
            consumed = 1;
        }
        else {
            BLog(BLOG_ERROR, "unrecognized character");
            failed = 1;
        }

    out:
        // an error ends tokenizing after being reported once
        if (failed) {
            output(user, NCD_ERROR, NULL, 0, cur_line, cur_col);
            return;
        }

        // hand over the token; a zero return from the callback stops us
        if (token) {
            if (!output(user, token, value, value_len, cur_line, cur_col)) {
                return;
            }
        }

        // advance the position over everything just consumed
        for (size_t i = 0; i < consumed; i++) {
            if (str[i] == '\n') {
                cur_line++;
                cur_col = 1;
            } else {
                cur_col++;
            }
        }

        str += consumed;
        left -= consumed;
    }

    output(user, NCD_EOF, NULL, 0, cur_line, cur_col);
}
Example #4
0
/*
 * Walks the buffer str[0 .. left) and reports every token found to the
 * output callback, along with the line and column (both counted from 1)
 * where the token begins.
 *
 * Comments ('#' up to the next newline) and single whitespace characters
 * are consumed without being reported. Name tokens carry a malloc'd,
 * NUL-terminated copy of the name; string tokens carry the decoded text
 * obtained from an ExpString. Neither value is freed here -- presumably
 * the callback takes ownership (confirm against the callback contract).
 *
 * Tokenizing stops when the callback returns zero for a token, or on the
 * first error, which is reported as NCD_ERROR. If the whole buffer is
 * consumed, a final NCD_EOF is reported.
 */
void NCDConfigTokenizer_Tokenize (char *str, size_t left, NCDConfigTokenizer_output output, void *user)
{
    // Punctuation tokens, probed in this order with the first hit winning.
    const struct {
        const char *text;
        int token;
    } fixed_tokens[] = {
        { "{", NCD_TOKEN_CURLY_OPEN },
        { "}", NCD_TOKEN_CURLY_CLOSE },
        { "(", NCD_TOKEN_ROUND_OPEN },
        { ")", NCD_TOKEN_ROUND_CLOSE },
        { ";", NCD_TOKEN_SEMICOLON },
        { ".", NCD_TOKEN_DOT },
        { ",", NCD_TOKEN_COMMA },
        { ":", NCD_TOKEN_COLON },
        { "[", NCD_TOKEN_BRACKET_OPEN },
        { "]", NCD_TOKEN_BRACKET_CLOSE },
        { "->", NCD_TOKEN_ARROW },
    };
    const size_t num_fixed_tokens = sizeof(fixed_tokens) / sizeof(fixed_tokens[0]);

    size_t cur_line = 1;
    size_t cur_col = 1;

    while (left > 0) {
        size_t consumed = 0;    // input characters taken by this token
        int failed = 0;
        int token;              // 0 means "nothing to report"
        void *value = NULL;
        int is_fixed = 0;

        // comments are recognised before anything else, so the table is
        // only consulted when the input does not start one
        if (*str != '#') {
            for (size_t k = 0; k < num_fixed_tokens; k++) {
                consumed = data_begins_with(str, left, fixed_tokens[k].text);
                if (consumed) {
                    token = fixed_tokens[k].token;
                    is_fixed = 1;
                    break;
                }
            }
        }

        if (*str == '#') {
            // comment: extends up to, but not including, the next newline
            consumed = 1;
            while (consumed < left && str[consumed] != '\n') {
                consumed++;
            }
            token = 0;
        }
        else if (is_fixed) {
            // token and consumed were filled in by the table scan above
        }
        else if (is_name_first_char(*str)) {
            // measure the name
            consumed = 1;
            while (consumed < left && is_name_char(str[consumed])) {
                consumed++;
            }

            // room for the name and its terminator, with overflow checking
            bsize_t bufsize = bsize_add(bsize_fromsize(consumed), bsize_fromint(1));
            char *name;
            if (bufsize.is_overflow || !(name = malloc(bufsize.value))) {
                BLog(BLOG_ERROR, "malloc failed");
                failed = 1;
                goto out;
            }

            memcpy(name, str, consumed);
            name[consumed] = '\0';

            // two whole-word keywords are carved out of the name space;
            // they carry no value, so the copy is released right away
            if (!strcmp(name, "process")) {
                token = NCD_TOKEN_PROCESS;
                free(name);
            }
            else if (!strcmp(name, "template")) {
                token = NCD_TOKEN_TEMPLATE;
                free(name);
            }
            else {
                token = NCD_TOKEN_NAME;
                value = name;
            }
        }
        else if (*str == '"') do {
            // accumulator for the decoded contents
            ExpString estr;
            if (!ExpString_Init(&estr)) {
                BLog(BLOG_ERROR, "ExpString_Init failed");
                goto string_fail0;
            }

            // step over the opening quote
            consumed = 1;

            while (consumed < left) {
                char ch;

                // closing quote ends the contents
                if (str[consumed] == '"') {
                    break;
                }

                if (str[consumed] != '\\') {
                    // ordinary character, taken verbatim
                    ch = str[consumed];
                    consumed++;
                }
                else {
                    // a backslash makes the next character literal
                    if (left - consumed < 2) {
                        BLog(BLOG_ERROR, "escape character found in string but nothing follows");
                        goto string_fail1;
                    }

                    ch = str[consumed + 1];
                    consumed += 2;
                }

                // zero bytes are rejected, escaped or not
                if (ch == '\0') {
                    BLog(BLOG_ERROR, "string contains zero byte");
                    goto string_fail1;
                }

                if (!ExpString_AppendChar(&estr, ch)) {
                    BLog(BLOG_ERROR, "ExpString_AppendChar failed");
                    goto string_fail1;
                }
            }

            // running out of input means the closing quote never came
            if (consumed == left) {
                BLog(BLOG_ERROR, "missing ending quote for string");
                goto string_fail1;
            }

            // step over the closing quote
            consumed++;

            token = NCD_TOKEN_STRING;
            value = ExpString_Get(&estr);
            break;

        string_fail1:
            ExpString_Free(&estr);
        string_fail0:
            failed = 1;
        } while (0);
        else if (is_space_char(*str)) {
            // whitespace is dropped one character at a time
            token = 0;
            consumed = 1;
        }
        else {
            BLog(BLOG_ERROR, "unrecognized character");
            failed = 1;
        }

    out:
        // an error ends tokenizing after being reported once
        if (failed) {
            output(user, NCD_ERROR, NULL, cur_line, cur_col);
            return;
        }

        // hand over the token; a zero return from the callback stops us
        if (token) {
            if (!output(user, token, value, cur_line, cur_col)) {
                return;
            }
        }

        // advance the position over everything just consumed
        for (size_t i = 0; i < consumed; i++) {
            if (str[i] == '\n') {
                cur_line++;
                cur_col = 1;
            } else {
                cur_col++;
            }
        }

        str += consumed;
        left -= consumed;
    }

    output(user, NCD_EOF, NULL, cur_line, cur_col);
}