コード例 #1
0
ファイル: xmlparse.c プロジェクト: henryem/blinkdb-scheduling
/** Looks for the start of the next tag and hands control to the matching handler.
 *
 *  The symbol returned by skip_space() must be '<'; anything else is reported
 *  through xml_error() and puts the parser into STATE_ERROR.  The symbol read
 *  right after '<' selects the handler: '!' -> handle_decl(), '?' -> handle_pi(),
 *  '/' -> handle_endtag().  Any other symbol is pushed back with ungetsymbol()
 *  before handle_starttag() is called.  EOF directly after '<' is an error.
 */
static
void proc_before(
   PPOS*                 ppos                /**< input stream position */
   )
{
   int sym;

   assert(ppos        != NULL);
   assert(ppos->state == STATE_BEFORE);

   /* guard: a tag has to open with '<' */
   sym = skip_space(ppos);
   if (sym != '<')
   {
      xml_error(ppos, "Expecting '<'");
      ppos->state = STATE_ERROR;
      return;
   }

   /* the symbol following '<' decides what kind of markup this is */
   sym = getsymbol(ppos);

   if (sym == EOF)
   {
      xml_error(ppos, "Unexpected EOF");
      ppos->state = STATE_ERROR;
   }
   else if (sym == '!')
   {
      handle_decl(ppos);
   }
   else if (sym == '?')
   {
      handle_pi(ppos);
   }
   else if (sym == '/')
   {
      handle_endtag(ppos);
   }
   else
   {
      /* not a marker character: give it back so the start tag handler sees it */
      ungetsymbol(ppos, sym);
      handle_starttag(ppos);
   }
}
コード例 #2
0
ファイル: ekhtml_starttag.c プロジェクト: 0xmono/miranda-ng
/*
 * Resumable parser for a start tag beginning at curp ('<' followed by a
 * letter).
 *
 * The function is a state machine that can be re-entered with more data:
 * progress is kept in parser->state.offset (how far past curp we already
 * looked) and in the ekhtml_starttag_state published through *state_data.
 * Because the caller's buffer may be at a different address on the next
 * call, attribute name/value "pointers" are stored as OFFSETS from curp,
 * encoded as (char *)NULL + offset, until the tag is complete.
 *
 * parser      parser owning the start tag state and attribute storage
 * state_data  in: NULL on the first call for a tag, otherwise the state
 *             stored by a previous call; out: set to the state while the
 *             tag is incomplete, reset to NULL once the tag was handled
 * curp        first character of the tag (the '<')
 * endp        one past the last available character
 * baddata     not referenced by this function
 *
 * Returns NULL when the data in [curp, endp) ended before the tag did (the
 * offset reached so far is saved for the next call).  Otherwise
 * handle_starttag() has been invoked and the return value is where
 * processing continues: just past the closing '>', or at a '<' that cut
 * the tag short.
 */
char *ekhtml_parse_starttag(ekhtml_parser_t *parser, void **state_data,
                            char *curp, char *endp, int *baddata)
{
    ekhtml_starttag_state *startstate = *state_data;
    int *offset = &parser->state.offset;
    char *workp;

    /* Check the length first so the second assert never reads curp[1]
       from a buffer that is too short.  The cast keeps isalpha() defined
       for bytes >= 0x80 on platforms where plain char is signed. */
    assert(endp - curp >= 3);
    assert(*curp == '<' && isalpha((unsigned char)*(curp + 1)));

    if(startstate == NULL){  /* First time the tag is called */
        startstate          = &parser->startstate;
        startstate->tagend  = sizeof("<F") - 1;  /* skip '<' + first letter */
        startstate->mode    = EKHTML_STMODE_TAG;
        startstate->attrs   = NULL;
        startstate->curattr = NULL;
        startstate->quote   = '\0';
        *state_data         = startstate;
        *offset             = startstate->tagend;
    }

    /* Resume where the previous call (if any) stopped */
    workp = curp + *offset;

    if(startstate->mode == EKHTML_STMODE_TAG){
        /* Find that tag!  Advance over the tag name characters */
        workp = ekhtml_find_notcharsmap(workp, endp - workp, EKCMap_CharMap,
                                        EKHTML_CHAR_TAGNAME);
        *offset = workp - curp;
        if(workp == endp)
            return NULL;  /* Tag name may continue in the next chunk */

        startstate->tagend  = *offset;
        startstate->mode = EKHTML_STMODE_BEGNAME;
    }

    while(workp != endp){  /* Main state processing loop */
        if(startstate->mode == EKHTML_STMODE_BEGNAME){
            ekhtml_attr_t *attr;

            /* Skip whitespace in front of the attribute name */
            workp = ekhtml_find_notcharsmap(workp, endp - workp,
                                            EKCMap_CharMap,
                                            EKHTML_CHAR_WHITESPACE);
            if(workp == endp)
                break;

            if(!(EKCMap_CharMap[(unsigned char)*workp] &
                 EKHTML_CHAR_BEGATTRNAME))
            {
                /* Bad attrname character */
                startstate->mode = EKHTML_STMODE_SUCK;
            } else {
                assert(startstate->curattr == NULL);
                /* Valid attribute name, allocate space for it */
                attr = ekhtml_parser_attr_new(parser);
                /* Offset from curp, encoded as a pointer (see header) */
                attr->name.str      = (char *)NULL + (workp - curp);
                attr->name.len      = 0;     /* Will get assigned later */
                attr->val.str       = NULL;
                attr->val.len       = 0;
                attr->isBoolean     = 1;     /* Until a value shows up */
                attr->next          = NULL;
                startstate->mode    = EKHTML_STMODE_GETNAME;
                startstate->curattr = attr;
            }
        }

        if(startstate->mode == EKHTML_STMODE_GETNAME){
            /* Advance to the first character that cannot be part of the
               attribute name */
            workp = ekhtml_find_notcharsmap(workp, endp - workp,
                                            EKCMap_CharMap,
                                            EKHTML_CHAR_ATTRNAME);
            if(workp == endp)
                break;

            /* There be dragons here -- watch out -- see comment @ top
               of file.  name.str holds an offset from curp here, so the
               cast recovers that offset to compute the name length.
               NOTE(review): (int) narrows the pointer on 64-bit targets;
               harmless only while offsets fit in an int. */
            startstate->curattr->name.len =
                workp - (curp + (int)startstate->curattr->name.str);
            if(*workp == '='){
                startstate->mode = EKHTML_STMODE_BEGVALUE;
                workp++;  /* Skip the equals sign */
            } else if(!(EKCMap_CharMap[(unsigned char)*workp] &
                        EKHTML_CHAR_WHITESPACE))
            {
                /* Found something we weren't expecting.  Use the current
                   attribute as a boolean value and suck the rest */
                scroll_attribute(startstate);
                startstate->mode = EKHTML_STMODE_SUCK;
            } else {
                /* Whitespace after the name: an '=' may still follow */
                startstate->mode = EKHTML_STMODE_GETEQUAL;
            }
        }

        if(startstate->mode == EKHTML_STMODE_GETEQUAL){
            workp = ekhtml_find_notcharsmap(workp, endp - workp,
                                            EKCMap_CharMap,
                                            EKHTML_CHAR_WHITESPACE);
            if(workp == endp)
                break;

            if(*workp != '='){
                /* Unexpected value.  Could either be time to suck, or this was
                   really only a boolean value */
                scroll_attribute(startstate);

                if(EKCMap_CharMap[(unsigned char)*workp] &
                   EKHTML_CHAR_BEGATTRNAME)
                {
                    /* Next attribute starts right here */
                    startstate->mode = EKHTML_STMODE_BEGNAME;
                    continue;
                } else {
                    startstate->mode = EKHTML_STMODE_SUCK;
                }
            } else {
                startstate->mode = EKHTML_STMODE_BEGVALUE;
                workp++;  /* Skip the equals sign */
            }
        }

        if(startstate->mode == EKHTML_STMODE_BEGVALUE){
            /* Skip whitespace between '=' and the value */
            workp = ekhtml_find_notcharsmap(workp, endp - workp,
                                            EKCMap_CharMap,
                                            EKHTML_CHAR_WHITESPACE);
            if(workp == endp)
                break;

            startstate->curattr->isBoolean = 0;
            /* Offset from curp, encoded as a pointer (see header) */
            startstate->curattr->val.str = (char *)NULL + (workp - curp);
            startstate->quote        = '\0';
            if(*workp == '"' || *workp == '\''){
                startstate->curattr->val.str++;  /* Skip the quote */
                startstate->mode   = EKHTML_STMODE_GETVALUE;
                startstate->quote  = *workp;  /* Remember which quote closes */
                workp++;
            } else if(!(EKCMap_CharMap[(unsigned char)*workp] &
                        EKHTML_CHAR_ATTRVALUE))
            {
                /* Bad value .. */
                startstate->curattr->val.len = 0;
                scroll_attribute(startstate);
                startstate->mode = EKHTML_STMODE_SUCK;
            } else {
                /* Valid value */
                startstate->mode = EKHTML_STMODE_GETVALUE;
            }
        }

        if(startstate->mode == EKHTML_STMODE_GETVALUE){
            if(startstate->quote){
                /* Quoted value: scan for the matching quote, but also stop
                   at '<' or '>' so an unterminated quote ends with the tag */
                for(;workp != endp && *workp != '>' && *workp != '<'; workp++){
                    if(*workp == startstate->quote){
                        startstate->curattr->val.len =
                            workp - (curp + (int)startstate->curattr->val.str);
                        scroll_attribute(startstate);
                        startstate->mode = EKHTML_STMODE_BEGNAME;
                        workp++;  /* Skip the quote */
                        break;
                    }
                }
                /* In case we broke out in the above loop, we may
                   need to continue in the main loop -- CONFUSING */
                if(startstate->mode == EKHTML_STMODE_BEGNAME)
                    continue;
            } else {
                /* Unquoted value: runs while characters are value chars */
                workp = ekhtml_find_notcharsmap(workp, endp - workp,
                                                EKCMap_CharMap,
                                                EKHTML_CHAR_ATTRVALUE);
            }
            if(workp == endp)
                break;

            /* Value ended at workp (unquoted end, or '<' / '>' reached) */
            startstate->curattr->val.len =
                workp - (curp + (int)startstate->curattr->val.str);
            scroll_attribute(startstate);

            if(*workp == '>' || *workp == '<') {
                /* End of tag: report it and reset the per-tag state */
                *offset = workp - curp;
                handle_starttag(parser, curp, startstate);
                release_attributes(parser, startstate);
                *state_data = NULL;
                if(*workp == '<')
                    return workp;      /* '<' belongs to the next token */
                else
                    return workp + 1;  /* Consume the '>' */
            } else {
                startstate->mode          = EKHTML_STMODE_BEGNAME;
                continue;
            }
        }

        if(startstate->mode == EKHTML_STMODE_SUCK){
            /* The sucking mode is here in case someone puts a bad character
               in an attribute name. We suck until what looks like end of tag*/
            for(;workp != endp && *workp != '<' && *workp != '>'; workp++)
                ;
            if(workp == endp)
                break;

            *offset = workp - curp;
            handle_starttag(parser, curp, startstate);
            release_attributes(parser, startstate);
            *state_data = NULL;
            if(*workp == '<')
                return workp;      /* '<' belongs to the next token */
            else
                return workp + 1;  /* Consume the '>' */
        }
    }

    /* Out of data: remember how far we got and ask for more */
    *offset = workp - curp;
    return NULL;
}
コード例 #3
0
ファイル: soup_parser.c プロジェクト: AvdN/tdi
/*
 * Lexer event sink: translates a tdi_lexer_event into a tdi_parser_event
 * and forwards it to the parser's callback.
 *
 * Start and end tags are delegated to handle_starttag() / handle_endtag().
 * Text, comment, marked section, declaration and PI events are copied
 * field by field and passed to self->cb; before that, if self->inempty is
 * set, close_empty() is called and a -1 result from it aborts with -1.
 * Any other event type (including TDI_LEXER_EVENT_ESCAPE) is treated as an
 * internal error: a Python AssertionError is set and last_error becomes
 * TDI_PARSER_ERR_ENV.
 *
 * Returns 0 when the callback returned zero, -1 on any failure; for tag
 * events the result of the tag handler is returned unchanged.
 */
static int
lexer_callback(tdi_lexer_event *event_, void *self_)
{
    tdi_soup_parser *parser = self_;
    tdi_parser_event out;
    const int kind = (int)event_->type;
    int passthrough = 0;

    /* Step 1: tags are handled elsewhere; classify everything else */
    switch (kind) {
    case TDI_LEXER_EVENT_STARTTAG:
        return handle_starttag(parser, &out, event_);

    case TDI_LEXER_EVENT_ENDTAG:
        return handle_endtag(parser, &out, event_);

    case TDI_LEXER_EVENT_TEXT:
    case TDI_LEXER_EVENT_COMMENT:
    case TDI_LEXER_EVENT_MSECTION:
    case TDI_LEXER_EVENT_DECL:
    case TDI_LEXER_EVENT_PI:
        passthrough = 1;
        break;

    case TDI_LEXER_EVENT_ESCAPE:
    default:
        break;
    }

    if (!passthrough) {
        /* Should not happen */
        PyErr_SetNone(PyExc_AssertionError);
        parser->last_error = TDI_PARSER_ERR_ENV;
        return -1;
    }

    /* Step 2: a pending empty element is closed before anything follows */
    if (parser->inempty && close_empty(parser) == -1)
        return -1;

    /* Step 3: copy the payload into the parser event */
    switch (kind) {
    case TDI_LEXER_EVENT_TEXT:
        out.type = TDI_PARSER_EVENT_TEXT;
        out.info.text.data = event_->info.text.data;
        break;

    case TDI_LEXER_EVENT_COMMENT:
        out.type = TDI_PARSER_EVENT_COMMENT;
        out.info.comment.data = event_->info.comment.data;
        break;

    case TDI_LEXER_EVENT_MSECTION:
        out.type = TDI_PARSER_EVENT_MSECTION;
        out.info.msection.data = event_->info.msection.data;
        out.info.msection.name = event_->info.msection.name;
        out.info.msection.value = event_->info.msection.value;
        break;

    case TDI_LEXER_EVENT_DECL:
        out.type = TDI_PARSER_EVENT_DECL;
        out.info.decl.data = event_->info.decl.data;
        out.info.decl.name = event_->info.decl.name;
        out.info.decl.value = event_->info.decl.value;
        break;

    case TDI_LEXER_EVENT_PI:
        out.type = TDI_PARSER_EVENT_PI;
        out.info.pi.data = event_->info.pi.data;
        break;

    default:
        break;
    }

    /* Step 4: a non-zero callback result is reported as failure */
    if (parser->cb(&out, parser->cb_ctx))
        return -1;

    return 0;
}