/*
 * getTok:
 * Copy the next token of args into xb and return the position at which
 * scanning should resume.
 *
 * Leading whitespace is skipped with skipWS(). Returns 0 if nothing but the
 * terminator remains. Otherwise characters are appended to xb until a
 * whitespace character (consumed) or the terminating NUL (left in place) is
 * met. A single or double quote hands control to endQuote(), which returns
 * the new scan position; a backslash causes the following character to be
 * appended as-is.
 */
static const char *getTok(const char *args, agxbuf * xb)
{
    char c;
    int more = 1;

    args = skipWS(args);
    if (*args == '\0')
        return 0;

    while (more) {
        c = *args++;
        /* isspace() is only defined for values representable as unsigned
         * char (or EOF); a plain char holding a byte >= 0x80 may be negative.
         */
        if (isspace((unsigned char) c))
            more = 0;
        else if (c == '\0') {
            more = 0;
            args--;             /* leave the pointer on the terminator */
        } else if ((c == '"') || (c == '\'')) {
            args = endQuote(args, xb, c);
        } else if (c == '\\') {
            c = *args++;
            if (c == '\0')
                args--;         /* trailing backslash: next pass sees the NUL */
            else
                agxbputc(xb, c);
        } else
            agxbputc(xb, c);
    }
    return args;
}
/*
 * Return a pointer to the character following the '<' of the next tag, or
 * NULL if the next non-whitespace character is not '<'.
 *
 * When xb->nulledChar is set, the flag is cleared and xb->cur + 1 is returned
 * without skipping whitespace or inspecting the buffer (presumably because
 * the '<' was overwritten by an earlier in-place termination -- confirm
 * against whoever sets nulledChar).
 */
inline char *nextTag(XmlBuffer * xb)
{
   if (!xb->nulledChar) {
      skipWS(xb);
      if (*xb->cur != '<')
         return NULL;
   } else {
      xb->nulledChar = 0;
   }
   return xb->cur + 1;
}
/*
 * Test whether the buffer continues with the tag named t.
 *
 * Returns 1 and leaves the cursor wherever getWord() left it when the tag
 * matches. Returns 0 and restores the cursor to its position on entry when
 * it does not, so the caller can probe for a different tag.
 *
 * If the cursor sits on a NUL byte on entry, that byte is stepped over and
 * the '<' check is skipped (sz != 0).
 */
static int tagEquals(XmlBuffer * xb, const char *t)
{
   /* Always remember the entry position. Previously this stayed NULL when the
    * cursor started on a NUL byte, so a failed match set xb->cur to NULL. */
   char *start = xb->cur;
   int sz = 0;

   if (*xb->cur == 0) {
      xb->cur++;
      sz = 1;
   }                            /* why is this needed ? */

   skipWS(xb);
   if (sz || getChar(xb, '<')) {
      skipWS(xb);
      if (getWord(xb, t, 1))
         return 1;
   } else {
      printf("OOOPS\n");
   }

   xb->cur = start;
   return 0;
}
/*
 * readPoint:
 * Parse "<x><sep><y>" from s, after skipping leading whitespace with skipWS().
 * The two numbers are read with strtod() and stored in *xp and *yp; exactly
 * one separator character (a comma in the expected input) is stepped over
 * between them.
 *
 * Returns a pointer just past the second number, or NULL if either number is
 * missing or the string ends right after the first one. *xp may already have
 * been written when NULL is returned.
 */
static char* readPoint (char* s, double* xp, double* yp)
{
    char* endp;

    s = skipWS(s);
    *xp = strtod (s, &endp);
    if (s == endp)
        return NULL;

    /* Do not step over the terminator: without this check the second
     * strtod() would start reading beyond the end of the string. */
    if (*endp == '\0')
        return NULL;
    endp++;                     /* skip comma */

    s = endp;
    *yp = strtod (s, &endp);
    if (s == endp)
        return NULL;
    return endp;
}
/*
 * arrowEnd:
 * After skipping whitespace in s0, check whether the text starts with the
 * two-character prefix pfx. If it does not, return the whitespace-skipped
 * position and leave *fp, *xp and *yp untouched. If it does, read the point
 * that follows the prefix into *xp / *yp, set *fp to 1 and return the
 * position after the point.
 *
 * A prefix that is not followed by a readable point is fatal: a message is
 * printed to stderr and the process exits with status 1.
 */
static char* arrowEnd (char* s0, char* pfx, int* fp, double* xp, double* yp)
{
    char* cursor = skipWS(s0);

    if (strncmp(cursor, pfx, 2) != 0)
        return cursor;

    /* step over the two prefix characters and parse the coordinates */
    cursor = readPoint (cursor + 2, xp, yp);
    if (cursor == NULL) {
        fprintf (stderr, "Illegal spline end: %s\n", s0);
        exit (1);
    }

    *fp = 1;
    return cursor;
}
/*
 * Read a quoted attribute value at the cursor.
 *
 * After skipping whitespace, an opening '"' or '\'' is consumed with
 * getChar(); its non-zero return value is used as the closing delimiter
 * (presumably the matched quote character itself). The closing delimiter is
 * overwritten with NUL in place, the cursor is moved past it, and a pointer
 * to the start of the now NUL-terminated value is returned.
 *
 * Returns NULL if no opening quote is present, or if the buffer ends before
 * the closing delimiter is found; in the latter case the cursor is left on
 * the terminating NUL.
 *
 * The parameter v is not used.
 */
static char *getValue(XmlBuffer * xb, const char *v)
{
   char dlm = 0;
   char *start = NULL;

   skipWS(xb);
   if ((dlm = getChar(xb, '"')) || (dlm = getChar(xb, '\''))) {
      start = xb->cur;
      while (*xb->cur != dlm) {
         /* Unterminated value: stop at the end of the buffer instead of
          * scanning past it. */
         if (*xb->cur == 0)
            return NULL;
         xb->cur++;
      }
      *xb->cur = 0;             /* terminate the value in place */
      xb->cur++;
      return start;
   }
   return NULL;
}
/**
 * Return the next token of the input.
 *
 * Tokens that were lexed earlier and are still ahead of the read position
 * (tokp) are replayed from the token list. Once an end-of-stream token has
 * been handed out it is returned again on every further call, so the list is
 * never extended past the end of the stream.
 *
 * Otherwise whitespace is skipped and each token recogniser is tried in turn;
 * the first one that succeeds fills in the new token. A TokenException is
 * thrown if none of them matches.
 *
 * On success the string iterator is advanced past the parsed token. On
 * failure the string iterator is in an undefined location.
 */
const Token& Tokeniser::nextToken()
{
    // Replay a token that is already in the list.
    if (tokp < tokens.size())
        return tokens[tokp++];

    // Don't extend stream of tokens further than the end of stream.
    if (tokp > 0) {
        const Token& previous = tokens[tokp - 1];
        if (previous.type == T_EOS)
            return previous;
    }

    skipWS(inp, inEnd);

    tokens.push_back(Token());
    Token& tok = tokens[tokp++];

    // Short-circuit evaluation keeps the recognisers in their original order
    // and stops at the first one that accepts the input.
    const bool recognised =
           tokeniseEos(inp, inEnd, tok)
        || tokeniseIdentifierOrReservedWord(inp, inEnd, tok)
        || tokeniseNumeric(inp, inEnd, tok)
        || tokeniseString(inp, inEnd, tok)
        || tokeniseParens(inp, inEnd, tok)
        || tokeniseOperator(inp, inEnd, tok);

    if (!recognised)
        throw TokenException("Found illegal character");

    return tok;
}
// Lex a QName (either "local" or "prefix:local") starting at the cursor.
//
// On success the name is stored in `name` and true is returned. Returns false
// if no NCName is found at the cursor, or if a ':' is not followed by one.
bool Parser::lexQName(String& name)
{
    String n1;
    if (!lexNCName(n1))
        return false;

    skipWS();

    // If the character at the cursor is ':', what we just got is the prefix;
    // if not, it is the whole name. lexNCName() leaves the cursor on the first
    // character after the name (nextTokenInternal() relies on the same thing),
    // so the character to test is the current one, not the one after it.
    if (peekCurHelper() != ':') {
        name = n1;
        return true;
    }

    // Step over the ':' so that the local part can be lexed.
    m_nextPos++;

    String n2;
    if (!lexNCName(n2))
        return false;

    name = n1 + ":" + n2;
    return true;
}
Command* makeCommand() { char buffer[1024]; int i = 0; char ch,*ptr; printf("%%");fflush(stdout); while(i < sizeof(buffer) && (ch = getchar()) != '\n' && ch != EOF) buffer[i++] = ch; buffer[i] = 0; ptr = buffer+i-1; while(ptr>=buffer && isspace(*ptr)) ptr--,i--; *++ptr = 0; Command* c = allocCommand(ptr); if(ch==EOF) return setCommand(c,exitCMD,"exit"); else { int mode = R_NONE; char input[1024]; char output[1024]; *input = *output = 0; int len = strlen(buffer); int ok = extractRedirect(buffer,&len,&mode,input,output); c->_mode = mode; c->_input = strdup(input); c->_output = strdup(output); len = trimString(buffer,len); if (ok) { char* sc = skipWS(buffer); char* ec = cutWord(sc); if (strcmp(sc,"cd")==0) { char* a0 = skipWS(ec); char* a1 = cutWord(a0); return setCommand(c,cdCMD,sc); } else if (strcmp(sc,"pwd")==0) { return setCommand(c,pwdCMD,sc); } else if (strcmp(sc,"ln") == 0) { char* a0 = skipWS(ec); char* a1 = skipWS(cutWord(a0)); char* a2 = cutWord(a1); char* args[2] = {a0,a1}; return setCommandArgs(setCommand(c,linkCMD,sc),2,args); } else if (strcmp(sc,"rm") == 0) { char* a0 = skipWS(ec); char* a1 = cutWord(a0); char* args[1] = {a0}; return setCommandArgs(setCommand(c,rmCMD,sc),1,args); } else if (strcmp(sc,"exit") == 0) { return setCommand(c,exitCMD,sc); } else { if (*sc) { if (strchr(ec,'|') != NULL) { // This is a pipeline. char* args[512]; Stage* stages[512]; int nbs = 0; args[0] = sc; char* arg = skipWS(ec); int nba = 1; setCommand(c,pipelineCMD,""); while(arg && *arg) { char* p = *arg == '|' ? 
arg : 0; if (p) { *p = 0; addCommandStage(c,allocStage(nba,args)); args[0] = arg = skipWS(p+1); arg = cutWord(arg); nba = 1; } else { args[nba++] = arg; arg = skipWS(cutWord(arg)); } } return addCommandStage(c,allocStage(nba,args)); } else { char* args[1024]; args[0] = sc; char* arg = ec; int nba = 1; while(arg && *arg) { args[nba++] = arg; arg = skipWS(cutWord(arg)); assert(nba < 1024); } return setCommandArgs(setCommand(c,basicCMD,args[0]),nba,args); } } } } return c; } }
// Lex the next token from the input into obj.
//
// Returns true when a token was produced. Returns false, with obj set to a
// LEX_ERROR token carrying the text "EOF", when the input is already at its
// end on entry. Lexical errors are reported by throwing a Token of type
// LEX_ERROR whose data holds the message.
//
// Every path through the body of the outer while loop returns or throws, so
// the loop body runs at most once per call.
bool Lexer::GetToken(Token&obj) {
    string strBuf;
    int id;
    DataValue dv;
    while (!isEOF()) {
        skipWS();
        /////////////
        // Check for chr/strlit
        //
        // NOTE(review): the next statement has no effect beyond calling
        // nextChar(); b goes out of scope immediately. It looks like a
        // leftover debugging hook.
        if (nextChar() == '-')
            bool b=true;
        if (nextChar() == '\'' || nextChar() == '\"') {
            // ch is the opening quote; the literal runs until the same
            // character is seen again.
            char ch = getChar();
            strBuf = "";
            while (!isEOF()) {
                // Literals may not span lines.
                if (nextChar() == '\r' || nextChar() == '\n') {
                    dv.SetStrData("Newline in constant.");
                    throw Token(Token::LEX_ERROR,dv);
                }
                if (nextChar() == ch) {
                    // Consume the closing quote.
                    getChar();
                    if (ch == '\'') {
                        if (strBuf.length() != 1) {
                            dv.SetStrData("Character literals must have exactly one character.");
                            throw Token(Token::LEX_ERROR,dv);
                        }
                        dv.SetCharData(strBuf[0]);
                        obj=Token(Token::LEX_CHRLIT,dv);
                        return true;
                    } else {
                        dv.SetStrData(strBuf);
                        obj=Token(Token::LEX_STRLIT,dv);
                        return true;
                    }
                }
                strBuf += getChar();
            }
            // Input ended before the closing quote.
            dv.SetStrData("EOF in constant.");
            throw Token(Token::LEX_ERROR,dv);
        }
        /////////////
        // Check for Operators, then keywords
        //
        // A zero id from GetString() is treated as "no match". The position is
        // reset after a failed operator match only; after a failed keyword
        // match, lexing continues with whatever GetString() left in strBuf and
        // at whatever input position it stopped -- TODO confirm that this is
        // the intended contract of GetString().
        StorePosition();
        id = dfaOperators.GetString(strBuf);
        if (id == 0) {
            ResetPosition();
            id = dfaKeywords.GetString(strBuf);
            if (id == 0) {
                /////////////
                // Check for numbers/idents
                //
                // Accumulate lower-cased characters up to whitespace, end of
                // input, or a character that can start an operator.
                while (!isEOF() && !isWS(nextChar()) && !dfaOperators.ValidFirst(nextChar())) {
                    strBuf += toLower(getChar());
                }
                // If everything read so far is a digit:
                if (strBuf.find_first_not_of("0123456789",0,10) == string::npos) {
                    // read in anything, including dots (will cover floats and invalid idents)
                    while (!isEOF() && !isWS(nextChar()) && (nextChar() == '.' || !dfaOperators.ValidFirst(nextChar()))) {
                        strBuf += toLower(getChar());
                    }
                }
                // NOTE(review): found is never read.
                bool found = false;
                // Only digits and dots: integer literal when there is no dot,
                // float literal when there is exactly one. Two or more dots
                // fall through to identifier validation, which rejects them.
                // An empty strBuf also satisfies this test and takes the
                // integer branch.
                if (strBuf.find_first_not_of("0123456789.",0,11) == string::npos) {
                    string::size_type off;
                    off = strBuf.find('.',0);
                    if (off == string::npos) {
                        dv.SetIntData(atoi(strBuf.c_str()));
                        obj=Token(Token::LEX_INTLIT,dv);
                        return true;
                    } else {
                        if (strBuf.find('.',off+1) == string::npos) {
                            dv.SetFloatData(atof(strBuf.c_str()));
                            obj=Token(Token::LEX_FLOLIT,dv);
                            return true;
                        }
                    }
                }
                // Validate the identifier. The int thrown inside the try block
                // selects the error message in the handler below:
                //   0 - first character is not a letter
                //   1 - a character is not a letter, digit or underscore
                //   2 - longer than 20 characters
                try {
                    if (strBuf.length() > 20)
                        throw 2;
                    for (string::iterator it = strBuf.begin();it != strBuf.end();++it) {
                        if (it == strBuf.begin()) {
                            if (!isAlpha(*it)) {
                                throw 0;
                            }
                        }
                        if (!isAlpha(*it) && *it != '_' && !isNum(*it)) {
                            throw 1;
                        }
                    }
                } catch(int iError) {
                    switch (iError) {
                    case 0:
                        dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers must begin with a letter.");
                        throw Token(Token::LEX_ERROR,dv);
                    case 1:
                        dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers can only contain underscores and alphanumeric characters.");
                        throw Token(Token::LEX_ERROR,dv);
                    case 2:
                        dv.SetStrData("Unrecognized lexeme ("+strBuf+"): identifiers can be at most 20 characters long.");
                        throw Token(Token::LEX_ERROR,dv);
                    }
                }
                dv.SetStrData(strBuf);
                obj=Token(Token::LEX_IDENT,dv);
                return true;
            }
        }
        // An operator or keyword matched: its id doubles as the token type.
        if (id) {
            dv.Clear();
            obj=Token((Token::TokenType)id,dv);
            return true;
        }
        // NOTE(review): appears unreachable -- id == 0 is fully handled by the
        // branch above, which always returns or throws.
        dv.SetStrData("Undefined error.");
        throw Token(Token::LEX_ERROR,dv);
    }
    // End of input: report it through a LEX_ERROR token and a false return.
    dv.SetStrData("EOF");
    obj=Token(Token::LEX_ERROR,dv);
    return false;
}
// Lex one token starting at m_nextPos.
//
// Whitespace is skipped first. Token(0) is returned once the cursor is at or
// past the end of the data. Punctuation, string and number literals,
// operators and variable references are recognised by their first character;
// anything else must start with an NCName, which is then classified as an
// operator name, axis name, node type, function name or name test.
// Token(XPATH_ERROR) is returned for input that fits none of these.
inline Parser::Token Parser::nextTokenInternal()
{
    skipWS();

    if (m_nextPos >= m_data.length())
        return Token(0);

    char code = peekCurHelper();
    switch (code) {
    // Punctuation whose token id is the character itself.
    case '(':
    case ')':
    case '[':
    case ']':
    case '@':
    case ',':
    case '|':
        return makeTokenAndAdvance(code);

    case '\'':
    case '\"':
        return lexString();

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return lexNumber();

    case '.': {
        // ".5" is a number, ".." is the parent abbreviation, a lone "." is self.
        char next = peekAheadHelper();
        if (next >= '0' && next <= '9')
            return lexNumber();
        if (next == '.')
            return makeTokenAndAdvance(DOTDOT, 2);
        return makeTokenAndAdvance('.');
    }

    case '/':
        if (peekAheadHelper() != '/')
            return makeTokenAndAdvance('/');
        return makeTokenAndAdvance(SLASHSLASH, 2);

    case '+':
        return makeTokenAndAdvance(PLUS);

    case '-':
        return makeTokenAndAdvance(MINUS);

    case '=':
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);

    case '!':
        // '!' is only accepted as the first half of "!=".
        if (peekAheadHelper() != '=')
            return Token(XPATH_ERROR);
        return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);

    case '<':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);

    case '>':
        if (peekAheadHelper() != '=')
            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
        return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);

    case '*':
        // Multiplication where a binary operator may appear, wildcard name
        // test otherwise.
        if (!isBinaryOperatorContext()) {
            ++m_nextPos;
            return Token(NAMETEST, "*");
        }
        return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);

    case '$': {
        // $ QName
        m_nextPos++;
        String variableName;
        if (!lexQName(variableName))
            return Token(XPATH_ERROR);
        return Token(VARIABLEREFERENCE, variableName);
    }
    }

    // Everything else has to begin with an NCName.
    String name;
    if (!lexNCName(name))
        return Token(XPATH_ERROR);

    skipWS();

    // In an operator context the name may be one of the operator keywords.
    if (isBinaryOperatorContext()) {
        if (name == "and") //### hash?
            return Token(AND);
        if (name == "or")
            return Token(OR);
        if (name == "mod")
            return Token(MULOP, NumericOp::OP_Mod);
        if (name == "div")
            return Token(MULOP, NumericOp::OP_Div);
    }

    // A ':' after the name starts either "::" (axis) or a qualified name.
    if (peekCurHelper() == ':') {
        m_nextPos++;

        if (peekCurHelper() == ':') {
            m_nextPos++;

            // "::" is only valid after an axis name; anything else is an error.
            Step::Axis axis;
            if (!parseAxisName(name, axis))
                return Token(XPATH_ERROR);
            return Token(AXISNAME, axis);
        }

        // Either "prefix:*" from NameTest or a fully qualified name.
        skipWS();
        if (peekCurHelper() == '*') {
            m_nextPos++;
            return Token(NAMETEST, name + ":*");
        }

        String localPart;
        if (!lexNCName(localPart))
            return Token(XPATH_ERROR);

        name = name + ":" + localPart;
    }

    skipWS();

    // Without a following '(' this can only be a name test.
    if (peekCurHelper() != '(')
        return Token(NAMETEST, name);

    // A '(' follows (it is not consumed here): node type or function name.
    if (name == "processing-instruction")
        return Token(PI);
    if (name == "node")
        return Token(NODE);
    if (name == "text")
        return Token(TEXT);
    if (name == "comment")
        return Token(COMMENT);
    return Token(FUNCTIONNAME, name);
}
// Read the next token from the source.
//
// The first character decides how the token is read:
//   - end of buffer: a NULL_TOKEN whose text is a single NUL character;
//   - whitespace / comment: skipped, then the following token is returned;
//   - digits or '.': a number;
//   - identifier start or '_': an identifier, re-tagged as a keyword when
//     is_keyword() recognises it;
//   - '<', '>', '=': a comparator;
//   - operator: a compound assignment when directly followed by '=' (for
//     - + * / % ^), otherwise the single-character operator;
//   - unknown: a YottaError syntax error is thrown (by pointer);
//   - anything else: a single-character token of the detected type.
TObject* Scanner::get()
{
    char c = getChar();

    if (c == END_OF_BUFFER) {
        //FIXME please update this, better handling
        return new TObject(new string(1, char(0)), NULL_TOKEN, line, char_line);
    }

    TOKEN_TYPE type = token_type(c);
    switch (type) {
    case WHITESPACE: {
        // Nothing to report for whitespace itself: skip it and lex what follows.
        skipWS(c);
        return get();
    }

    case COMMENT: {
        skipCMT(c);
        return get();
    }

    case NUMERIC:
    case DOT: {
        return extractNumber(c);
    }

    case UNDERSCORE:
    case IDENTIFIER: {
        TObject* ident = extractIdentifier(c);

        // Promote the identifier to a keyword token when its text is one.
        TOKEN_TYPE keyword = is_keyword(*ident->str);
        if (keyword != UNKNOWN) {
            ident->type = keyword;
            ident->special = true;
        }
        return ident;
    }

    case GREATER_THAN:
    case LESS_THAN:
    case EQUAL_SIGN: {
        // The helper checks whether an '=' follows.
        return extractComparator(c);
    }

    case OPERATOR: {
        // Look one character ahead for a compound assignment.
        ch = getChar();
        if (ch != '=') {
            putback();
        } else {
            switch (c) {
            case '-':
                return new TObject(0, MINUS_EQUALS, line, char_line);
            case '+':
                return new TObject(0, PLUS_EQUALS, line, char_line);
            case '*':
                return new TObject(0, TIMES_EQUALS, line, char_line);
            case '/':
                return new TObject(0, DIVIDE_EQUALS, line, char_line);
            case '%':
                return new TObject(0, MODULO_EQUALS, line, char_line);
            case '^':
                return new TObject(0, EXPO_EQUALS, line, char_line);
            default:
                break;
            }
        }
        return new TObject(new string(1, c), type, line, char_line);
    }

    case UNKNOWN: {
        // Quote the offending source line in the error message.
        string message = ERROR_MSG[6]+"\"";
        message.append(getLine().append("\""));
        throw new YottaError(SYNTAX_ERROR, message, line, char_line);
    }

    default: {
        return new TObject(new string(1, c), type, line, char_line);
    }
    }
}
// Parse one element starting at curr and return it as a newly allocated node,
// or NULL if the input is not a well-formed element. curr is advanced as the
// input is consumed.
//
// An element whose opening tag is followed by text becomes an XmlLeaf holding
// that text. One whose opening tag is followed directly by its closing tag
// becomes an XmlLeaf with an empty value. One followed by further opening
// tags becomes an XmlBranch whose children are parsed recursively.
XmlNode *XmlNode::parse(string::iterator& curr, string::iterator end)
{
    skipWS(curr, end);
    if (curr == end || *curr != '<')
        return NULL;

    string tag = parseTag(curr, end);
    if (tag.empty() || tag[0] == '/')
        return NULL;

    skipWS(curr, end);
    if (curr == end)
        return NULL;

    if (*curr != '<') {
        // Leaf: everything up to the next '<' is the value.
        string::iterator valueBegin = curr;
        while (curr != end && *curr != '<')
            curr++;
        if (curr == end)
            return NULL;
        string value(valueBegin, curr);

        string closing = parseTag(curr, end);
        if (closing.empty() || closing[0] != '/')
            return NULL;

        // The closing tag has to be '/' followed by exactly the opening tag.
        if (closing.size() == tag.size() + 1 && closing.find(tag, 1) == 1)
            return new XmlLeaf(unquote(tag), unquote(value));
        return NULL;
    }

    // Nested tags: collect children until the matching closing tag.
    XmlNode *node = NULL;
    while (curr != end) {
        string::iterator mark = curr;
        string nexttag = parseTag(curr, end);

        if (nexttag.empty()) {
            delete node;
            return NULL;
        }

        if (nexttag[0] == '/') {
            // Has to be the closing </tag> of this element.
            if (nexttag.size() == tag.size() + 1 && nexttag.find(tag, 1) == 1) {
                // No children were seen: an empty element is an empty leaf.
                if (node == NULL)
                    node = new XmlLeaf(unquote(tag), "");
                return node;
            }
            delete node;
            return NULL;
        }

        // An opening tag: rewind to it and parse the child element.
        if (node == NULL)
            node = new XmlBranch(unquote(tag));
        curr = mark;
        XmlNode *child = parse(curr, end);
        if (child != NULL)
            ((XmlBranch*)node)->pushnode(child);

        skipWS(curr, end);
        if (curr == end || *curr != '<') {
            delete node;
            return NULL;
        }
    }

    // should never get here
    return NULL;
}
/*
 * Parse the attribute list of the element named tag.
 *
 * e is a table of known attribute names, terminated by an entry whose attr
 * is NULL; r receives, at the matching index, the value of each attribute
 * found. Each known attribute is accepted at most once.
 *
 * Returns 1 when the list ends with ">", "/>" or (for "?xml" only) "?>"; in
 * the latter two cases xb->eTagFound is set and xb->etag is set to etag.
 * Unknown attributes, a missing '=' and a malformed list end are reported
 * through Throw(); -1 is returned if Throw() returns after a malformed end.
 */
static int attrsOk(XmlBuffer * xb, const XmlElement * e, XmlAttr * r,
                   const char *tag, int etag)
{
   unsigned int n;
   char *ptr, wa[32];           /* wa[n] == 1 once attribute n has been seen */
   char msg1[] = { "Unknown attribute in list for " };
   char msg2[] = { "Bad attribute list for " };
   char word[32];

   for (n = 0; (e + n)->attr; n++)
      wa[n] = 0;

   xb->eTagFound = 0;

   /* The ctype functions are only defined for unsigned char values (or EOF),
    * hence the casts on the buffer bytes. */
   for (skipWS(xb); isalpha((unsigned char) *xb->cur); skipWS(xb)) {
      for (n = 0; (e + n)->attr; n++) {
         if (wa[n] == 1)
            continue;
         if (getWord(xb, (e + n)->attr, 0)) {
            if (!isalnum((unsigned char) *xb->cur)) {
               skipWS(xb);
               if (getChar(xb, '=')) {
                  (r + n)->attr = getValue(xb, (e + n)->attr);
                  wa[n] = 1;
                  goto ok;
               }
               else
                  Throw(xb, "'=' expected in attribute list");
            }
         }
      }

      /* No known attribute matched: quote up to 10 characters of the input. */
      strncpy(word, xb->cur, 10);
      word[10] = 0;
      ptr = (char *) alloca(strlen(tag) + strlen(msg1) + 8 + 20);
      strcpy(ptr, msg1);
      strcat(ptr, tag);
      strcat(ptr, " (");
      strcat(ptr, word);
      strcat(ptr, ")");
      Throw(xb, ptr);
    ok:;
   }

   if (getChars(xb, "/>")) {
      xb->eTagFound = 1;
      xb->etag = etag;
      return 1;
   }
   else if (getChar(xb, '>'))
      return 1;
   else if (getChars(xb, "?>") && strcmp(tag, "?xml") == 0) {
      xb->eTagFound = 1;
      xb->etag = etag;
      return 1;
   }

   /* tag is appended twice below, so it has to be counted twice in the
    * allocation; counting it once overflowed the buffer for long tag names. */
   ptr = (char *) alloca(2 * strlen(tag) + strlen(msg2) + 96);
   strcpy(ptr, msg2);
   strcat(ptr, tag);
   strcat(ptr, ": ");
   strncpy(word, xb->cur, 30);
   word[30] = 0;
   strcat(ptr, word);
   strcat(ptr, " ");
   strcat(ptr, tag);
   Throw(xb, ptr);
   return -1;
}