/**
 * Creates a VRML file reader on top of the given character source.
 *
 * The constructor configures the tokenizer (comma is whitespace; '#', brackets,
 * braces and newline are punctuation; both quote characters delimit strings),
 * verifies that the input starts with the four header tokens "#", "VRML",
 * "V2.0" and "utf8", and finally points urlPrefix just past the last '/' of
 * the source URL (or at its beginning if the URL contains no '/').
 *
 * A header mismatch is reported through Misc::throwStdErr.
 */
VRMLFile::VRMLFile(std::string sSourceUrl,Misc::CharacterSource& sSource,NodeCreator& sNodeCreator)
	:Misc::TokenSource(sSource),
	 sourceUrl(sSourceUrl),
	 nodeCreator(sNodeCreator),
	 nodeMap(101),
	 currentLine(1)
	{
	/* Initialize the token source: */
	setWhitespace(',',true); // Comma is treated as whitespace
	setPunctuation("#[]{}\n"); // Newline is treated as punctuation to count lines
	setQuotes("\"\'");
	
	/* Check the VRML file header, one expected token at a time: */
	static const char* const headerTokens[]={"#","VRML","V2.0","utf8"};
	const unsigned int numHeaderTokens=sizeof(headerTokens)/sizeof(headerTokens[0]);
	for(unsigned int tokenIndex=0;tokenIndex<numHeaderTokens;++tokenIndex)
		{
		TokenSource::readNextToken();
		if(!isToken(headerTokens[tokenIndex]))
			Misc::throwStdErr("VRMLFile: %s is not a valid VRML 2.0 file",sourceUrl.c_str());
		}
	
	/* Extract the URL prefix (everything up to and including the last slash): */
	urlPrefix=sourceUrl.begin();
	const std::string::size_type lastSlash=sourceUrl.rfind('/');
	if(lastSlash!=std::string::npos)
		urlPrefix=sourceUrl.begin()+(lastSlash+1);
	}
/*
 * Line callback for a UCD file that assigns one enumerated property value to a
 * code point range.
 *
 * context    - the SingleEnum descriptor for the property being parsed
 * fields     - field 0 holds the code point range, field 1 the value alias
 * fieldCount - unused
 * pErrorCode - ICU error code; any failure is fatal (the tool exits)
 *
 * The value is shifted into position, checked against the property's mask and
 * stored into the global properties vector pv.
 */
static void U_CALLCONV
singleEnumLineFn(void *context,
                 char *fields[][2], int32_t fieldCount,
                 UErrorCode *pErrorCode) {
    /* Block names that Unicode 3.2 renamed; still accepted under their old names. */
    static const struct {
        const char *oldName;
        int32_t block;
    } renamedBlocks[]={
        { "Greek", UBLOCK_GREEK },                                             /* now "Greek and Coptic" */
        { "Combining Marks for Symbols", UBLOCK_COMBINING_MARKS_FOR_SYMBOLS }, /* now "Combining Diacritical Marks for Symbols" */
        { "Private Use", UBLOCK_PRIVATE_USE }                                  /* now "Private Use Area" */
    };

    const SingleEnum *sen=(const SingleEnum *)context;
    char *s;
    uint32_t start, end, uv;
    int32_t value, k;

    /* field 0: code point or range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", sen->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* field 1: property value alias */
    s=trimTerminateField(fields[1][0], fields[1][1]);
    value=u_getPropertyValueEnum(sen->prop, s);
    if(value<0 && sen->prop==UCHAR_BLOCK) {
        /* not a current alias: try the pre-3.2 block names in order */
        for(k=0; k<(int32_t)(sizeof(renamedBlocks)/sizeof(renamedBlocks[0])); ++k) {
            if(isToken(renamedBlocks[k].oldName, s)) {
                value=renamedBlocks[k].block;
                break;
            }
        }
    }
    if(value<0) {
        fprintf(stderr, "genprops error: unknown %s name in %s.txt field 1 at %s\n", sen->propName, sen->ucdFile, s);
        exit(U_PARSE_ERROR);
    }

    /* move the value into its bit field and make sure it fits */
    uv=(uint32_t)(value<<sen->vecShift);
    if((uv&sen->vecMask)!=uv) {
        fprintf(stderr, "genprops error: %s value overflow (0x%x) at %s\n", sen->propName, (int)uv, s);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    if(start==0 && end==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        end=UPVEC_MAX_CP;
    }

    upvec_setValue(pv, start, end, sen->vecWord, uv, sen->vecMask, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set %s code: %s\n", sen->propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
// parse EPD from buffer // note that this is not a real EPD parser, just a simplified one! bool EPDFile::parse( const char *ptr ) { cheng4::Board b; while (ptr && *ptr) { skipSpaces( ptr ); const char *res = b.fromFEN( ptr ); if ( !res ) { skipUntilEOL(ptr); continue; } EPDPosition pos; pos.fen = b.toFEN(); ptr = res; for (;;) { skipSpaces( ptr ); if ( isToken( ptr, "bm" ) ) { // parse best moves for (;;) { skipSpaces(ptr); Move m = b.fromSAN(ptr); if ( m == mcNone ) break; pos.best.push_back(m); } continue; } if ( isToken( ptr, "am" ) ) { // parse avoid moves for (;;) { skipSpaces(ptr); Move m = b.fromSAN(ptr); if ( m == mcNone ) break; pos.avoid.push_back(m); } continue; } if ( *ptr == 13 || *ptr == 10 || *ptr == ';') { skipUntilEOL( ptr ); break; } } positions.push_back( pos ); } return 1; }
/**
 * Classifies objName and writes the matching two-character option flag into
 * objType: "-d" (data), "-c" (collection), "-r" (resource), "-u" (user),
 * "-m" (metadata) or "-t" (token). The checks run in exactly that order and
 * the first one that succeeds wins.
 *
 * objType must have room for at least three bytes.
 *
 * Returns 0 on success, or INVALID_OBJECT_TYPE (objType untouched) when no
 * check matches.
 */
int
getObjType( rsComm_t *rsComm, char *objName, char * objType ) {
    const char *typeFlag;

    if ( isData( rsComm, objName, NULL ) >= 0 ) {
        typeFlag = "-d";
    }
    else if ( isColl( rsComm, objName, NULL ) >= 0 ) {
        typeFlag = "-c";
    }
    else if ( isResc( rsComm, objName ) == 0 ) {
        typeFlag = "-r";
    }
    else if ( isUser( rsComm, objName ) == 0 ) {
        typeFlag = "-u";
    }
    else if ( isMeta( rsComm, objName ) == 0 ) {
        typeFlag = "-m";
    }
    else if ( isToken( rsComm, objName ) == 0 ) {
        typeFlag = "-t";
    }
    else {
        return INVALID_OBJECT_TYPE;
    }

    strcpy( objType, typeFlag );
    return 0;
}
/**
 * Parses a single "name=value" cookie string.
 *
 * The name is the leading run of characters accepted by isToken(); optional
 * whitespace may surround the '='. The value runs up to (not including) the
 * next ';' or the end of the string and must not be empty.
 *
 * Throws: std::invalid_argument upon invalid argument (null pointer, missing
 * name, missing '=' or empty value) and other std::exceptions's from string
 * methods.
 */
Http::Cookie::Cookie(const char *cookieStr)
	: name(), value()
{
	// A null pointer used to be dereferenced below and then handed to the
	// std::invalid_argument constructor, both of which are undefined.
	if (! cookieStr) {
		throw std::invalid_argument("null cookie string");
	}

	const char *ptr = cookieStr;
	bool parsed = false;

	// NOTE(review): relies on isToken('\0') being false to stop at the end.
	while (isToken(*ptr)) {
		ptr += 1;
	}

	if (ptr > cookieStr) {
		name.append(cookieStr, ptr - cookieStr);
		ptr = skipWhitespace(ptr);
		if ('=' == *ptr) {
			ptr = skipWhitespace(ptr + 1);
			const char *startPtr = ptr;
			while (*ptr && ';' != *ptr) {
				ptr += 1;
			}
			if (ptr > startPtr) {
				value.append(startPtr, ptr - startPtr);
				parsed = true;
			}
		}
	}

	if (! parsed) {
		throw std::invalid_argument(cookieStr);
	}
}
// Validates n against this aggregate and throws std::runtime_error otherwise.
// The check passes only if isToken() holds, n (viewed as obj) is an int, and
// n does not exceed hP()->n. The exception text is the optional prefix m
// followed by the offending n and the current elements().
void Aggregate::checkUnderflow(Word n,const char* m)const{
	const bool acceptable = isToken() && ((obj)n).isInt() && !(n>hP()->n);
	if(acceptable)
		return;

	std::stringstream message;
	if(m)
		message<<m;
	message<<" checkUnderflow n="<<n<<" elements()="<<elements();
	throw(std::runtime_error(message.str()));
}
bool Lexer::isToken(std::string& s, char next){ //s1 = s + next //if s is in the token map AND s1 doesn't complete another token //OR next is whitespace if(next=='\0'){ return mStringToType.find(s)!=mStringToType.end(); } std::stringstream ss; ss<<s<<next; return (mStringToType.find(s)!=mStringToType.end() && !isToken(ss.str(),'\0')) || next==' ' || (!isOperator(s) && isOperator(std::string(1,next))); };
/*
 * Line callback for a UCD file listing binary properties (genprops).
 *
 * context    - the Binaries table describing the properties of interest
 * fields     - field 0 holds the code point range, field 1 the property name
 * fieldCount - unused
 * pErrorCode - ICU error code; any failure is fatal (the tool exits)
 *
 * Properties not present in the table are ignored (and recorded through
 * addIgnoredProp when running verbosely). For a known property the single bit
 * selected by its vecShift is set for the whole range in the global vector pv.
 */
static void U_CALLCONV
binariesLineFn(void *context,
               char *fields[][2], int32_t fieldCount,
               UErrorCode *pErrorCode) {
    const Binaries *bin=(const Binaries *)context;
    char *s;
    uint32_t start, end, uv;
    int32_t i;

    /* field 0: code point or range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* field 1: binary property name; find its table entry */
    s=(char *)u_skipWhitespace(fields[1][0]);
    i=0;
    while(i!=bin->binariesCount && !isToken(bin->binaries[i].propName, s)) {
        ++i;
    }
    if(i==bin->binariesCount) {
        /* ignore unrecognized properties */
        if(beVerbose) {
            addIgnoredProp(s, fields[1][1]);
        }
        return;
    }

    /* the bit must fit into one 32-bit vector word */
    if(bin->binaries[i].vecShift>=32) {
        fprintf(stderr, "genprops error: shift value %d>=32 for %s %s\n",
                        (int)bin->binaries[i].vecShift, bin->ucdFile, bin->binaries[i].propName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    uv=U_MASK(bin->binaries[i].vecShift);

    if(start==0 && end==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        end=UPVEC_MAX_CP;
    }

    /* value and mask are the same single bit */
    upvec_setValue(pv, start, end, bin->binaries[i].vecWord, uv, uv, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set %s code: %s\n",
                        bin->binaries[i].propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Line callback for a UCD file listing binary properties (genbidi).
 *
 * context    - the Binaries table describing the properties of interest
 * fields     - field 0 holds the code point range, field 1 the property name
 * fieldCount - unused
 * pErrorCode - ICU error code; any failure is fatal (the tool exits)
 *
 * Properties not present in the table are silently ignored. For a known
 * property its vecValue is stored under its vecMask for the whole range in
 * the global vector pv.
 */
static void U_CALLCONV
binariesLineFn(void *context,
               char *fields[][2], int32_t fieldCount,
               UErrorCode *pErrorCode) {
    const Binaries *bin=(const Binaries *)context;
    char *s;
    uint32_t start, end;
    int32_t i;
    UBool found=FALSE;

    /* field 0: code point or range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* field 1: binary property name; search the table for it */
    s=(char *)u_skipWhitespace(fields[1][0]);
    for(i=0; i!=bin->binariesCount; ++i) {
        if(isToken(bin->binaries[i].propName, s)) {
            found=TRUE;
            break;
        }
    }
    if(!found) {
        /* ignore unrecognized properties */
        return;
    }

    /* an empty mask would make the store below a no-op */
    if(bin->binaries[i].vecMask==0) {
        fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n",
                        (int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    upvec_setValue(pv, start, end,
                   bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask,
                   pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
                        bin->binaries[i].propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Determines what kind of object objName refers to and stores the matching
 * option flag in objType (which needs room for three bytes):
 *   "-d" data object, "-c" collection, "-r" resource, "-g" resource group,
 *   "-u" user, "-m" metadata, "-t" token.
 * The lookups are tried in that order; the first hit decides.
 *
 * Returns 0 on success and INVALID_OBJECT_TYPE if nothing matched, in which
 * case objType is left unchanged.
 */
int
getObjType(rsComm_t *rsComm, char *objName, char * objType)
{
    if (isData(rsComm, objName, NULL) >= 0) {
        strcpy(objType,"-d");
        return (0);
    }
    if (isColl(rsComm, objName, NULL) >= 0) {
        strcpy(objType,"-c");
        return (0);
    }
    if (isResc(rsComm, objName) == 0) {
        strcpy(objType,"-r");
        return (0);
    }
    if (isRescGroup(rsComm, objName) == 0) {
        strcpy(objType,"-g");
        return (0);
    }
    if (isUser(rsComm, objName) == 0) {
        strcpy(objType,"-u");
        return (0);
    }
    if (isMeta(rsComm, objName) == 0) {
        strcpy(objType,"-m");
        return (0);
    }
    if (isToken(rsComm, objName) == 0) {
        strcpy(objType,"-t");
        return (0);
    }
    return(INVALID_OBJECT_TYPE);
}
//Cuts the string s into a singly linked list of Tokens int TokenList::parseInput(std::string& s) { std::string buff; int l = s.length(); for (int i = 0; i < l;) { if (isToken(s[i])) { buff.clear(); if (isNum(s[i])) while (isNum(s[i])) buff += s[i++]; else buff = s[i++]; this->addToken(buff); } else if (s[i] == ' ') ++i; else return 0; } return 1; }
/** processes a message-chunk. * * \param chunk the chunk of bytes to process * * \return number of bytes actually parsed and processed */ std::size_t HttpMessageProcessor::process(const BufferRef& chunk, size_t* out_nparsed) { /* * CR = 0x0D * LF = 0x0A * SP = 0x20 * HT = 0x09 * * CRLF = CR LF * LWS = [CRLF] 1*( SP | HT ) * * HTTP-message = Request | Response * * generic-message = start-line * *(message-header CRLF) * CRLF * [ message-body ] * * start-line = Request-Line | Status-Line * * Request-Line = Method SP Request-URI SP HTTP-Version CRLF * * Method = "OPTIONS" | "GET" | "HEAD" * | "POST" | "PUT" | "DELETE" * | "TRACE" | "CONNECT" * | extension-method * * Request-URI = "*" | absoluteURI | abs_path | authority * * extension-method = token * * Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF * * HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT * Status-Code = 3*DIGIT * Reason-Phrase = *<TEXT, excluding CR, LF> * * absoluteURI = "http://" [user ':'******'@'] hostname [abs_path] [qury] * abs_path = "/" *CHAR * authority = ... * token = 1*<any CHAR except CTLs or seperators> * separator = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT * * message-header = field-name ":" [ field-value ] * field-name = token * field-value = *( field-content | LWS ) * field-content = <the OCTETs making up the field-value * and consisting of either *TEXT or combinations * of token, separators, and quoted-string> * * message-body = entity-body * | <entity-body encoded as per Transfer-Encoding> */ const char* i = chunk.cbegin(); const char* e = chunk.cend(); const size_t initialOutOffset = out_nparsed ? *out_nparsed : 0; size_t result = initialOutOffset; size_t* nparsed = out_nparsed ? 
out_nparsed : &result; //TRACE(2, "process(curState:%s): size: %ld: '%s'", state_str(), chunk.size(), chunk.str().c_str()); TRACE(2, "process(curState:%s): size: %ld", state_str(), chunk.size()); #if 0 switch (state_) { case CONTENT: // fixed size content if (!passContent(chunk, nparsed)) goto done; i += *nparsed; break; case CONTENT_ENDLESS: // endless-sized content (until stream end) { *nparsed += chunk.size(); bool rv = filters_.empty() ? onMessageContent(chunk) : onMessageContent(filters_.process(chunk)); goto done; } default: break; } #endif while (i != e) { #if !defined(XZERO_NDEBUG) if (std::isprint(*i)) { TRACE(3, "parse: %4ld, 0x%02X (%c), %s", *nparsed, *i, *i, state_str()); } else { TRACE(3, "parse: %4ld, 0x%02X, %s", *nparsed, *i, state_str()); } #endif switch (state_) { case MESSAGE_BEGIN: contentLength_ = -1; switch (mode_) { case REQUEST: state_ = REQUEST_LINE_BEGIN; versionMajor_ = 0; versionMinor_ = 0; break; case RESPONSE: state_ = STATUS_LINE_BEGIN; code_ = 0; versionMajor_ = 0; versionMinor_ = 0; break; case MESSAGE: state_ = HEADER_NAME_BEGIN; // an internet message has no special top-line, // so we just invoke the callback right away if (!onMessageBegin()) goto done; break; } break; case REQUEST_LINE_BEGIN: if (isToken(*i)) { state_ = REQUEST_METHOD; method_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case REQUEST_METHOD: if (*i == SP) { state_ = REQUEST_ENTITY_BEGIN; ++*nparsed; ++i; } else if (!isToken(*i)) { state_ = SYNTAX_ERROR; } else { method_.shr(); ++*nparsed; ++i; } break; case REQUEST_ENTITY_BEGIN: if (std::isprint(*i)) { entity_ = chunk.ref(*nparsed - initialOutOffset, 1); state_ = REQUEST_ENTITY; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case REQUEST_ENTITY: if (*i == SP) { state_ = REQUEST_PROTOCOL_BEGIN; ++*nparsed; ++i; } else if (std::isprint(*i)) { entity_.shr(); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case REQUEST_PROTOCOL_BEGIN: if (*i 
!= 'H') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_T1; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_T1: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_T2; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_T2: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_P; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_P: if (*i != 'P') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_SLASH; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_SLASH: if (*i != '/') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_VERSION_MAJOR; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_VERSION_MAJOR: if (*i == '.') { state_ = REQUEST_PROTOCOL_VERSION_MINOR; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMajor_ = versionMajor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_VERSION_MINOR: if (*i == CR) { state_ = REQUEST_LINE_LF; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; TRACE(2, "request-line: method=%s, entity=%s, vmaj=%d, vmin=%d", method_.str().c_str(), entity_.str().c_str(), versionMajor_, versionMinor_); if (!onMessageBegin(method_, entity_, versionMajor_, versionMinor_)) { goto done; } } #endif else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMinor_ = versionMinor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case REQUEST_LINE_LF: if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; TRACE(2, "request-line: method=%s, entity=%s, vmaj=%d, vmin=%d", method_.str().c_str(), entity_.str().c_str(), versionMajor_, versionMinor_); if (!onMessageBegin(method_, entity_, versionMajor_, versionMinor_)) { goto done; } } else state_ = SYNTAX_ERROR; break; case STATUS_LINE_BEGIN: case STATUS_PROTOCOL_BEGIN: if (*i != 'H') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_T1; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_T1: if (*i != 'T') 
{ state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_T2; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_T2: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_P; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_P: if (*i != 'P') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_SLASH; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_SLASH: if (*i != '/') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_VERSION_MAJOR; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_VERSION_MAJOR: if (*i == '.') { state_ = STATUS_PROTOCOL_VERSION_MINOR; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMajor_ = versionMajor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_VERSION_MINOR: if (*i == SP) { state_ = STATUS_CODE_BEGIN; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMinor_ = versionMinor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case STATUS_CODE_BEGIN: if (!std::isdigit(*i)) { code_ = SYNTAX_ERROR; break; } state_ = STATUS_CODE; /* fall through */ case STATUS_CODE: if (std::isdigit(*i)) { code_ = code_ * 10 + *i - '0'; ++*nparsed; ++i; } else if (*i == SP) { state_ = STATUS_MESSAGE_BEGIN; ++*nparsed; ++i; } else if (*i == CR) { // no Status-Message passed state_ = STATUS_MESSAGE_LF; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case STATUS_MESSAGE_BEGIN: if (isText(*i)) { state_ = STATUS_MESSAGE; message_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case STATUS_MESSAGE: if (isText(*i) && *i != CR && *i != LF) { message_.shr(); ++*nparsed; ++i; } else if (*i == CR) { state_ = STATUS_MESSAGE_LF; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case STATUS_MESSAGE_LF: if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; //TRACE(2, "status-line: HTTP/%d.%d, code=%d, message=%s", versionMajor_, versionMinor_, code_, message_.str().c_str()); if 
(!onMessageBegin(versionMajor_, versionMinor_, code_, message_)) { goto done; } } else state_ = SYNTAX_ERROR; break; case HEADER_NAME_BEGIN: if (isToken(*i)) { name_ = chunk.ref(*nparsed - initialOutOffset, 1); state_ = HEADER_NAME; ++*nparsed; ++i; } else if (*i == CR) { state_ = HEADER_END_LF; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) state_ = HEADER_END_LF; #endif else state_ = SYNTAX_ERROR; break; case HEADER_NAME: if (isToken(*i)) { name_.shr(); ++*nparsed; ++i; } else if (*i == ':') { state_ = LWS_BEGIN; lwsNext_ = HEADER_VALUE_BEGIN; lwsNull_ = HEADER_VALUE_END; // only (CR LF) parsed, assume empty value & go on with next header ++*nparsed; ++i; } else if (*i == CR) { state_ = LWS_LF; lwsNext_ = HEADER_COLON; lwsNull_ = SYNTAX_ERROR; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case HEADER_COLON: if (*i == ':') { state_ = LWS_BEGIN; lwsNext_ = HEADER_VALUE_BEGIN; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case LWS_BEGIN: if (*i == CR) { state_ = LWS_LF; ++*nparsed; ++i; #if defined(X0_HTTP_SUPPORT_SHORT_LF) } else if (*i == LF) { state_ = LWS_SP_HT_BEGIN; ++*nparsed; ++i; #endif } else if (*i == SP || *i == HT) { state_ = LWS_SP_HT; ++*nparsed; ++i; } else if (std::isprint(*i)) { state_ = lwsNext_; } else state_ = SYNTAX_ERROR; break; case LWS_LF: if (*i == LF) { state_ = LWS_SP_HT_BEGIN; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case LWS_SP_HT_BEGIN: if (*i == SP || *i == HT) { if (!value_.empty()) value_.shr(3); // CR LF (SP | HT) state_ = LWS_SP_HT; ++*nparsed; ++i; } else { // only (CF LF) parsed so far and no 1*(SP | HT) found. 
state_ = lwsNull_; // XXX no nparsed/i-update } break; case LWS_SP_HT: if (*i == SP || *i == HT) { if (!value_.empty()) value_.shr(); ++*nparsed; ++i; } else state_ = lwsNext_; break; case HEADER_VALUE_BEGIN: if (isText(*i)) { value_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; state_ = HEADER_VALUE; } else if (*i == CR) { state_ = HEADER_VALUE_LF; ++*nparsed; ++i; #if defined(X0_HTTP_SUPPORT_SHORT_LF) } else if (*i == LF) { state_ = HEADER_VALUE_END; ++*nparsed; ++i; #endif } else { state_ = SYNTAX_ERROR; } break; case HEADER_VALUE: if (*i == CR) { state_ = LWS_LF; lwsNext_ = HEADER_VALUE; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) { state_ = LWS_SP_HT_BEGIN; lwsNext_ = HEADER_VALUE; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } #endif else if (isText(*i)) { value_.shr(); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case HEADER_VALUE_LF: if (*i == LF) { state_ = HEADER_VALUE_END; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case HEADER_VALUE_END: { TRACE(2, "header: name='%s', value='%s'", name_.str().c_str(), value_.str().c_str()); if (iequals(name_, "Content-Length")) { contentLength_ = value_.toInt(); TRACE(2, "set content length to: %ld", contentLength_); } else if (iequals(name_, "Transfer-Encoding")) { if (iequals(value_, "chunked")) { chunked_ = true; } } bool rv = onMessageHeader(name_, value_); name_.clear(); value_.clear(); // continue with the next header state_ = HEADER_NAME_BEGIN; if (!rv) { goto done; } break; } case HEADER_END_LF: if (*i == LF) { if (isContentExpected()) state_ = CONTENT_BEGIN; else state_ = MESSAGE_BEGIN; ++*nparsed; ++i; if (!onMessageHeaderEnd()) { TRACE(2, "messageHeaderEnd returned false. 
returning `Aborted`-state"); goto done; } if (!isContentExpected() && !onMessageEnd()) { goto done; } } else { state_ = SYNTAX_ERROR; } break; case CONTENT_BEGIN: if (chunked_) state_ = CONTENT_CHUNK_SIZE_BEGIN; else if (contentLength_ >= 0) state_ = CONTENT; else state_ = CONTENT_ENDLESS; break; case CONTENT_ENDLESS: { // body w/o content-length (allowed in simple MESSAGE types only) BufferRef c(chunk.ref(*nparsed - initialOutOffset)); //TRACE(2, "prepared content-chunk (%ld bytes): %s", c.size(), c.str().c_str()); *nparsed += c.size(); i += c.size(); bool rv = filters_.empty() ? onMessageContent(c) : onMessageContent(filters_.process(c).ref()); if (!rv) goto done; break; } case CONTENT: { // fixed size content length std::size_t offset = *nparsed - initialOutOffset; std::size_t chunkSize = std::min(static_cast<size_t>(contentLength_), chunk.size() - offset); contentLength_ -= chunkSize; *nparsed += chunkSize; i += chunkSize; bool rv = filters_.empty() ? onMessageContent(chunk.ref(offset, chunkSize)) : onMessageContent(filters_.process(chunk.ref(offset, chunkSize)).ref()); if (contentLength_ == 0) state_ = MESSAGE_BEGIN; if (!rv) goto done; if (state_ == MESSAGE_BEGIN && !onMessageEnd()) goto done; break; } case CONTENT_CHUNK_SIZE_BEGIN: if (!std::isxdigit(*i)) { state_ = SYNTAX_ERROR; break; } state_ = CONTENT_CHUNK_SIZE; contentLength_ = 0; /* fall through */ case CONTENT_CHUNK_SIZE: if (*i == CR) { state_ = CONTENT_CHUNK_LF1; ++*nparsed; ++i; } else if (*i >= '0' && *i <= '9') { contentLength_ = contentLength_ * 16 + *i - '0'; ++*nparsed; ++i; } else if (*i >= 'a' && *i <= 'f') { contentLength_ = contentLength_ * 16 + 10 + *i - 'a'; ++*nparsed; ++i; } else if (*i >= 'A' && *i <= 'F') { contentLength_ = contentLength_ * 16 + 10 + *i - 'A'; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case CONTENT_CHUNK_LF1: if (*i != LF) { state_ = SYNTAX_ERROR; } else { //TRACE(2, "content_length: %ld", contentLength_); if (contentLength_ != 0) state_ = 
CONTENT_CHUNK_BODY; else state_ = CONTENT_CHUNK_CR3; ++*nparsed; ++i; } break; case CONTENT_CHUNK_BODY: if (contentLength_) { std::size_t offset = *nparsed - initialOutOffset; std::size_t chunkSize = std::min(static_cast<size_t>(contentLength_), chunk.size() - offset); contentLength_ -= chunkSize; *nparsed += chunkSize; i += chunkSize; bool rv = filters_.empty() ? onMessageContent(chunk.ref(offset, chunkSize)) : onMessageContent(filters_.process(chunk.ref(offset, chunkSize)).ref()); if (!rv) { goto done; } } else if (*i == CR) { state_ = CONTENT_CHUNK_LF2; ++*nparsed; ++i; } break; case CONTENT_CHUNK_LF2: if (*i != LF) { state_ = SYNTAX_ERROR; } else { state_ = CONTENT_CHUNK_SIZE; ++*nparsed; ++i; } break; case CONTENT_CHUNK_CR3: if (*i != CR) { state_ = SYNTAX_ERROR; } else { state_ = CONTENT_CHUNK_LF3; ++*nparsed; ++i; } break; case CONTENT_CHUNK_LF3: if (*i != LF) { state_ = SYNTAX_ERROR; } else { ++*nparsed; ++i; if (!onMessageEnd()) goto done; state_ = MESSAGE_BEGIN; } break; case SYNTAX_ERROR: { #if !defined(XZERO_NDEBUG) TRACE(1, "parse: syntax error"); if (std::isprint(*i)) { TRACE(1, "parse: syntax error at nparsed: %ld, character: '%c'", *nparsed, *i); } else { TRACE(1, "parse: syntax error at nparsed: %ld, character: 0x%02X", *nparsed, *i); } chunk.dump("request chunk (at syntax error)"); #endif goto done; } default: #if !defined(XZERO_NDEBUG) TRACE(1, "parse: unknown state %i", state_); if (std::isprint(*i)) { TRACE(1, "parse: internal error at nparsed: %ld, character: '%c'", *nparsed, *i); } else { TRACE(1, "parse: internal error at nparsed: %ld, character: 0x%02X", *nparsed, *i); } Buffer::dump(chunk.data(), chunk.size(), "request chunk (at unknown state)"); #endif goto done; } } // we've reached the end of the chunk if (state_ == CONTENT_BEGIN) { // we've just parsed all headers but no body yet. 
if (contentLength_ < 0 && !chunked_ && mode_ != MESSAGE) { // and there's no body to come if (!onMessageEnd()) goto done; // subsequent calls to process() parse next request(s). state_ = MESSAGE_BEGIN; } } done: return *nparsed - initialOutOffset; }
/// Convenience overload for std::string: forwards the string's character
/// buffer and its length to the two-argument isToken() overload.
inline static bool isToken(const std::string& s)
{
	const char* const characters = s.c_str();
	const std::string::size_type count = s.size();
	return isToken(characters, count);
}
bool lexicalAnalyzer::checkAndAddTokens(char charToBeChecked, vector<Token> &tokenList, int& lineNumber, ifstream& in){ Token tokenToBeAdded = Token(); string tempString = string(1, charToBeChecked); char nextChar = in.peek(); char tempChar; int currLine = lineNumber; bool endstring = false; int aposCount = 1; if(isspace(charToBeChecked)){ return true; } else if(isalpha(charToBeChecked)){ string word = string(1, charToBeChecked); while(isalnum(nextChar)){ in.get(tempChar); word += string(1, tempChar); //cout << word << endl; if (isToken(word) != ""){ if(!isalnum(in.peek())){ tokenToBeAdded = Token (isToken(word), word, lineNumber); tokenList.push_back(tokenToBeAdded); return true; } } nextChar = in.peek(); } tokenToBeAdded = Token ("ID", word, lineNumber); tokenList.push_back(tokenToBeAdded); return true; //cout << word << endl; } //Keep Reading from input switch (charToBeChecked){ case ',': tokenToBeAdded = Token ("COMMA", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; //output comma break; case ':': if(nextChar == '-'){ tempString += nextChar; in.get(tempChar); tokenToBeAdded = Token ("COLON_DASH", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; } else{ tokenToBeAdded = Token ("COLON", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; } //check if it's a c-dash, then output colon case '.': tokenToBeAdded = Token ("PERIOD", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; //create token object for period, and add to tokenList case '?': tokenToBeAdded = Token ("Q_MARK", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; //create token object for qmark, and add to tokenList case '(': tokenToBeAdded = Token ("LEFT_PAREN", tempString, lineNumber); tokenList.push_back(tokenToBeAdded); return true; //create token object for lparenth, and add to tokenList case ')': tokenToBeAdded = Token ("RIGHT_PAREN", tempString, lineNumber); 
tokenList.push_back(tokenToBeAdded); return true; //create token object for r parenth, and add to tokenList case '#': if (nextChar == '|'){ in.get(tempChar); tempString += string(1, tempChar); if(in.peek() == '|'){ in.get(tempChar); tempString += string(1, tempChar); if(in.peek() == '#'){ in.get(tempChar); tempString += string(1, tempChar); tokenToBeAdded = Token ("COMMENT", tempString, currLine); tokenList.push_back(tokenToBeAdded); return true; } } do { if(in.eof()){ tokenToBeAdded = Token ("UNDEFINED", tempString, currLine); tokenList.push_back(tokenToBeAdded); return false; } in.get(tempChar); if(tempChar == '\n'){ lineNumber++; } tempString += string(1, tempChar); nextChar = in.peek(); } while(nextChar != '|'); in.get(tempChar); tempString += string(1, tempChar); nextChar = in.peek(); if(nextChar == '#'){ in.get(tempChar); tempString += string(1, tempChar); tokenToBeAdded = Token ("COMMENT", tempString, currLine); tokenList.push_back(tokenToBeAdded); return true; } else return false; } else{ while(nextChar != '\n' && !in.eof()){ in.get(tempChar); tempString += string(1, tempChar); nextChar = in.peek(); } tokenToBeAdded = Token ("COMMENT", tempString, currLine); tokenList.push_back(tokenToBeAdded); return true; } //peek for |, and add appropriate token. 
break; case '\'': in.get(tempChar); tempString += string(1, tempChar); nextChar = in.peek(); while(endstring == false){ nextChar = in.peek(); if(tempChar == '\n') lineNumber++; if(tempChar == '\'' && nextChar == '\''){ in.get(tempChar); tempString += string(1, tempChar); aposCount += 2; in.get(tempChar); tempString += string(1, tempChar); } else if(tempChar == '\''){ tokenToBeAdded = Token ("STRING", tempString, currLine); tokenList.push_back(tokenToBeAdded); return true; } else if(in.eof()){ tokenToBeAdded = Token ("UNDEFINED", tempString, currLine); tokenList.push_back(tokenToBeAdded); return false; } else{ in.get(tempChar); tempString += string(1, tempChar); } } default: tokenToBeAdded = Token ("UNDEFINED", tempString, currLine); tokenList.push_back(tokenToBeAdded); } return true; }
command_t makeCommandList(char* fileString) { int maxListSize = 1000; int listSize = 0; //printf("before malloc"); command_t commandList = malloc(sizeof(struct command)*maxListSize); //checkParen(fileString); // printf("before strlen"); int length = strlen(fileString); //printf("length: %d\n", length); int index; /*command_t comm = malloc(sizeof(command_t)); comm->type = AND_COMMAND;*/ int lineNumber = 1; //printf("begin forloop\n"); for(index = 0; index < length; index++) { //printf("index: %d\n", index); char c = fileString[index]; //printf("makeCommandList: c: %c %d\n", c, c); //printf("listSize: %d\n", listSize); if(listSize == maxListSize) { //printf("realloc commandList"); maxListSize *= 2; commandList = realloc(commandList, sizeof(struct command)*maxListSize); } if(c == '&') { if(fileString[index+1] == '&') { commandList[listSize].type = AND_COMMAND; //commandList[listSize].status = 1; listSize++; index++; continue; } else { fprintf(stderr, "%d: invalid syntax: only one & sign", lineNumber); exit(1); } } else if(c == '|') { //printf("OR"); if(fileString[index+1] == '|') { // add PIPE_COMMAND to list commandList[listSize].type = OR_COMMAND; listSize++; index++; continue; } else { // add OR_COMMAND to list commandList[listSize].type = PIPE_COMMAND; listSize++; continue; } } else if(c == ';') { // add SEQUENCE_COMMAND to list commandList[listSize].type = SEQUENCE_COMMAND; listSize++; continue; } else if(c == '\n') { commandList[listSize].type = NEW_LINE; lineNumber++; listSize++; continue; } else if(c == ' ') { continue; } else if(c == '>') { commandList[listSize].type = RIGHT_REDIRECT; listSize++; continue; } else if(c == '<') { commandList[listSize].type = LEFT_REDIRECT; listSize++; continue; } else if(c == ')') { commandList[listSize].type = CLOSED_PAREN; listSize++; continue; } else if( c == '(') { commandList[listSize].type = OPEN_PAREN; listSize++; continue; } else { commandList[listSize].type = SIMPLE_COMMAND; //index++; c = fileString[index]; int 
maxWords = 10; int maxChars = 10; int wordsIndex = 0; char** words = malloc(maxWords*sizeof(char*)); while(1) { int breakAll = 0; if(wordsIndex == maxWords) { maxWords *= 2; words = realloc(words, maxWords*sizeof(char*)); } int letterIndex = 0; char* letters = malloc(maxChars*sizeof(char)); c = fileString[index]; if(isToken(c)) { // words[wordsIndex] = letters; /*printf("index: %d\n", index); printf("token c: %c %d\n", c, c); printf("isToken\n");*/ wordsIndex++; index--; break; } while(c != ' ' && c != '\0' && c != EOF) { //printf("word c%d: %c %d\n",index, c, c); //printf("index: %d\n", index); if(isToken(c)) { breakAll = 1; //printf("letterIndex: %d\n", letterIndex); if(letterIndex > 0) { letters[letterIndex] = '\0'; words[wordsIndex] = letters; wordsIndex++; } index--; commandList[listSize].numWords = wordsIndex; break; } if(letterIndex == maxChars) { maxChars *= 2; letters = realloc(letters, maxChars*sizeof(char)); } letters[letterIndex] = c; letterIndex++; index++; if(index < length) { c = fileString[index]; } else { letters[letterIndex] = '\0'; words[wordsIndex] = letters; breakAll = 1; break; } } if(breakAll) { break; } //printf("PRINT_WORD"); // printWord(letters); letters[letterIndex] = '\0'; words[wordsIndex] = letters; wordsIndex++; commandList[listSize].numWords = wordsIndex; index++; //printWords(commandList[0].u.word, commandList[0].numWords); } commandList[listSize].u.word = words; //printWords(commandList[listSize].u.word, commandList[listSize].numWords); //printWords(commandList[0].u.word, commandList[0].numWords); listSize++; } } //printf("listSize: %d\n", listSize); g_commandListLength = listSize; //printWords(commandList[0].u.word, commandList[0].numWords); //printCommandList(commandList, listSize); return zeroIO(commandList, g_commandListLength); }
/*
 * Reads the whole script through get_next_byte and returns it as a
 * heap-allocated, NUL-terminated string with the following normalisation:
 *  - each tab is treated as a space, and a space/tab is dropped while the
 *    "previous was whitespace" flag is set,
 *  - a newline is dropped while the "previous was newline" flag is set,
 *  - '#' comments are removed up to the end of their line; the line break
 *    itself is kept like any other newline,
 *  - both flags are cleared only by an ordinary (non-blank) character.
 *
 * A '#' that directly follows a character other than space or newline, and
 * any character that is neither a token nor valid in a word, is a syntax
 * error: a message with the line number goes to stderr and the program exits
 * with status 1.
 *
 * get_next_byte          - returns the next byte, or EOF at end of input
 * get_next_byte_argument - opaque argument passed to get_next_byte
 */
char* getTextFromFile(int (*get_next_byte) (void *), void *get_next_byte_argument)
{
  int maxLetters = 256;
  char* fileString = malloc(sizeof(char)*maxLetters);
  char prevChar = '\0';
  int index = 0;
  int prevIsWhiteSpace = 0;
  int prevIsNewLine = 0;
  int lineNumber = 1;

  while(1)
  {
    int c = get_next_byte(get_next_byte_argument);
    if(c == EOF) /* end of file? */
    {
      break;
    }

    /* Always keep room for this character AND the terminating NUL. The old
       check (index == maxLetters, done after the EOF test) let the final NUL
       land one byte past the buffer, and growing by a factor of 100
       overflowed int after a few steps. */
    if(index + 1 >= maxLetters)
    {
      maxLetters *= 2;
      fileString = realloc(fileString, maxLetters*sizeof(char));
    }

    if(c == '\n')
    {
      lineNumber++;
    }

    if(c == '#')
    {
      if(index != 0 && prevChar != ' ' && prevChar != '\n')
      {
        fprintf(stderr, "%d: COMMENT SYNTAX ERROR", lineNumber);
        exit(1);
      }
      /* discard the comment text */
      do
      {
        c = get_next_byte(get_next_byte_argument);
      } while(c != EOF && c != '\n');
      if(c == EOF)
      {
        break;
      }
      lineNumber++;
      /* c is now the newline that ended the comment; it is handled below
         like any other newline. It used to be thrown away, which glued a
         command followed by a comment to the command on the next line. */
    }

    if(c == '\t' || c == ' ') /* a tab is stored as a space */
    {
      if(!prevIsWhiteSpace)
      {
        prevIsWhiteSpace = 1;
        fileString[index] = ' ';
        index++;
      }
      prevChar = ' ';
      continue;
    }

    if(c == '\n')
    {
      if(!prevIsNewLine)
      {
        prevIsNewLine = 1;
        fileString[index] = '\n';
        index++;
      }
      prevChar = '\n';
      continue;
    }

    prevIsNewLine = 0;
    prevIsWhiteSpace = 0;
    if(!isToken(c) && !isValidCharForWord(c))
    {
      fprintf(stderr, "%d: syntax error invalid char", lineNumber);
      exit(1);
    }
    fileString[index] = (char) c;
    prevChar = (char) c;
    index++;
  }

  fileString[index] = '\0';
  return fileString;
}
// Try every entry of _knownTokens in turn; for the first one that
// isToken(str, i, t) accepts, store it in `token` and return true.
// NOTE(review): presumably isToken tests whether t occurs in str at index i -
// confirm against its definition.
foreach (QString t, _knownTokens) { if (isToken(str, i, t)) { token = t; return true; } }
// Pushes o onto the stack. The object is passed non duplicated; if a copy is
// needed the caller must duplicate it first. A token object that is neither
// read-only nor a reference is marked read-only before it is pushed.
void pushObjAction(Obj o){
	if(isToken(o)){
		const bool leaveUntouched = o->readOnly || o->reference;
		if(!leaveUntouched){
			o->readOnly=true;
		}
	}
	stack().push(o);
};
// Removes the next token from the front of s and returns it.
//
// Leading spaces are skipped, then the shortest prefix that isToken() accepts
// (given the character following it) is taken as the token text and
// classified: operator, keyword, literal, one of the comment / preprocessor /
// string-delimiter markers, number literal, identifier, or SYMBOL_INVALID for
// an empty or all-space token. Trailing spaces after the token are consumed
// as well.
//
// s       - input line; the consumed part is erased from its front
// cutpart - receives exactly the text that was removed from s
//           (leading spaces + token + trailing spaces)
//
// The returned token has line and column set to 0.
Token Lexer::extractNextToken(std::string& s, std::string& cutpart){
	Token result;
	result.column=0;
	result.line=0;
	result.type=SYMBOL_INVALID;
	result.value=s;

	cutpart = "";

	//get rid of leading whitespace
	std::string::size_type whitespaceend = 0;
	while(whitespaceend<s.length() && s.at(whitespaceend)==' '){
		whitespaceend++;
	}
	cutpart.append(s, 0, whitespaceend);
	s.erase(0,whitespaceend);

	//grow the candidate one character at a time until isToken() accepts it;
	//the candidate is a named string because isToken() takes a non-const
	//reference, to which a temporary substr() result must not be bound
	std::string::size_type tokenend = 1;
	std::string candidate = s.substr(0,tokenend);
	while(tokenend<s.length() && !isToken(candidate, s.at(tokenend))){
		candidate.push_back(s.at(tokenend));
		tokenend++;
	}

	std::string tokenstr = s.substr(0,tokenend);
	if(isOperator(tokenstr)){
		result.type = getOperatorType(tokenstr);
	}else if(isKeyword(tokenstr)){
		result.type = SYMBOL_KEYWORD;
	}else if(isLiteral(tokenstr)){
		result.type = SYMBOL_LITERAL;
	}else if(tokenstr=="//"){
		result.type = SYMBOL_COMMENTSINGLE;
	}else if(tokenstr=="/*"){
		result.type = SYMBOL_COMMENTMULTISTART;
	}else if(tokenstr=="*/"){
		result.type = SYMBOL_COMMENTMULTIEND;
	}else if(tokenstr=="#"){
		result.type = SYMBOL_PREPROCESSOR;
	}else if(tokenstr=="\""){
		result.type = SYMBOL_STRINGDELIM;
	}else if(tokenstr.length()==0 || tokenstr.find_first_not_of(" ")==tokenstr.npos){
		result.type = SYMBOL_INVALID;
	}else if(isnumber(tokenstr)){
		result.type = SYMBOL_LITERAL;
	}else{
		result.type = SYMBOL_IDENTIFIER;
	}

	result.value = tokenstr;
	cutpart.append(tokenstr);

	//get rid of token and trailing whitespace
	whitespaceend = tokenend;
	while(whitespaceend<s.length() && s.at(whitespaceend)==' '){
		whitespaceend++;
	}
	//tokenend can exceed s.length() only when s is empty; nothing trails then
	if(tokenend<s.length()){
		cutpart.append(s, tokenend, whitespaceend-tokenend);
	}
	s.erase(0,whitespaceend);

	return result;
}