// Creates a duplicate of this string.
//
// sharedMemory picks how the new object is allocated: plain `new` when true,
// the NEW() macro when false. Either way a literal string is re-attached
// (AttachLiteral) and any other string has its bytes copied (CopyString).
StringData *StringData::copy(bool sharedMemory /* = false */) const {
  const bool literal = isLiteral();

  if (!sharedMemory) {
    if (literal) {
      return NEW(StringData)(m_data, size(), AttachLiteral);
    }
    return NEW(StringData)(m_data, size(), CopyString);
  }

  if (literal) {
    return new StringData(m_data, size(), AttachLiteral);
  }
  return new StringData(m_data, size(), CopyString);
}
// Manual smoke test for the symbol helpers.
//
// Every section below is compiled out with `#if 0`, so as written this
// function does nothing. Switch a section to `#if 1` to run it by hand.
void testSymbol()
{
#if 0 /* section 1: dump the operator table */
    int i = 0;
    for(; i < KeywordsCount; ++i) {
        printf("%s\n", Operators[i]);
    }
#endif

    // test: isKeyword isLiteral isVar isSemicolon symbol_construct symbol_deconstruct
#if 0 /* section 2: marked "not ok" by the author */
    char *symbol[] = {"int", "i", "=", "12", ";"};
    for(int i = 0; i < sizeof(symbol) / sizeof(symbol[0]); ++i) {
        // this printf was marked "ok"
        printf("symbol %s : isKeyword:%s isLiteral:%s isVar:%s isSemicolon:%s\n",
               symbol[i],
               TO_BOOL_STR(isKeyword(symbol[i])),
               TO_BOOL_STR(isLiteral(symbol[i])),
               TO_BOOL_STR(isVar(symbol[i])),
               TO_BOOL_STR(isSemicolon(symbol[i])));

        Symbol *sb = symbol_construct(symbol[i]);
        if(sb) {
            // this part was marked "not ok"
            printf("%x %x %x %x \n",
                   IS_KEYWORD(sb->type),
                   IS_LITERAL(sb->type),
                   IS_VAR(sb->type),
                   IS_SEMICOLON(sb->type));
            symbol_deconstruct(sb);
        }
    }
#endif

    // test: isCharLiteral isStringLiteral isDecNumber isOctNumber isHexNumber isFloatNumber
    // test: isDoubleNumber
#if 0 /* section 3: marked "ok" by the author */
    int i = 0;
    const char *strArr[] = {"\'c\'", "\"abc\"", "453", "0453", "781", "a90",
                            "0x34", "0X56", "9.34", "9.4e2", "9.5E5", "9e+2",
                            "9e-3", "9.34f", "9.34F" };
    for(; i < sizeof(strArr) / sizeof(strArr[0]); ++i) {
        printf("%s: isCharLiteral(%s)\n\t", strArr[i], TO_BOOL_STR(isCharLiteral(strArr[i])));
        printf("isStringLiteral(%s)\n\t", TO_BOOL_STR(isStringLiteral(strArr[i])));
        printf("isDecNumber(%s)\n\t", TO_BOOL_STR(isDecNumber(strArr[i])));
        printf("isOctNumber(%s)\n\t", TO_BOOL_STR(isOctNumber(strArr[i])));
        printf("isHexNumber(%s)\n\t", TO_BOOL_STR(isHexNumber(strArr[i])));
        printf("isFloatNumber(%s)\n\t", TO_BOOL_STR(isFloatNumber(strArr[i])));
        printf("isDoubleNumber(%s)\n", TO_BOOL_STR(isDoubleNumber(strArr[i])));
    }
    /*
    // Single-printf variant. Author's note: I don't know why, but it can't
    // output all strings.
    for(; i < sizeof(strArr) / sizeof(strArr[0]); ++i) {
        printf("%s: isCharLiteral(%s)\n\t isStringLiteral(%s)\n\t isDecNumber(%s)\n\t isOctNumber(%s)\n\t isHexNumber(%s)\n\t isFloatNumber(%s)\n\t isDoubleNumber(%s)\n",
               strArr[i],
               TO_BOOL_STR(isCharLiteral(strArr[i])),
               TO_BOOL_STR(isStringLiteral(strArr[i])),
               TO_BOOL_STR(isDecNumber(strArr[i])),
               TO_BOOL_STR(isOctNumber(strArr[i])),
               TO_BOOL_STR(isHexNumber(strArr[i])),
               TO_BOOL_STR(isFloatNumber(strArr[i])),
               TO_BOOL_STR(isDoubleNumber(strArr[i])));
    }
    */
#endif
}
// Trims the tail of the last block in env.region so that the region does not
// end on literal opcodes, this/self/parent opcodes, or type assertions. Those
// carry information useful to optimizations that would be lost across the
// region boundary.
void truncateLiterals(Env& env) {
  if (!env.region || env.region->empty()) return;
  if (env.region->blocks().back()->empty()) return;

  auto& tail = *env.region->blocks().back();
  auto cursor = tail.start();
  auto cutPoint = cursor;
  auto unit = tail.unit();
  int const count = tail.length();

  for (int idx = 0; idx < count; ++idx, cursor.advance(unit)) {
    auto const op = cursor.op();
    if (isLiteral(op) || isThisSelfOrParent(op) || isTypeAssert(op)) continue;

    // The block already ends on an ordinary opcode: nothing to trim.
    if (idx == count - 1) return;

    // Remember the most recent ordinary opcode seen so far.
    cutPoint = cursor;
  }

  // If the cut point is still the block start, do not truncate. Otherwise
  // we would chop off the trailing N-1 opcodes, then the next region would
  // select those N-1 opcodes and chop off N-2 of them, and so on.
  if (cutPoint != tail.start()) {
    FTRACE(1, "selectTracelet truncating block after offset {}:\n{}\n",
           cutPoint.offset(), show(tail));
    tail.truncateAfter(cutPoint);
  }
}
Symbol *symbol_construct(const char *str) { SYMBOL_TYPE type; Symbol *sb = (Symbol *)malloc(sizeof(Symbol)); if(!sb) return NULL; char *sbStr = (char *)malloc(strlen(str) + 1); if(!sbStr) return NULL; type = 0; if(isKeyword(str)) type |= SYMBOL_TYPE_KEYWORD; if(isVar(str)) type |= SYMBOL_TYPE_VAR; if(isSemicolon(str)) type |= SYMBOL_TYPE_SEMICOLON; if(isLiteral(str)) type |= SYMBOL_TYPE_LITERAL; sb->type = type; strcpy(sbStr, str); sb->str = sbStr; return sb; }
// Adds this string's byte count (plus one for the terminating NUL) to
// totalSize. Strings with no data and literal strings contribute nothing.
// Returns true when totalSize was increased.
bool StringData::calculate(int &totalSize) {
  if (!m_data || isLiteral()) {
    return false;
  }
  totalSize += (size() + 1); // room for the ending NUL
  return true;
}
void constdef(){ symbol_t t, id; if(!isType(sym)){ msg(ERR, "missing a type name", line); ERROR_STATUS = 1; } t = copySym(sym); do{ nextSym(); if(sym->type!=ID){ msg(ERR, "missing a identifier after a type name", line); ERROR_STATUS = 1; } id = copySym(sym); nextSym(); if(sym->type!=ASN){ msg(ERR, "constant need a value", line); ERROR_STATUS = 1; } nextSym(); readLiteral(); if(!isLiteral(sym)){ msg(ERR, "missing a literal in constant definition", line); ERROR_STATUS = 1; } createconst(t, id, sym); mfree(t); mfree(id); nextSym(); }while(sym->type==COMMA); }
// Points this (non-literal) string at `data`, keeps only the LenMask bits of
// m_len, sets the IsLinear flag, and recomputes the hash over the new data.
void StringData::restore(const char *&data) {
  ASSERT(!isLiteral());
  m_data = data;
  m_len = (m_len & LenMask) | IsLinear;
  // size() is read only after m_len has been updated.
  m_hash = hash_string(m_data, size());
}
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::set // a single number: 1 // a list of numbers: 1,3,5 // a range: 5-10 // or a combination: 1,3,5-10 // // <id> [ - <id> ] [ , <id> [ - <id> ] ] ... bool Lexer::isSet (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; int count = 0; std::string dummyToken; Lexer::Type dummyType; do { if (isInteger (dummyToken, dummyType)) { ++count; if (isLiteral ("-", false, false)) { if (isInteger (dummyToken, dummyType)) ++count; else { _cursor = marker; return false; } } } else { _cursor = marker; return false; } } while (isLiteral (",", false, false)); // Success is multiple numbers, matching the pattern. if (count > 1 && (isEOS () || isWhitespace (_text[_cursor]) || isHardBoundary (_text[_cursor], _text[_cursor + 1]))) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::set; return true; } _cursor = marker; return false; }
// Tries each key of `options` as a literal at the current position, in map
// order, and returns true on the first one that isLiteral() accepts.
bool Lexer::isOneOf (
  const std::map <std::string, std::string>& options,
  bool allowAbbreviations,
  bool endBoundary)
{
  for (auto entry = options.begin (); entry != options.end (); ++entry)
  {
    if (isLiteral (entry->first, allowAbbreviations, endBoundary))
      return true;
  }

  return false;
}
// Tokenizes the text of an OBJ file.
//
// Dispatches on the character at the current position:
//   'v'        -> VERTEX, VERTEX_T ("vt") or VERTEX_N ("vn") token
//   'f'        -> FACE token
//   '#' / 'g'  -> skipped up to the end of the line (grouping is ignored)
//   literal    -> LITERAL token, as decided by isLiteral()
//   otherwise  -> the single character is skipped
// AddNewToken() returns how far to advance.
ObjLexer::ObjLexer(std::string const& raw_object) {
  const int length = static_cast<int>(raw_object.size());
  int i = 0;

  while (i < length) {
    const char ch = raw_object[i];

    if (ch == 'v') {
      // v / vt / vn: the following character selects the vertex kind.
      const char following = (i + 1 < length) ? raw_object[i + 1] : '\0';
      Token::Type type = Token::Type::VERTEX;
      if (following == 't') {
        type = Token::Type::VERTEX_T;
      } else if (following == 'n') {
        type = Token::Type::VERTEX_N;
      }
      i += AddNewToken(type, i, raw_object);
    } else if (ch == 'f') {
      i += AddNewToken(Token::Type::FACE, i, raw_object);
    } else if (ch == '#' || ch == 'g') {
      // Jump to the newline that ends this line. When the last line has no
      // trailing newline find() yields npos, so stop at the end of input
      // rather than letting the index go negative.
      const std::string::size_type eol = raw_object.find('\n', i);
      i = (eol == std::string::npos) ? length : static_cast<int>(eol);
    } else if (isLiteral(ch)) {
      i += AddNewToken(Token::Type::LITERAL, i, raw_object);
    } else {
      // Blanks, newlines and any other unrecognised character (for example
      // '\r' or '\t'). Always advance so the loop cannot spin forever.
      ++i;
    }
  }
}
// Returns a string equivalent to this one.
//
// Static strings are returned as-is. For shared memory the bytes are always
// copied. Otherwise literals are re-attached and everything else is copied.
StringData *StringData::copy(bool sharedMemory /* = false */) const {
  // Static strings cannot change, and are always available.
  if (isStatic()) {
    return const_cast<StringData *>(this);
  }

  // Even a literal might come from hphpi's class info, which is freed at the
  // end of the request, so the shared-memory copy must own its bytes.
  if (sharedMemory) {
    return new StringData(m_data, size(), CopyString);
  }

  if (!isLiteral()) {
    return NEW(StringData)(m_data, size(), CopyString);
  }
  return NEW(StringData)(m_data, size(), AttachLiteral);
}
// Prints a debugging view of this string to stdout: the reference count, the
// literal/shared/linear flags, the length, and then the contents with
// non-printable bytes shown as a backslash followed by two hex digits.
void StringData::dump() {
  const char *p = data();
  int len = size();

  printf("StringData(%d) (%s%s%s%d): [", _count,
         isLiteral() ? "literal " : "",
         isShared() ? "shared " : "",
         isLinear() ? "linear " : "",
         len);

  for (int i = 0; i < len; i++) {
    // Work on the byte as unsigned char: passing a negative char to isprint()
    // is undefined, and %02x would sign-extend it to eight hex digits.
    const unsigned char byte = static_cast<unsigned char>(p[i]);
    if (isprint(byte)) {
      std::cout << p[i];
    } else {
      printf("\\%02x", static_cast<unsigned int>(byte));
    }
  }
  printf("]\n");
}
/*
 * Consumes an optional sign in front of a literal.
 *
 * - Current symbol is already a literal: nothing happens.
 * - '+': the sign is skipped.
 * - '-': the sign is skipped and the following symbol is negated. An INTEGER
 *   has its value negated. A REAL has its text run through
 *   ccstrinv / ccstrcat('-') / ccstrinv (presumably reverse, append, reverse,
 *   i.e. prepend a minus sign; confirm against those helpers).
 *
 * A '-' followed by anything else is not diagnosed here.
 */
void readLiteral(){
    if(isLiteral(sym))
        return;

    if(sym->type == PLUS){
        nextSym();
        return;
    }

    if(sym->type != MINUS)
        return;

    nextSym();
    if(sym->type == REAL){
        ccstrinv((char*)(sym->value));
        sym->value = (int)ccstrcat((char*)(sym->value), '-');
        ccstrinv((char*)(sym->value));
    }else if(sym->type == INTEGER){
        sym->value = -(sym->value);
    }else{
        /* TODO: error, '-' is not followed by a numeric literal */
    }
}
/*
 * Handles one `<literal> : <statement>` entry (this looks like a case label
 * of a switch; confirm with the caller).
 *
 * Emits CMP expr, <literal> followed by JNE nextlab, then parses the
 * statement after the colon. None of the syntax checks below report anything
 * yet.
 */
void jumpentry(ident_t expr, ident_t nextlab){
    ident_t lit;

    readLiteral();
    if(!isLiteral(sym)){
        /* TODO: error, a literal is expected here */
    }
    if(!(sym->type==INTEGER || sym->type==CHARLIT)){
        /* TODO: error, only integer and character literals are allowed */
    }

    lit = getLiteral(global, sym);
    gen(CMP, expr, lit, 0);
    gen(JNE, nextlab, 0, 0);

    nextSym();
    if(sym->type!=COLON){
        /* TODO: error, ':' is expected after the literal */
    }

    nextSym();
    statement();
}
// Prints a debugging view of this string to stdout: the reference count, the
// literal/shared/static flags, the length, and then the contents with
// non-printable bytes shown as \xNN. With TAINTED defined, the taint data is
// dumped as well before the closing bracket.
void StringData::dump() const {
  StringSlice s = slice();

  printf("StringData(%d) (%s%s%s%d): [", _count,
         isLiteral() ? "literal " : "",
         isShared() ? "shared " : "",
         isStatic() ? "static " : "",
         s.len);

  for (uint32_t i = 0; i < s.len; i++) {
    // Work on the byte as unsigned char: passing a negative char to isprint()
    // is undefined, and %02x would sign-extend it to eight hex digits.
    const unsigned char byte = static_cast<unsigned char>(s.ptr[i]);
    if (isprint(byte)) {
      std::cout << s.ptr[i];
    } else {
      printf("\\x%02x", static_cast<unsigned int>(byte));
    }
  }
#ifdef TAINTED
  printf("\n");
  this->getTaintDataRefConst().dump();
#endif
  printf("]\n");
}
/// Parses one attribute: an identifier, optionally followed by an assignment
/// operator and either a brace-enclosed complex attribute or a single
/// identifier/literal value. The attribute is stored via setAttribute();
/// the value is empty when none was given. Returns the token following the
/// attribute. Throws SyntaxException when the assignment is followed by
/// anything else.
const Token* AttributesParser::parseAttribute(const Token* pNext)
{
	std::string id;
	std::string value;

	pNext = parseIdentifier(pNext, id);
	if (isOperator(pNext, OperatorToken::OP_ASSIGN))
	{
		pNext = next();
		if (isOperator(pNext, OperatorToken::OP_OPENBRACE))
		{
			pNext = parseComplexAttribute(pNext, id);
		}
		else
		{
			if (!isIdentifier(pNext) && !isLiteral(pNext))
				throw SyntaxException("bad attribute declaration");

			value = pNext->asString();
			pNext = next();
		}
	}

	setAttribute(id, value);
	return pNext;
}
ident_t factor(){ ident_t ret = 0; symbol_t id; if(sym->type==ID){ id = copySym(sym); nextSym(); if(sym->type!=LPAREN){ if(context) ret = findTable(context->local, (char*)(id->value)); if(!ret) ret = findTable(global, (char*)(id->value)); if(!ret){ msg(ERR, "undefined identifier", line); ERROR_STATUS = 1; } return ret; } nextSym(); if((ret = funccall(id))==0){ msg(ERR, "no value return from function", line); ERROR_STATUS = 1; } mfree(id); }else if(sym->type==LPAREN){ nextSym(); ret = expression(tn); if(sym->type!=RPAREN){ msg(ERR, "missing \')\'", line); ERROR_STATUS = 1; } nextSym(); }else if(isLiteral(sym) || isAddOperator(sym)){ readLiteral(); ret = getLiteral(global, sym); nextSym(); } return ret; }
void StringData::dump() const { const char *p = data(); int len = size(); printf("StringData(%d) (%s%s%s%s%d): [", _count, isLiteral() ? "literal " : "", isShared() ? "shared " : "", isLinear() ? "linear " : "", isStatic() ? "static " : "", len); for (int i = 0; i < len; i++) { char ch = p[i]; if (isprint(ch)) { std::cout << ch; } else { printf("\\x%02x", ch); } } #ifdef TAINTED printf("\n"); this->getTaintDataRef().dump(); #endif printf("]\n"); }
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::dom // [ <isUUID> | <isDigit>+ . ] <isIdentifier> [ . <isIdentifier> ]* // // Configuration: // rc.<name> // // System: // context.program // context.args // context.width // context.height // system.version // system.os // // Relative or absolute attribute: // <attribute> // <id>.<attribute> // <uuid>.<attribute> // // Single tag: // tags.<word> // // Date type: // <date>.year // <date>.month // <date>.day // <date>.week // <date>.weekday // <date>.julian // <date>.hour // <date>.minute // <date>.second // // Annotations (entry is a date): // annotations.<N>.entry // annotations.<N>.description // bool Lexer::isDOM (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string partialToken; Lexer::Type partialType; if (isLiteral ("rc.", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = marker; if (isOneOf ({"context.program", "context.args", "context.width", "context.height", "system.version", "system.os"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // Optional: // <uuid>. // <id>. std::string extractedToken; Lexer::Type extractedType; if (isUUID (extractedToken, extractedType, false) || isInteger (extractedToken, extractedType)) { if (! isLiteral (".", false, false)) { _cursor = marker; return false; } } // Any failure after this line should rollback to the checkpoint. 
std::size_t checkpoint = _cursor; // [prefix]tags.<word> if (isLiteral ("tags", false, false) && isLiteral (".", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = checkpoint; // [prefix]attribute if (isOneOf (attributes, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // [prefix]attribute. if (isOneOf (attributes, false, false)) { if (isLiteral (".", false, false)) { std::string attribute = _text.substr (checkpoint, _cursor - checkpoint - 1); // if attribute type is 'date', then it has sub-elements. if (attributes[attribute] == "date" && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } else { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } // [prefix]annotations. if (isLiteral ("annotations", true, false) && isLiteral (".", false, false)) { std::string extractedToken; Lexer::Type extractedType; if (isInteger (extractedToken, extractedType)) { if (isLiteral (".", false, false)) { if (isLiteral ("description", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, false) && isLiteral (".", false, false) && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } } } _cursor = marker; return false; }
// Points this (non-literal) string at `data`, keeps only the LenMask bits of
// m_len, and sets the IsLinear flag.
void StringData::restore(const char *&data) {
  ASSERT(!isLiteral());
  m_data = data;
  m_len = (m_len & LenMask) | IsLinear;
}
// Removes the next token from the front of `s` and classifies it.
//
// s        in/out: remaining input. The leading blanks, the token and the
//          blanks after it are erased.
// cutpart  out: exactly the text that was removed from `s`.
//
// The returned Token has line and column set to 0, value set to the token
// text, and type set by this order of tests: operator, keyword, literal,
// comment/preprocessor/string-delimiter marker, invalid (empty or blank),
// number (literal), identifier.
Token Lexer::extractNextToken(std::string& s, std::string& cutpart){
    typedef std::string::size_type size_type;

    Token result;
    result.column = 0;
    result.line = 0;
    result.type = SYMBOL_INVALID;
    cutpart.clear();

    // Strip leading blanks.
    size_type whitespaceend = 0;
    while(whitespaceend < s.length() && s.at(whitespaceend) == ' '){
        ++whitespaceend;
    }
    cutpart.append(s, 0, whitespaceend);
    s.erase(0, whitespaceend);

    // Grow the candidate one character at a time until isToken() says the
    // prefix is complete given the character that follows it.
    size_type tokenend = 1;
    while(tokenend < s.length() && !isToken(s.substr(0, tokenend), s.at(tokenend))){
        ++tokenend;
    }
    std::string tokenstr = s.substr(0, tokenend);

    if(isOperator(tokenstr)){
        result.type = getOperatorType(tokenstr);
    }else if(isKeyword(tokenstr)){
        result.type = SYMBOL_KEYWORD;
    }else if(isLiteral(tokenstr)){
        result.type = SYMBOL_LITERAL;
    }else if(tokenstr == "//"){
        result.type = SYMBOL_COMMENTSINGLE;
    }else if(tokenstr == "/*"){
        result.type = SYMBOL_COMMENTMULTISTART;
    }else if(tokenstr == "*/"){
        result.type = SYMBOL_COMMENTMULTIEND;
    }else if(tokenstr == "#"){
        result.type = SYMBOL_PREPROCESSOR;
    }else if(tokenstr == "\""){
        result.type = SYMBOL_STRINGDELIM;
    }else if(tokenstr.length() == 0 || tokenstr.find_first_not_of(" ") == tokenstr.npos){
        result.type = SYMBOL_INVALID;
    }else if(isnumber(tokenstr)){
        result.type = SYMBOL_LITERAL;
    }else{
        result.type = SYMBOL_IDENTIFIER;
    }

    result.value = tokenstr;
    cutpart.append(tokenstr);

    // Strip the blanks that follow the token.
    whitespaceend = tokenend;
    while(whitespaceend < s.length() && s.at(whitespaceend) == ' '){
        ++whitespaceend;
    }
    // tokenend can exceed the length when `s` ran out (it starts at 1), so
    // only touch the tail when it exists.
    if(tokenend < s.length()){
        cutpart.append(s, tokenend, whitespaceend - tokenend);
    }
    s.erase(0, whitespaceend);

    return result;
}
// A token equals a string only when it is not a literal token and its generic
// text matches.
bool Token::operator==(const std::string& str) const
{
	return !isLiteral() && str == mGeneric;
}
/** \todo check for instr.eof() */ void xmlstream_iterator::getNext() { // first use the token stack if filled if (mTokenStack.size() != 0) { // get the token from the stack and return it Token tok; mCurToken = mTokenStack.top(); mTokenStack.pop(); return; } bool finished = false; std::string generic; // get next char char c; do { if (mPutbackChar == char(-1)) { c = mInput.get(); mLocation.step(); } else { c = mPutbackChar; mPutbackChar = char(-1); mLocation.step(); } // do we have an eof? // TODO: check for instr.eof() if (c == char(EOF)) { if (generic.length() != 0) { mCurToken = c; return; } else { break; } } // is it a literal? if (isLiteral(c)) { mCdataMode = false; if (generic.length() == 0) { mCurToken = c; // quick fix for removing set_cdataMode() functionality if (c == '>') { mCdataMode = true; } return; } mPutbackChar = c; mLocation.step(-1); break; } // a string delimiter and not in cdata mode? if (isStringDelimiter(c) && !mCdataMode) { generic = c; char delim = c; do { c = mInput.get(); mLocation.step(); if (c == char(EOF)) { break; } generic += c; } while (c != delim); break; } // a whitespace? if (isWhiteSpace(c)) { if (generic.length() == 0) { continue; } else { if (!mCdataMode) { break; } } } // a newline char? if (isNewLine(c) ) { if (!mCdataMode || generic.length() == 0) { continue; } } // add to generic string generic += c; } while (!finished); // set the generic string mCurToken = generic; }
// A token equals a character only when it is a literal token holding that
// character.
bool Token::operator==(char ch) const
{
	return isLiteral() && ch == mLiteral;
}
// True for a literal whose type is anything other than LITERAL_STR.
bool isLiteralNumber() const {
  if (LITERAL_STR == type) {
    return false;
  }
  return isLiteral();
}
void StringData::append(const char *s, int len) { ASSERT(!isStatic()); // never mess around with static strings! if (len == 0) return; if (UNLIKELY(uint32_t(len) > MaxSize)) { throw InvalidArgumentException("len>=2^30", len); } if (UNLIKELY(len + m_len > MaxSize)) { throw FatalErrorException(0, "String length exceeded 2^30 - 1: %u", len + m_len); } int newlen; // TODO: t1122987: in any of the cases below where we need a bigger buffer, // we can probably assume we're in a concat-loop and pick a good buffer // size to avoid O(N^2) copying cost. if (isShared() || isLiteral()) { // buffer is immutable, don't modify it. // We are mutating, so we don't need to repropagate our own taint StringSlice r = slice(); char* newdata = string_concat(r.ptr, r.len, s, len, newlen); if (isShared()) m_big.shared->decRef(); m_len = newlen; m_data = newdata; m_big.cap = newlen | IsMalloc; m_hash = 0; } else if (rawdata() == s) { // appending ourself to ourself, be conservative. // We are mutating, so we don't need to repropagate our own taint StringSlice r = slice(); char *newdata = string_concat(r.ptr, r.len, s, len, newlen); releaseData(); m_len = newlen; m_data = newdata; m_big.cap = newlen | IsMalloc; m_hash = 0; } else if (isSmall()) { // we're currently small but might not be after append. // We are mutating, so we don't need to repropagate our own taint int oldlen = m_len; newlen = oldlen + len; if (unsigned(newlen) <= MaxSmallSize) { // win. memcpy(&m_small[oldlen], s, len); m_small[newlen] = 0; m_small[MaxSmallSize] = 0; m_len = newlen; m_data = m_small; m_hash = 0; } else { // small->big string transition. char *newdata = string_concat(m_small, oldlen, s, len, newlen); m_len = newlen; m_data = newdata; m_big.cap = newlen | IsMalloc; m_hash = 0; } } else { // generic "big string concat" path. realloc buffer. 
int oldlen = m_len; char* oldp = m_data; ASSERT((oldp > s && oldp - s > len) || (oldp < s && s - oldp > oldlen)); // no overlapping newlen = oldlen + len; char* newdata = (char*) realloc(oldp, newlen + 1); memcpy(newdata + oldlen, s, len); newdata[newlen] = 0; m_len = newlen; m_data = newdata; m_big.cap = newlen | IsMalloc; m_hash = 0; } ASSERT(uint32_t(newlen) <= MaxSize); TAINT_OBSERVER_REGISTER_MUTATED(m_taint_data, rawdata()); ASSERT(checkSane()); }
// True for a literal whose type is LITERAL_STR.
bool isLiteralString() const {
  if (LITERAL_STR != type) {
    return false;
  }
  return isLiteral();
}