// Parses either a single field value or a chained list of values separated by
// "+" or "|" (e.g. "a | b | c") into @value. Only one kind of delimiter is
// allowed per chain; mixing "+" and "|" at the same level is an error.
bool TGztParser::ParseChainedFieldValues(TFieldValueDescriptorProto* value) {
    DO(ParseSingleFieldValue(value));
    // try read several more values, interleaved with "+" or "|"
    if (!LookingAtListDelimiter())
        return true;

    // What was previously read into @value was actually a first item of chained list,
    // not a single value. So transform @value to a list and place its current content
    // as first sub-value of this list.

    // Re-use previously allocated items from the repeated field's cleared-object
    // pool when available, to avoid an extra heap allocation.
    THolder<TFieldValueDescriptorProto> sub_value;
    if (value->mutable_list()->mutable_value()->ClearedCount() > 0) {
        sub_value.Reset(value->mutable_list()->mutable_value()->ReleaseCleared());
        sub_value->CopyFrom(*value);
        //sub_value->Swap(value); -- Swap is unsafe here because it creates cycles for some reason!
    } else
        sub_value.Reset(new TFieldValueDescriptorProto(*value));

    // Turn @value itself into a list and adopt the copied first item.
    value->Clear();
    value->set_type(TFieldValueDescriptorProto::TYPE_LIST);
    value->mutable_list()->mutable_value()->AddAllocated(sub_value.Release());

    // only single kind of separating token is allowed at single chained list.
    // so next we only accept a delimiters of same level which are equal to the first one.
    Stroka delimiter = CurrentToken().text;
    if (delimiter == "|")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PIPE_DELIMITED);
    else if (delimiter == "+")
        value->mutable_list()->set_type(TValuesListDescriptorProto::PLUS_DELIMITED);
    else
        YASSERT(false);  // LookingAtListDelimiter() guarantees "+" or "|"

    // Consume "delim value" pairs while the same delimiter keeps appearing.
    const char* delim_text = delimiter.c_str();
    while (TryConsume(delim_text))
        DO(ParseSingleFieldValue(value->mutable_list()->add_value()));

    // it is an error to meet any list delimiter here (as it will be mixed with previous delimiters).
    if (!LookingAtListDelimiter())
        return true;
    else {
        AddError(Substitute("Distinct kinds of delimiters (\"$0\" and \"$1\") "
                            "should not be mixed in a single chained list at same level.",
                            delimiter, CurrentToken().text));
        return false;
    }
}
// Creates a temporary const identifier from the current token and advances the
// internal state to expect '='. Fails with EDuplicateIdentifier if the name is
// already known to the parser state machine.
void CConstProcessor::CreateTempConst() {
    if (ParserSM().FindIdentifier(CurrentToken())) {
        // Name clashes with an already-registered identifier.
        iResult = Parser::EDuplicateIdentifier;
        return;
    }
    iIdentifier = new CConstIdentifier(CurrentToken());
    iInternalState = EStateExpectEquals;
    iResult = Parser::ENoError;
}
// Parses a single resource value into strValue.
// Accepts: string literals / identifiers, doubles, 32-bit integers (optionally
// OR-combined with '|', or kept as raw text when the type qualifier is tqRgb),
// and 64-bit integers. Returns false on any unexpected token.
bool CResParser::ParseValue(CUniString& strValue) {
    switch (CurrentToken()) {
        case tokenStringLiteral:
        case tokenIdentifier:
            strValue = GetStringLiteral();
            NextToken();
            return true;

        case tokenDoubleLiteral:
            strValue = GetTokenText();
            NextToken();
            return true;

        case tokenInt32Literal: {
            if (GetTypeQualifier() == tqRgb) {
                // RGB values keep their literal text form.
                strValue = GetTokenText();
                NextToken();
                return true;
            }
            int nCombined = GetInt32Literal();
            NextToken();
            // Support bit-flag style combinations: "a | b | c".
            while (CurrentToken() == tokenOr) {
                NextToken();
                if (!Check(tokenInt32Literal))
                    return false;
                nCombined |= GetInt32Literal();
                NextToken();
            }
            strValue = Format(L"%i", nCombined);
            return true;
        }

        case tokenInt64Literal:
            strValue = Format(L"%I64i", GetInt64Literal());
            NextToken();
            return true;

        default:
            Unexpected(L"when parsing value ");
            return false;
    }
}
//------------------------------------------------------------------------------ int CheckToken(READFILE Stream,TokenType Type,char * Value) { //DEBUG printf("Current type = %d Require %d\n",CurrentToken(Stream)->KindToken,Type); //DEBUG printf("Current value = %s Require %s\n",CurrentToken(Stream)->NameToken,Value); return(CheckTokenType(Stream,Type) && !strcmp(CurrentToken(Stream)->NameToken,Value)); }
void CConstProcessor::ProcessState() { Tokens::TTokenType tokenType = CurrentTokenType(); iResult = Parser::EUnexpectedToken; switch (iInternalState) { case EStateExpectConstIdentifier: // Expect an identifier for the new const if (tokenType == Tokens::EIdentifier) { CreateTempConst(); } break; case EStateExpectEquals: if (tokenType == Tokens::EEquals) { iInternalState = EStateExpectValue; iResult = Parser::ENoError; } break; case EStateExpectValue: { // Expect a value formatted as decimal or hex if (tokenType == Tokens::ENumberHex) { iIdentifier->iValue = HexToVal(CurrentToken()); CommitConst(); } else if (tokenType == Tokens::ENumberDec) { iIdentifier->iValue = atol(CurrentToken()); CommitConst(); } break; } } if (iResult != Parser::ENoError) { ParserSM().SetError(iResult); } }
bool TParserBase::ConsumeExtendedIdentifierOrString(Stroka* output, const char* error) { // always returns UTF8 if (LookingAtType(Tokenizer::TYPE_STRING)) { Tokenizer::ParseString(CurrentToken().text, output); NextToken(); return RecodeToUtf8(*output); } else return ConsumeExtendedIdentifier(output, error); }
//----------------------------------------------------------------------------- void TokenError(READFILE Stream) { String RestOfLine; strcpy(RestOfLine,""); fgets(RestOfLine,20,Stream->FileHandle); printf("ERROR: Line %d Char %d Token \"%s\" continuing with \"%s\"\n", Stream->Line,Stream->Character,CurrentToken(Stream)->NameToken,RestOfLine); exit(EXIT_FAILURE); }
bool TParserBase::ConsumeString(Stroka* output, const char* error) { if (LookingAtType(Tokenizer::TYPE_STRING)) { Tokenizer::ParseString(CurrentToken().text, output); NextToken(); return true; } else { AddError(error); return false; } }
//----------------------------------------------------------------------------- void SetTokenType(READFILE Stream,TokenType Type) { TOKEN ThisToken; ThisToken = CurrentToken(Stream); if (ThisToken == NULL) { CodingError("No token"); } ThisToken->KindToken = Type; }
//------------------------------------------------------------------------------ int AcceptToken(READFILE Stream,TokenType Type,char * Value) { if (CheckTokenType(Stream,Type) && !strcmp(CurrentToken(Stream)->NameToken,Value)) { NextToken(Stream); return(1); } else { TokenError(Stream); return(0); } }
bool TParserBase::ConsumeNumber(double* output, const char* error) { if (LookingAtType(Tokenizer::TYPE_FLOAT)) { *output = Tokenizer::ParseFloat(CurrentToken().text); NextToken(); return true; } else if (LookingAtType(Tokenizer::TYPE_INTEGER)) { // Also accept integers. ui64 value = 0; if (!Tokenizer::ParseInteger(CurrentToken().text, NProtoBuf::kuint64max, &value)) { AddError("Integer out of range."); // We still return true because we did, in fact, parse a number. } *output = (double)value; NextToken(); return true; } else { AddError(error); return false; } }
// Consumes an "extended" identifier: a run of adjacent identifier pieces that
// may include underscores and (for single-byte encodings) non-ASCII alpha
// characters. The accumulated result is recoded to UTF8 in @output.
bool TParserBase::ConsumeExtendedIdentifier(Stroka* output, const char* error) {
    // Always return UTF8
    // First piece should be an identifier, non-ASCII alpha character or underscore
    if (!LookingAtType(Tokenizer::TYPE_IDENTIFIER) && !LookingAt('_') && !LookingAtNonAsciiAlpha()) {
        AddError(error);
        return false;
    }
    output->clear();
    do {
        if (LookingAtNonAscii()) {
            // Non-ASCII pieces are only meaningful when a single-byte codepage
            // is configured; multi-byte encodings must quote such identifiers.
            if (CodePage_ == NULL) {
                AddError(Substitute("Unquoted non-ASCII characters allowed only for single-byte encodings (e.g. cp1251).\n"
                                    "Current encoding: $0.", NameByCharset(Encoding)));
                return false;
            } else if (!CodePage_->IsAlpha(CurrentToken().text[0]))
                break;  // non-alpha byte ends the identifier
        } else if (LookingAtType(Tokenizer::TYPE_FLOAT)) {
            // We don't accept syntax like "blah.123".
            AddError("Need space between unquoted identifier and decimal point.");
            return false;
        } else if (!LookingAtType(Tokenizer::TYPE_IDENTIFIER) &&
                   // an integer piece continues the identifier only if it starts
                   // alphanumerically (e.g. the "123" in "abc123")
                   !(LookingAtType(Tokenizer::TYPE_INTEGER) && ::IsAlnum(CurrentToken().text[0])) &&
                   !LookingAt('_'))
            break;
        *output += CurrentToken().text;
        NextToken();
        // Keep appending only while tokens touch each other in the input
        // (no intervening whitespace).
    } while (AdjacentToPrevious());
    YASSERT(!output->empty());
    return RecodeToUtf8(*output);
}
bool TParserBase::ConsumeInteger64(ui64 max_value, ui64* output, const char* error) { if (LookingAtType(Tokenizer::TYPE_INTEGER)) { if (!Tokenizer::ParseInteger(CurrentToken().text, max_value, output)) { AddError("Integer out of range."); // We still return true because we did, in fact, parse an integer. *output = 0; } NextToken(); return true; } else { AddError(error); return false; } }
bool TParserBase::ConsumeInteger(int* output, const char* error) { if (LookingAtType(Tokenizer::TYPE_INTEGER)) { ui64 value = 0; if (!Tokenizer::ParseInteger(CurrentToken().text, NProtoBuf::kint32max, &value)) { AddError("Integer out of range."); // We still return true because we did, in fact, parse an integer. } *output = (int)value; NextToken(); return true; } else { AddError(error); return false; } }
bool TParserBase::ConsumeIdentifierAppend(Stroka* output, const char* error) { if (LookingAtType(Tokenizer::TYPE_IDENTIFIER)) { // To avoid allocating new memory blocks for CurrentToken().text in subsequent Input->Next() // do explicit copying (without aliasing) into @output const Stroka& current_text = CurrentToken().text; output->AppendNoAlias(~current_text, +current_text); Input->Next(); return true; } else { AddError(error); return false; } }
//----------------------------------------------------------------------------- int CheckTokenType(READFILE Stream,TokenType Type) { TOKEN ThisToken; ThisToken = CurrentToken(Stream); if (ThisToken == NULL) { CodingError("No token"); } return((ThisToken->KindToken == Type) || (Type == predicate_symbol && ThisToken->KindToken == lower_word) || (Type == functor && (ThisToken->KindToken == lower_word || ThisToken->KindToken == number || ThisToken->KindToken == distinct_object)) || (Type == name && (ThisToken->KindToken == lower_word || ThisToken->KindToken == number))); }
//--------------------------------------------------------------------------- void TokenWarning(READFILE Stream,char * Message) { printf("WARNING: Line %d Char %d Token \"%s\" : %s\n", Stream->Line,Stream->Character,CurrentToken(Stream)->NameToken,Message); }
// Parses one (non-chained) field value into @value: an identifier, integer,
// float, string, sub-article block "{...}", or bracketed list "[...]".
// A leading '-' is only legal before numbers.
bool TGztParser::ParseSingleFieldValue(TFieldValueDescriptorProto* value) {
    bool is_negative = TryConsume('-');
    switch (CurrentToken().type) {
        case Tokenizer::TYPE_START:
            AddError("Trying to read value before any tokens have been read.");
            return false;

        case Tokenizer::TYPE_END:
            AddError("Unexpected end of stream while parsing field value.");
            return false;

        case Tokenizer::TYPE_IDENTIFIER:
            if (is_negative) {
                AddError("Invalid '-' symbol before identifier.");
                return false;
            }
            value->set_type(TFieldValueDescriptorProto::TYPE_IDENTIFIER);
            DO(ConsumeIdentifier(value->mutable_string_or_identifier(), "Expected identifier."));
            return true;

        case Tokenizer::TYPE_INTEGER:
            // For negative numbers allow magnitudes up to |kint64min|
            // (kint64max + 1); otherwise the full unsigned 64-bit range.
            ui64 number, max_value;
            max_value = is_negative ?
                static_cast<ui64>(NProtoBuf::kint64max) + 1 :
                NProtoBuf::kuint64max;
            DO(ConsumeInteger64(max_value, &number, "Expected integer."));
            value->set_type(TFieldValueDescriptorProto::TYPE_INTEGER);
            value->set_int_number(is_negative ? -(i64)number : number);
            return true;

        case Tokenizer::TYPE_FLOAT:
            value->set_type(TFieldValueDescriptorProto::TYPE_FLOAT);
            value->set_float_number(Tokenizer::ParseFloat(CurrentToken().text) * (is_negative ? -1 : 1));
            NextToken();
            return true;

        case Tokenizer::TYPE_STRING:
            if (is_negative) {
                AddError("Invalid '-' symbol before string.");
                return false;
            }
            value->set_type(TFieldValueDescriptorProto::TYPE_STRING);
            Tokenizer::ParseString(CurrentToken().text, value->mutable_string_or_identifier());
            NextToken();
            return true;

        case Tokenizer::TYPE_SYMBOL:
            if (LookingAt('{')) {
                if (is_negative) {
                    AddError("Invalid '-' symbol before inner block.");
                    return false;
                }
                //parse sub-article
                value->set_type(TFieldValueDescriptorProto::TYPE_BLOCK);
                return ParseArticleBlock(value->mutable_sub_field());
            } else if (LookingAt('[')) {
                if (is_negative) {
                    AddError("Invalid '-' symbol before list.");
                    return false;
                }
                return ParseBracketedValuesList(value);
            }
            // Any other symbol falls through to the generic error below.
    }
    AddError("Expected field value.");
    return false;
}
// Parses resource text @pszContent into a tree of CResNodes rooted at
// @pRootNode. Handles nested "name { ... }" nodes, "name -> \"link\";"
// references, "name = value;" assignments and #gen/#endgen generator blocks.
// Returns false (with an error set) on any syntax problem.
bool CResParser::Parse(CResNode* pRootNode, const wchar_t* pszContent, const wchar_t* pszFileName) {
    // Initialize tokenizer
    CCppTokenizer::ParseString(pszContent, pszFileName);

    // Parse it
    CVector<CResNode*> ScopeStack;
    ScopeStack.Push(pRootNode);
    while (CurrentToken()!=tokenEOF) {
        // Ending a Node
        if (CurrentToken()==tokenCloseBrace) {
            // Skip it (an optional trailing semicolon is tolerated)
            NextToken();
            if (CurrentToken()==tokenSemiColon) {
                NextToken();
            }

            // Pop scope stack...
            ScopeStack.Pop();

            // Check have something to pop!
            if (ScopeStack.IsEmpty()) {
                Unexpected();
                return false;
            }
            continue;
        }

        // Generate definition?
        if (CurrentToken()==tokenGen) {
            while (true) {
                // Yes, find the end of the generator block (or, a new target definition)
                const wchar_t* pszEndScan[]= {L"#endgen", L"#target"};
                if (!ScanForward(pszEndScan, _countof(pszEndScan)))
                    return false;
                NextToken();
                // Keep scanning past #target markers until #endgen is reached.
                if (CurrentToken()!=tokenTarget)
                    break;
            }
            Skip(tokenEndGen);
            continue;
        }

        // Get name of element - expect string literal or identifier...
        // NOTE(review): the literal text is fetched before the token kind is
        // validated — presumably GetStringLiteral() is safe to call on any
        // token; confirm against the tokenizer.
        CUniString strName=GetStringLiteral();
        if (CurrentToken()!=tokenStringLiteral && CurrentToken()!=tokenIdentifier) {
            Unexpected(L"when parsing Node or value name ");
            return false;
        }
        NextToken();

        if (CurrentToken()==tokenPointer) {
            // "name -> \"other\";" — a link node referencing another node.
            NextToken();
            CUniString strLinkTo=GetStringLiteral();
            if (!Skip(tokenStringLiteral))
                return false;
            if (strLinkTo.IsEmpty()) {
                SetError(L"Missing link reference name");
                return false;
            }

            // Start a new Node
            CResNode* pNewSection;
            pNewSection=ScopeStack.Top()->CreateNewNode(strName);
            pNewSection->SetLink(strLinkTo);
            Skip(tokenSemiColon);
            continue;
        }

        // Parse an optional assign before open brace...
        bool bHaveAssign=CurrentToken()==tokenEquals;
        if (bHaveAssign) {
            NextToken();
        }

        // Starting a Node?
        if (CurrentToken()==tokenOpenBrace) {
            // Skip it
            NextToken();

            // Start a new Node
            ScopeStack.Push(ScopeStack.Top()->CreateNewNode(strName));
            continue;
        }

        // Expect assignment
        // NOTE(review): Skip(tokenEquals) followed by NextToken() looks like it
        // advances twice; elsewhere Skip() alone appears to consume the token.
        // Verify Skip()'s semantics before touching this path.
        if (!bHaveAssign) {
            if (!Skip(tokenEquals))
                return false;
            NextToken();
        }

        // Parse value...
        CUniString strValue;
        if (!ParseValue(strValue))
            return false;

        // Skip semicolon
        if (!Skip(tokenSemiColon))
            return false;

        // Store value...
        ScopeStack.Top()->SetValue(strName, strValue);
    }

    // Check nothing left on stack except the root node
    if (ScopeStack.GetSize()!=1) {
        SetError(L"Missing closing brace");
        return false;
    }

    // Done!
    return true;
}
// Convenience overload: records @location for @descriptor at the position of
// the token currently under the cursor.
void TGztParser::RecordLocation(const NProtoBuf::Message* descriptor,
                                NProtoBuf::DescriptorPool::ErrorCollector::ErrorLocation location) {
    const int line = CurrentToken().line;
    const int column = CurrentToken().column;
    RecordLocation(descriptor, location, line, column);
}
// Drives the struct-declaration state machine for one token. Grammar (as
// implemented): struct <name> { (<type> [dec|hex] <member> | pad <size>)* }
// end struct. Errors are reported via ParserSM().SetError().
void CStructProcessor::ProcessState() {
    Tokens::TTokenType tokenType = CurrentTokenType();
    // Assume failure; accepting branches overwrite this.
    iResult = Parser::EUnexpectedToken;
    const CIdentifierBase* ident = NULL;
    switch (iInternalState) {
        case EStateExpectStructIdentifier:
            // name of new struct — must not clash with an existing identifier
            if (tokenType == Tokens::EIdentifier) {
                if (!ParserSM().FindIdentifier(CurrentToken())) {
                    iIdentifier = new CStructIdentifier(CurrentToken());
                    iInternalState = EStateExpectMoreMembersOrEnd;
                    iResult = Parser::ENoError;
                } else {
                    iResult = Parser::EDuplicateIdentifier;
                }
            }
            break;

        case EStateExpectMoreMembersOrEnd:
            iResult = Parser::ENoError;
            switch (tokenType) {
                case Tokens::EPadType:
                case Tokens::EIntType:
                case Tokens::EIdentifier:
                case Tokens::EMessageIdType:
                    // A member declaration starts with a type name, which must
                    // resolve to a known type-like identifier.
                    ident = ParserSM().FindIdentifier(CurrentToken());
                    if (!ident) {
                        iResult = Parser::EUnknownIdentifier;
                    } else if (ident->Type() == Parser::EEnumTypeIdentifier ||
                               ident->Type() == Parser::EStructIdentifier ||
                               ident->Type() == Parser::EIntegerTypeIdentifier ||
                               ident->Type() == Parser::EContextIdentifier ||
                               ident->Type() == Parser::EMessageIdTypeIdentifier) {
                        iTempMember = new TMember();
                        iTempMember->iMemberType = ident;
                        if (ident->Type() == Parser::EIntegerTypeIdentifier) {
                            // Integer members may carry a dec/hex display format.
                            iInternalState = EStateExpectMemberNameOrFormat;
                        } else {
                            iInternalState = EStateExpectMemberName;
                        }
                    } else if (ident->Type() == Parser::EPadTypeIdentifier) {
                        // "pad" is followed by a size, not a member name.
                        iTempMember = new TMember();
                        iTempMember->iMemberType = ident;
                        iInternalState = EStateExpectPadSize;
                        break;
                    } else {
                        iResult = Parser::EInvalidType;
                    }
                    break;
                case Tokens::EEnd:
                    iInternalState = EStateExpectEndStruct;
                    // An empty struct is rejected.
                    if (iIdentifier->iMembers.size() == 0) {
                        iResult = Parser::EUnexpectedToken;
                    }
                    break;
                default:
                    iResult = Parser::EUnexpectedToken;
                    break;
            }
            break;

        case EStateExpectMemberNameOrFormat:
            // NOTE(review): a token that is neither an identifier nor a display
            // format leaves iTempMember allocated here — looks like a leak on
            // the error path; confirm whether the state machine recovers it.
            switch (tokenType) {
                case Tokens::EIdentifier:
                    if (iIdentifier->FindMember(CurrentToken())) {
                        iResult = Parser::EDuplicateIdentifier;
                        delete iTempMember;
                    } else {
                        // Struct takes ownership of the member.
                        iTempMember->iMemberName = _strdup(CurrentToken());
                        iIdentifier->AddMember(iTempMember);
                        iResult = Parser::ENoError;
                    }
                    iInternalState = EStateExpectMoreMembersOrEnd;
                    iTempMember = NULL;
                    break;
                case Tokens::EDisplayDec:
                case Tokens::EDisplayHex:
                    iTempMember->iIdentifierOptions = new TIntegerIdentifierOptions(tokenType == Tokens::EDisplayHex);
                    iInternalState = EStateExpectMemberName;
                    iResult = Parser::ENoError;
                    break;
            }
            break;

        case EStateExpectMemberName:
            if (tokenType == Tokens::EIdentifier) {
                if (iIdentifier->FindMember(CurrentToken())) {
                    iResult = Parser::EDuplicateIdentifier;
                    delete iTempMember;
                } else {
                    // Struct takes ownership of the member.
                    iTempMember->iMemberName = _strdup(CurrentToken());
                    iIdentifier->AddMember(iTempMember);
                    iResult = Parser::ENoError;
                }
                iInternalState = EStateExpectMoreMembersOrEnd;
                iTempMember = NULL;
            }
            break;

        case EStateExpectEndStruct:
            // Expect the "struct" keyword of "end struct"; either way the
            // processor returns to the initial state.
            if (tokenType == Tokens::EStruct) {
                ParserSM().AddIdentifier(iIdentifier);
                iIdentifier = NULL;
                iResult = Parser::ENoError;
            }
            ParserSM().SetState(new CInitialState(ParserSM()));
            break;

        case EStateExpectPadSize: {
            iResult = Parser::ENoError;
            iInternalState = EStateExpectMoreMembersOrEnd;
            unsigned int v = 0;
            // Pad size may be hex or decimal, and must be at least 1.
            if (tokenType == Tokens::ENumberHex) {
                v = HexToVal(CurrentToken());
            } else if (tokenType == Tokens::ENumberDec) {
                v = atol(CurrentToken());
            } else {
                iResult = Parser::EUnexpectedToken;
                delete iTempMember;
            }
            if (iResult == Parser::ENoError) {
                if (v >= 1) {
                    iTempMember->iArraySize = v;
                    iIdentifier->AddMember(iTempMember);
                } else {
                    iResult = Parser::EValueOutOfRange;
                    delete iTempMember;
                }
            }
            iTempMember = NULL;
        }
        break;
    }
    if (iResult != Parser::ENoError) {
        ParserSM().SetError(iResult);
    }
}