// Richtext simplification filter: Remove hard-coded font settings, // <style> elements, <p> attributes other than 'align' and // and unnecessary meta-information. QString simplifyRichTextFilter(const QString &in, bool *isPlainTextPtr = 0) { unsigned elementCount = 0; bool paragraphAlignmentFound = false; QString out; QXmlStreamReader reader(in); QXmlStreamWriter writer(&out); writer.setAutoFormatting(false); writer.setAutoFormattingIndent(0); while (!reader.atEnd()) { switch (reader.readNext()) { case QXmlStreamReader::StartElement: elementCount++; if (filterElement(reader.name())) { const QStringRef name = reader.name(); QXmlStreamAttributes attributes = reader.attributes(); filterAttributes(name, &attributes, ¶graphAlignmentFound); writer.writeStartElement(name.toString()); if (!attributes.isEmpty()) writer.writeAttributes(attributes); } else reader.readElementText(); // Skip away all nested elements and characters. break; case QXmlStreamReader::Characters: if (!isWhiteSpace(reader.text())) writer.writeCharacters(reader.text().toString()); break; case QXmlStreamReader::EndElement: writer.writeEndElement(); break; default: break; } } // Check for plain text (no spans, just <html><head><body><p>) if (isPlainTextPtr) *isPlainTextPtr = !paragraphAlignmentFound && elementCount == 4u; // return out; }
/**
 * Reads the remainder of a quoted string literal from the reader.
 *
 * Characters are consumed until the closing quote character is found.
 * Newlines inside the literal are kept and advance the line counter.
 * A backslash introduces an escape: \t, \n and \r are translated to the
 * tab, line-feed and carriage-return characters; any other escaped
 * character is taken literally (so \\ and an escaped quote work too).
 *
 * @param startLine line of the start of the literal, stored in the lexeme
 *                  and used in the "not closed" error message.
 * @param startPos  position of the start of the literal, used likewise.
 * @param quote     the quote character that terminates the literal.
 * @return a Lexeme of type Lexeme::String holding the decoded text.
 * @throws Exception if a backslash is followed by whitespace, or if the
 *                   input ends before the closing quote.
 */
Lexeme Lexal::readString(int startLine, int startPos, wchar_t quote)
{
    std::wstring str;
    bool closed = false;

    while (! reader.isEof()) {
        wchar_t ch = reader.getNextChar();
        pos++;

        if ('\n' == ch) {
            line++;
            pos = 0;
            str += ch;
        } else if ('\\' == ch) {
            // A backslash as the very last input character is dropped; the
            // loop then ends and the literal is reported as not closed.
            if (! reader.isEof()) {
                wchar_t nextCh = reader.getNextChar();
                if (isWhiteSpace(nextCh))
                    throw Exception(L"Invalid escape sequence at " + posToStr(line, pos));
                pos++;
                // Map the escape *letter* to its control character. The
                // previous code tested for the control characters themselves,
                // which made every branch equivalent to the default one.
                switch (nextCh) {
                case L't':
                    str += L'\t';
                    break;
                case L'n':
                    str += L'\n';
                    break;
                case L'r':
                    str += L'\r';
                    break;
                default:
                    str += nextCh;
                }
            }
        } else if (quote == ch) {
            closed = true;
            break;
        } else {
            str += ch;
        }
    }

    if (! closed)
        throw Exception(L"String at " + posToStr(startLine, startPos) + L" doesn't closed");

    return Lexeme(Lexeme::String, str, startLine, startPos);
}
/**
 * Parses a dictionary section, consuming characters until the closing '>'
 * (or a NUL character) is reached or a nested parse step fails.
 *
 * Parsing starts in NPValues mode; when the column-meta marker
 * (MorkDictColumnMeta) is found after a '<', the mode switches to NPColumns.
 * '(' starts a cell and '/' starts a comment.
 *
 * @return false as soon as parseCell() or parseComment() fails, true otherwise.
 */
bool MorkParser::parseDict()
{
    char ch = nextChar();
    bool ok = true;
    nowParsing_ = NPValues;

    for ( ; ok && ch != '>' && ch; ch = nextChar() )
    {
        if ( isWhiteSpace( ch ) )
            continue;

        if ( ch == '<' )
        {
            // morkPos_ - 1 is the offset used for the character just read.
            if ( morkData_.substr( morkPos_ - 1, strlen( MorkDictColumnMeta ) ) == MorkDictColumnMeta )
            {
                nowParsing_ = NPColumns;
                // Jump over the rest of the marker.
                morkPos_ += strlen( MorkDictColumnMeta ) - 1;
            }
        }
        else if ( ch == '(' )
        {
            ok = parseCell();
        }
        else if ( ch == '/' )
        {
            ok = parseComment();
        }
    }

    return ok;
}
/**
 * Advances the reader past whitespace and comments.
 *
 * Skipped are: whitespace (a '\n' resets pos and bumps line), '#' and '//'
 * comments up to the end of the line, and block comments introduced by
 * a slash followed by '*'. On the first character that belongs to none of
 * these, that character is pushed back to the reader and the function
 * returns. It also returns when the reader reaches end of input.
 */
void Lexal::skipSpaces()
{
    while (! reader.isEof()) {
        const wchar_t current = reader.getNextChar();
        pos++;

        if (isWhiteSpace(current)) {
            if ('\n' == current) {
                pos = 0;
                line++;
            }
            continue;
        }

        if ('#' == current) {
            skipToLineEnd();
            continue;
        }

        if (('/' == current) && (! reader.isEof())) {
            const wchar_t lookahead = reader.getNextChar();
            pos++;

            if ('/' == lookahead) {
                skipToLineEnd();
                continue;
            }
            if ('*' == lookahead) {
                skipMultilineComment(line, pos);
                continue;
            }

            // A lone '/': give the lookahead back, then fall through to
            // give back the '/' itself.
            pos--;
            reader.ungetChar(lookahead);
        }

        // First significant character: push it back and stop.
        pos--;
        reader.ungetChar(current);
        return;
    }
}
/**
 * Reads a phrase book from the XML stream into the translator.
 *
 * The text of <source>, <target> and <definition> elements is accumulated
 * into m_currentSource, m_currentTarget and m_currentDefinition. Any other
 * start element stops accumulation. Whitespace-only character data is
 * ignored. At each </phrase> a TranslatorMessage is built from the three
 * accumulated strings (source text, translation, translator comment),
 * appended to the translator, and the accumulators are cleared.
 *
 * @param translator receives one message per <phrase> element.
 * @return always true; stream errors are not reported by this function.
 */
bool QPHReader::read(Translator &translator)
{
    m_currentField = NoField;

    while (!atEnd()) {
        readNext();
        if (isStartElement()) {
            // Select which accumulator subsequent character data goes to.
            if (name() == QLatin1String("source"))
                m_currentField = SourceField;
            else if (name() == QLatin1String("target"))
                m_currentField = TargetField;
            else if (name() == QLatin1String("definition"))
                m_currentField = DefinitionField;
            else
                m_currentField = NoField;
        } else if (isWhiteSpace()) {
            // ignore these
        } else if (isCharacters()) {
            if (m_currentField == SourceField)
                m_currentSource += text();
            else if (m_currentField == TargetField)
                m_currentTarget += text();
            else if (m_currentField == DefinitionField)
                m_currentDefinition += text();
        } else if (isEndElement() && name() == QLatin1String("phrase")) {
            TranslatorMessage msg;
            msg.setSourceText(m_currentSource);
            msg.setTranslation(m_currentTarget);
            msg.setTranslatorComment(m_currentDefinition);
            translator.append(msg);
            m_currentSource.clear();
            m_currentTarget.clear();
            m_currentDefinition.clear();
        }
    }
    return true;
}
/*
 * This is the end
 * Beautiful friend
 * This is the end
 * My only friend, the end
 *
 * Skips whitespace on inStream and reports whether the end of the current
 * line or of the input comes next.
 *
 * Characters are read while isWhiteSpace() accepts them. Reading stops at
 * the first newline, EOF, or non-whitespace character; that character is
 * pushed back with unreadChar() in every case.
 *
 * Returns 1 if the stopping character was '\n' or EOF, 0 otherwise.
 *
 * NOTE(review): the character is held in a plain char and compared with
 * EOF; whether that comparison is reliable depends on nextChar()'s return
 * type, which is not visible here - confirm.
 */
int thisIsTheEnd(OBJ inStream){
    char c;
    int reachedEnd = 0;

    for (;;) {
        c = nextChar(inStream);
        if (c == '\n' || c == EOF) {
            reachedEnd = 1;
            break;
        }
        if (!isWhiteSpace(c)) {
            break;
        }
    }

    unreadChar(inStream, c);
    return reachedEnd;
}
/*
 * Scans the next token from the input and returns its token code.
 *
 * The scanner is a character-driven state machine: starting in state Start
 * it inspects `current` (with look-ahead in next1..next3), switches state,
 * records characters via record8()/record16(), and advances with shift()
 * until `done` is set. After the loop, numeric literals are converted and
 * the final state selects the value to return.
 *
 * Return value:
 *   - 0 when the final state is Eof,
 *   - -1 when the final state is Bad (err/errmsg are assigned at the point
 *     of failure for most errors),
 *   - QScriptGrammar::T_IDENTIFIER, T_STRING_LITERAL, T_NUMERIC_LITERAL,
 *     a reserved-word token from findReservedWord(), or a punctuator token
 *     from matchPunctuator() otherwise.
 *
 * Side effects visible here: qsyylval.dval is set for numeric literals;
 * qsyylval.ustr is set to 0 for identifiers and strings (interning through
 * `driver` is not implemented and asserts); the members restrKeyword,
 * delimited, parenthesesState/parenthesesCount and stackToken are updated
 * for the automatic-semicolon-insertion logic.
 *
 * NOTE(review): setDone() is not visible in this chunk; from its use it
 * presumably stores the final state and sets `done` - confirm.
 */
int QScript::Lexer::lex()
{
    int token = 0;
    state = Start;
    ushort stringType = 0; // either single or double quotes
    pos8 = pos16 = 0;
    done = false;
    terminator = false;

    // did we push a token on the stack previously ?
    // (after an automatic semicolon insertion)
    if (stackToken >= 0) {
        setDone(Other);
        token = stackToken;
        stackToken = -1;
    }

    while (!done) {
        switch (state) {
        case Start:
            if (isWhiteSpace()) {
                // do nothing
            } else if (current == '/' && next1 == '/') {
                recordStartPos();
                shift(1);
                state = InSingleLineComment;
            } else if (current == '/' && next1 == '*') {
                recordStartPos();
                shift(1);
                state = InMultiLineComment;
            } else if (current == 0) {
                // A zero character is handled as end of input.
                syncProhibitAutomaticSemicolon();
                if (!terminator && !delimited && !prohibitAutomaticSemicolon) {
                    // automatic semicolon insertion if program incomplete
                    token = QScriptGrammar::T_SEMICOLON;
                    // Stack token 0 so that the following call returns 0.
                    stackToken = 0;
                    setDone(Other);
                } else {
                    setDone(Eof);
                }
            } else if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
                yycolumn = 0;
                bol = true;
                terminator = true;
                syncProhibitAutomaticSemicolon();
                // A line break after continue/break/return/throw ends the
                // statement (restrKeyword is set for those keywords below).
                if (restrKeyword) {
                    token = QScriptGrammar::T_SEMICOLON;
                    setDone(Other);
                }
            } else if (current == '"' || current == '\'') {
                recordStartPos();
                state = InString;
                stringType = current;
            } else if (isIdentLetter(current)) {
                recordStartPos();
                record16(current);
                state = InIdentifier;
            } else if (current == '0') {
                recordStartPos();
                record8(current);
                state = InNum0;
            } else if (isDecimalDigit(current)) {
                recordStartPos();
                record8(current);
                state = InNum;
            } else if (current == '.' && isDecimalDigit(next1)) {
                recordStartPos();
                record8(current);
                state = InDecimal;
            } else {
                recordStartPos();
                token = matchPunctuator(current, next1, next2, next3);
                if (token != -1) {
                    if (terminator && !delimited && !prohibitAutomaticSemicolon
                        && (token == QScriptGrammar::T_PLUS_PLUS
                            || token == QScriptGrammar::T_MINUS_MINUS)) {
                        // automatic semicolon insertion
                        stackToken = token;
                        token = QScriptGrammar::T_SEMICOLON;
                    }
                    setDone(Other);
                } else {
                    setDone(Bad);
                    err = IllegalCharacter;
                    errmsg = QLatin1String("Illegal character");
                }
            }
            break;
        case InString:
            if (current == stringType) {
                shift(1);
                setDone(String);
            } else if (current == 0 || isLineTerminator()) {
                setDone(Bad);
                err = UnclosedStringLiteral;
                errmsg = QLatin1String("Unclosed string at end of line");
            } else if (current == '\\') {
                state = InEscapeSequence;
            } else {
                record16(current);
            }
            break;
        // Escape Sequences inside of strings
        case InEscapeSequence:
            if (isOctalDigit(current)) {
                // Up to three octal digits; a three-digit form must start
                // with 0..3.
                if (current >= '0' && current <= '3' &&
                     isOctalDigit(next1) && isOctalDigit(next2)) {
                    record16(convertOctal(current, next1, next2));
                    shift(2);
                    state = InString;
                } else if (isOctalDigit(current) &&
                            isOctalDigit(next1)) {
                    record16(convertOctal('0', current, next1));
                    shift(1);
                    state = InString;
                } else if (isOctalDigit(current)) {
                    record16(convertOctal('0', '0', current));
                    state = InString;
                } else {
                    setDone(Bad);
                    err = IllegalEscapeSequence;
                    errmsg = QLatin1String("Illegal escape squence");
                }
            } else if (current == 'x')
                state = InHexEscape;
            else if (current == 'u')
                state = InUnicodeEscape;
            else {
                if (isLineTerminator()) {
                    // Backslash followed by a line break: nothing is recorded.
                    shiftWindowsLineBreak();
                    yylineno++;
                    yycolumn = 0;
                    bol = true;
                } else {
                    record16(singleEscape(current));
                }
                state = InString;
            }
            break;
        case InHexEscape:
            if (isHexDigit(current) && isHexDigit(next1)) {
                state = InString;
                record16(QLatin1Char(convertHex(current, next1)));
                shift(1);
            } else if (current == stringType) {
                // "\x" directly before the closing quote: keep a literal 'x'.
                record16(QLatin1Char('x'));
                shift(1);
                setDone(String);
            } else {
                record16(QLatin1Char('x'));
                record16(current);
                state = InString;
            }
            break;
        case InUnicodeEscape:
            if (isHexDigit(current) && isHexDigit(next1) &&
                 isHexDigit(next2) && isHexDigit(next3)) {
                record16(convertUnicode(current, next1, next2, next3));
                shift(3);
                state = InString;
            } else if (current == stringType) {
                // "\u" directly before the closing quote: keep a literal 'u'.
                record16(QLatin1Char('u'));
                shift(1);
                setDone(String);
            } else {
                setDone(Bad);
                err = IllegalUnicodeEscapeSequence;
                errmsg = QLatin1String("Illegal unicode escape sequence");
            }
            break;
        case InSingleLineComment:
            if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
                yycolumn = 0;
                terminator = true;
                bol = true;
                if (restrKeyword) {
                    token = QScriptGrammar::T_SEMICOLON;
                    setDone(Other);
                } else
                    state = Start;
            } else if (current == 0) {
                setDone(Eof);
            }
            break;
        case InMultiLineComment:
            if (current == 0) {
                setDone(Bad);
                err = UnclosedComment;
                errmsg = QLatin1String("Unclosed comment at end of file");
            } else if (isLineTerminator()) {
                shiftWindowsLineBreak();
                yylineno++;
            } else if (current == '*' && next1 == '/') {
                state = Start;
                shift(1);
            }
            break;
        case InIdentifier:
            if (isIdentLetter(current) || isDecimalDigit(current)) {
                record16(current);
                break;
            }
            setDone(Identifier);
            break;
        case InNum0:
            // A literal that started with '0': hex, decimal fraction,
            // exponent, octal, or plain zero.
            if (current == 'x' || current == 'X') {
                record8(current);
                state = InHex;
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else if (isOctalDigit(current)) {
                record8(current);
                state = InOctal;
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InDecimal;
            } else {
                setDone(Number);
            }
            break;
        case InHex:
            if (isHexDigit(current))
                record8(current);
            else
                setDone(Hex);
            break;
        case InOctal:
            if (isOctalDigit(current)) {
                record8(current);
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InDecimal;
            } else {
                setDone(Octal);
            }
            break;
        case InNum:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == '.') {
                record8(current);
                state = InDecimal;
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InDecimal:
            if (isDecimalDigit(current)) {
                record8(current);
            } else if (current == 'e' || current == 'E') {
                record8(current);
                state = InExponentIndicator;
            } else {
                setDone(Number);
            }
            break;
        case InExponentIndicator:
            if (current == '+' || current == '-') {
                record8(current);
            } else if (isDecimalDigit(current)) {
                record8(current);
                state = InExponent;
            } else {
                setDone(Bad);
                err = IllegalExponentIndicator;
                errmsg = QLatin1String("Illegal syntax for exponential number");
            }
            break;
        case InExponent:
            if (isDecimalDigit(current)) {
                record8(current);
            } else {
                setDone(Number);
            }
            break;
        default:
            Q_ASSERT_X(0, "Lexer::lex", "Unhandled state in switch statement");
        }

        // move on to the next character
        if (!done)
            shift(1);
        if (state != Start && state != InSingleLineComment)
            bol = false;
    }

    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
    if ((state == Number || state == Octal || state == Hex)
         && isIdentLetter(current)) {
        state = Bad;
        err = IllegalIdentifier;
        errmsg = QLatin1String("Identifier cannot start with numeric literal");
    }

    // terminate string
    buffer8[pos8] = '\0';

    // Convert the recorded digits; Hex and Octal are folded into Number.
    double dval = 0;
    if (state == Number) {
        dval = qstrtod(buffer8, 0, 0);
    } else if (state == Hex) { // scan hex numbers
        dval = QScript::integerFromString(buffer8, pos8, 16);
        state = Number;
    } else if (state == Octal) {   // scan octal number
        dval = QScript::integerFromString(buffer8, pos8, 8);
        state = Number;
    }

    restrKeyword = false;
    delimited = false;

    // Parenthesis tracking, started below by if/for/while/with (and by do).
    switch (parenthesesState) {
    case IgnoreParentheses:
        break;
    case CountParentheses:
        if (token == QScriptGrammar::T_RPAREN) {
            --parenthesesCount;
            if (parenthesesCount == 0)
                parenthesesState = BalancedParentheses;
        } else if (token == QScriptGrammar::T_LPAREN) {
            ++parenthesesCount;
        }
        break;
    case BalancedParentheses:
        parenthesesState = IgnoreParentheses;
        break;
    }

    switch (state) {
    case Eof:
        return 0;
    case Other:
        if(token == QScriptGrammar::T_RBRACE || token == QScriptGrammar::T_SEMICOLON)
            delimited = true;
        return token;
    case Identifier:
        if ((token = findReservedWord(buffer16, pos16)) < 0) {
            /* TODO: close leak on parse error. same holds true for String */
            if (driver) {
                Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
                qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
            } else
                qsyylval.ustr = 0;
            return QScriptGrammar::T_IDENTIFIER;
        }
        if (token == QScriptGrammar::T_CONTINUE || token == QScriptGrammar::T_BREAK
            || token == QScriptGrammar::T_RETURN || token == QScriptGrammar::T_THROW) {
            restrKeyword = true;
        } else if (token == QScriptGrammar::T_IF || token == QScriptGrammar::T_FOR
                   || token == QScriptGrammar::T_WHILE || token == QScriptGrammar::T_WITH) {
            parenthesesState = CountParentheses;
            parenthesesCount = 0;
        } else if (token == QScriptGrammar::T_DO) {
            parenthesesState = BalancedParentheses;
        }
        return token;
    case String:
        if (driver) {
            Q_ASSERT_X(false, Q_FUNC_INFO, "not implemented");
            qsyylval.ustr = 0; // driver->intern(buffer16, pos16);
        } else
            qsyylval.ustr = 0;
        return QScriptGrammar::T_STRING_LITERAL;
    case Number:
        qsyylval.dval = dval;
        return QScriptGrammar::T_NUMERIC_LITERAL;
    case Bad:
        return -1;
    default:
        Q_ASSERT(!"unhandled numeration value in switch");
        return -1;
    }
}
/**
 * Parses the HTML text in m_lpszBuffer and reports what it finds to the
 * event handler.
 *
 * The buffer is scanned character by character. A '<' triggers an attempt
 * to parse a comment and, failing that, a tag; an '&' triggers entity
 * resolution when m_bResolveEntities is set; everything else is counted
 * as character data. Pending character data is flushed (after
 * NormalizeCharacters) before each comment/tag notification and once more
 * at the end of the buffer. Each notification is sent only if the matching
 * getEventNotify() flag is enabled, and each handler callback may set
 * bAbort to stop parsing.
 *
 * On exit (normal or aborted, once parsing has started) m_lpszBuffer is
 * set to NULL and m_dwBufLen to 0.
 *
 * @return 0 if there is no buffer or its length is zero; otherwise the
 *         value of m_dwBufPos when parsing stopped.
 */
UINT CLiteHTMLReader::parseDocument(void)
{
	ASSERT(m_lpszBuffer != NULL);

	bool	bAbort = false;			// continue parsing or abort?
	bool	bIsClosingTag = false;	// tag parsed is a closing tag?
	bool	bIsOpeningTag = false;	// tag parsed is an opening tag?
	CString	strCharacters;			// character data
	CString	strComment;				// comment data
	CString	strT;					// temporary storage
	DWORD	dwCharDataStart = 0L;	// starting position of character data
	DWORD	dwCharDataLen = 0L;		// length of character data
	LONG	lTemp = 0L;				// temporary storage
	TCHAR	ch = 0;					// character at current buffer position
	CLiteHTMLTag	oTag;			// tag information

	if ( (!m_lpszBuffer) || (!m_dwBufLen) )
		return (0U);

	// reset seek pointer to beginning
	ResetSeekPointer();

	// notify event handler about parsing startup
	if (getEventNotify(notifyStartStop))
	{
		bAbort = false;
		m_pEventHandler->BeginParse(m_dwAppData, bAbort);
		if (bAbort)	goto LEndParse;
	}

	// skip leading white-space characters
	while (isWhiteSpace(ReadChar()))
		;

	// put back the first non-white-space character read by the loop above
	ch = UngetChar();
	while ((ch = ReadChar()) != NULL)
	{
		switch (ch)
		{

		// tag starting delimiter?
		case _T('<'):
			{
				UngetChar();

				strComment.Empty();
				if (!parseComment(strComment))
				{
					bIsOpeningTag = false;
					bIsClosingTag = false;
					if (!parseTag(oTag, bIsOpeningTag, bIsClosingTag))
					{
						// neither a comment nor a tag: count the '<' as
						// ordinary character data
						++dwCharDataLen;

						// manually advance buffer position
						// because the last call to UngetChar()
						// moved it back one character
						ch = ReadChar();

						break;
					}
				}

				// clear pending notifications
				if ( (dwCharDataLen) || (strCharacters.GetLength()) )
				{
					strCharacters += CString(&m_lpszBuffer[dwCharDataStart], dwCharDataLen);
					NormalizeCharacters(strCharacters);

					if ( (strCharacters.GetLength()) &&
						 (getEventNotify(notifyCharacters)) )
					{
						bAbort = false;
						m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}

					strCharacters.Empty();
				}

				// character data restarts right after the comment/tag
				dwCharDataLen = 0L;
				dwCharDataStart = m_dwBufPos;

				if (strComment.GetLength())
				{
					if (getEventNotify(notifyComment))
					{
						bAbort = false;
						m_pEventHandler->Comment(strComment, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}
				}
				else
				{
					if ( (bIsOpeningTag) && (getEventNotify(notifyTagStart)) )
					{
						bAbort = false;
						m_pEventHandler->StartTag(&oTag, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}

					if ( (bIsClosingTag) && (getEventNotify(notifyTagEnd)) )
					{
						bAbort = false;
						m_pEventHandler->EndTag(&oTag, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}
				}

				break;
			}

		// entity reference beginning delimiter?
		case _T('&'):
			{
				UngetChar();

				lTemp = 0;
				if (m_bResolveEntities)
					lTemp = CLiteHTMLEntityResolver::resolveEntity(&m_lpszBuffer[m_dwBufPos], ch);

				if (lTemp)
				{
					// resolved: append the pending data plus the character
					// returned in ch, then skip lTemp buffer characters
					strCharacters += CString(&m_lpszBuffer[dwCharDataStart], dwCharDataLen) + ch;
					m_dwBufPos += lTemp;
					dwCharDataStart = m_dwBufPos;
					dwCharDataLen = 0L;
				}
				else
				{
					// not resolved: keep the '&' as ordinary character data
					ch = ReadChar();
					++dwCharDataLen;
				}

				break;
			}

		// any other character
		default:
			{
				++dwCharDataLen;
				break;
			}
		}
	}

	// clear pending notifications
	if ( (dwCharDataLen) || (strCharacters.GetLength()) )
	{
		strCharacters += CString(&m_lpszBuffer[dwCharDataStart], dwCharDataLen) + ch;
		NormalizeCharacters(strCharacters);
		strCharacters.TrimRight();	// explicit trailing white-space removal

		if ( (strCharacters.GetLength()) &&
			 (getEventNotify(notifyCharacters)) )
		{
			bAbort = false;
			m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
			if (bAbort)	goto LEndParse;
		}
	}

LEndParse:
	// notify event handler about parsing completion
	if (getEventNotify(notifyStartStop))
		m_pEventHandler->EndParse(m_dwAppData, bAbort);

	m_lpszBuffer = NULL;
	m_dwBufLen = 0L;
	return (m_dwBufPos);
}
/*
 * Scans the next token and returns its token code.
 *
 * p1 is cast to YYSTYPE* and receives the token's semantic value where one
 * exists (ident for identifiers, keywords and strings; doubleValue for
 * numbers; intValue, the current offset, for braces). p2 is cast to
 * YYLTYPE* and receives the line number and the start/end offsets of the
 * token.
 *
 * Returns 0 at end of input (m_current == -1), unless a semicolon is
 * inserted automatically first; returns -1 and sets m_error on a lexical
 * error; otherwise returns the token code, which is also stored in
 * m_lastToken.
 *
 * The function is organised as a set of labels joined by goto: `start`
 * skips whitespace and dispatches on the current character; the
 * start.../in... labels scan one kind of token; the done... labels store
 * the semantic value; returnToken fills in the location and returns.
 */
int Lexer::lex(void* p1, void* p2)
{
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_terminator = false;

start:
    while (isWhiteSpace(m_current))
        shift1();

    int startOffset = currentOffset();

    if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

    m_delimited = false;
    switch (m_current) {
        case '>':
            if (m_next1 == '>' && m_next2 == '>') {
                if (m_next3 == '=') {
                    shift4();
                    token = URSHIFTEQUAL;
                    break;
                }
                shift3();
                token = URSHIFT;
                break;
            }
            if (m_next1 == '>') {
                if (m_next2 == '=') {
                    shift3();
                    token = RSHIFTEQUAL;
                    break;
                }
                shift2();
                token = RSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = GE;
                break;
            }
            shift1();
            token = '>';
            break;
        case '=':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STREQ;
                    break;
                }
                shift2();
                token = EQEQ;
                break;
            }
            shift1();
            token = '=';
            break;
        case '!':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STRNEQ;
                    break;
                }
                shift2();
                token = NE;
                break;
            }
            shift1();
            token = '!';
            break;
        case '<':
            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                // <!-- marks the beginning of a line comment (for www usage)
                shift4();
                goto inSingleLineComment;
            }
            if (m_next1 == '<') {
                if (m_next2 == '=') {
                    shift3();
                    token = LSHIFTEQUAL;
                    break;
                }
                shift2();
                token = LSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = LE;
                break;
            }
            shift1();
            token = '<';
            break;
        case '+':
            if (m_next1 == '+') {
                shift2();
                // A distinct token is produced when a line terminator was
                // seen earlier in this call.
                if (m_terminator) {
                    token = AUTOPLUSPLUS;
                    break;
                }
                token = PLUSPLUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = PLUSEQUAL;
                break;
            }
            shift1();
            token = '+';
            break;
        case '-':
            if (m_next1 == '-') {
                // "-->" at the start of a line is treated as a line comment.
                if (m_atLineStart && m_next2 == '>') {
                    shift3();
                    goto inSingleLineComment;
                }
                shift2();
                if (m_terminator) {
                    token = AUTOMINUSMINUS;
                    break;
                }
                token = MINUSMINUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = MINUSEQUAL;
                break;
            }
            shift1();
            token = '-';
            break;
        case '*':
            if (m_next1 == '=') {
                shift2();
                token = MULTEQUAL;
                break;
            }
            shift1();
            token = '*';
            break;
        case '/':
            if (m_next1 == '/') {
                shift2();
                goto inSingleLineComment;
            }
            if (m_next1 == '*')
                goto inMultiLineComment;
            if (m_next1 == '=') {
                shift2();
                token = DIVEQUAL;
                break;
            }
            shift1();
            token = '/';
            break;
        case '&':
            if (m_next1 == '&') {
                shift2();
                token = AND;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = ANDEQUAL;
                break;
            }
            shift1();
            token = '&';
            break;
        case '^':
            if (m_next1 == '=') {
                shift2();
                token = XOREQUAL;
                break;
            }
            shift1();
            token = '^';
            break;
        case '%':
            if (m_next1 == '=') {
                shift2();
                token = MODEQUAL;
                break;
            }
            shift1();
            token = '%';
            break;
        case '|':
            if (m_next1 == '=') {
                shift2();
                token = OREQUAL;
                break;
            }
            if (m_next1 == '|') {
                shift2();
                token = OR;
                break;
            }
            shift1();
            token = '|';
            break;
        case '.':
            if (isASCIIDigit(m_next1)) {
                record8('.');
                shift1();
                goto inNumberAfterDecimalPoint;
            }
            token = '.';
            shift1();
            break;
        case ',':
        case '~':
        case '?':
        case ':':
        case '(':
        case ')':
        case '[':
        case ']':
            // Single-character tokens use the character itself as token code.
            token = m_current;
            shift1();
            break;
        case ';':
            shift1();
            m_delimited = true;
            token = ';';
            break;
        case '{':
            lvalp->intValue = currentOffset();
            shift1();
            token = OPENBRACE;
            break;
        case '}':
            lvalp->intValue = currentOffset();
            shift1();
            m_delimited = true;
            token = CLOSEBRACE;
            break;
        case '\\':
            goto startIdentifierWithBackslash;
        case '0':
            goto startNumberWithZeroDigit;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            goto startNumber;
        case '"':
        case '\'':
            goto startString;
        default:
            if (isIdentStart(m_current))
                goto startIdentifierOrKeyword;
            if (isLineTerminator(m_current)) {
                shiftLineTerminator();
                m_atLineStart = true;
                m_terminator = true;
                // A line break after a restricted keyword yields a semicolon.
                if (lastTokenWasRestrKeyword()) {
                    token = ';';
                    goto doneSemicolon;
                }
                goto start;
            }
            goto returnError;
    }

    m_atLineStart = false;
    goto returnToken;

startString: {
    int stringQuoteCharacter = m_current;
    shift1();

    // Fast path: scan the literal in place; m_buffer16 is used only once a
    // character needing special handling is met.
    const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

inString:
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

inStringEscapeSequence:
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        // Not followed by two hex digits: keep a literal 'x'.
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        // "\u" directly before the closing quote keeps a literal 'u';
        // any other malformed \u escape is an error.
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        // One to three octal digits; the three-digit form must start 0..3.
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    if (isLineTerminator(m_current)) {
        // Backslash followed by a line terminator: nothing is recorded.
        shiftLineTerminator();
        goto inString;
    }
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

startIdentifierWithBackslash:
    // An identifier may begin with a \uXXXX escape.
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

startIdentifierOrKeyword: {
    // Fast path: an identifier without escapes is taken directly from the
    // input and looked up as a keyword at doneIdentifierOrKeyword.
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

    // Slow path: the identifier contains \uXXXX escapes and is assembled in
    // m_buffer16. `token` temporarily holds the decoded character.
    do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        record16(token);
        shift4();

        while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

inSingleLineComment:
    while (!isLineTerminator(m_current)) {
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

inMultiLineComment:
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            // End of input inside the comment is an error.
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    shift2();
    m_atLineStart = false;
    goto start;

startNumberWithZeroDigit:
    shift1();
    // (c | 0x20) == 'x' matches both 'x' and 'X'; the same trick is used
    // for 'e'/'E' below.
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    lvalp->doubleValue = 0;
    goto doneNumeric;

inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

inExponentIndicator:
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    // A digit 8 or 9 follows: continue scanning as a decimal number.
    if (isASCIIDigit(m_current))
        goto startNumber;

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

inHex: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

    // Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

    // Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

    m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

doneSemicolon:
    token = ';';
    m_delimited = true;
    goto returnToken;

doneIdentifier:
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

doneIdentifierOrKeyword: {
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    // Keyword lookup applies only to identifiers scanned on the fast path.
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

    // Fall through into returnToken.

returnToken: {
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

    m_lastToken = token;
    return token;
}

returnError:
    m_error = true;
    return -1;
}
/**
 * additional formatting for line of source code.
 * every line of source code in a source code file should be sent
 *     one after the other to this function.
 * indents event tables
 * unindents the case blocks
 *
 * The line is scanned character by character, skipping whitespace,
 * backslash escapes, quoted text and comments. Quote and comment state are
 * kept in members and therefore carry over to the next line.
 *
 * @param line              the original formatted line will be updated if necessary.
 * @param isInPreprocessor  when true and preprocessorIndent is false, the
 *                          switch-block processing is bypassed for this line.
 * @param isInSQL           when true, the first significant character is only
 *                          checked for the begin/end of an SQL declare section
 *                          and the rest of the line is not scanned.
 */
void ASEnhancer::enhance(string& line, bool isInPreprocessor, bool isInSQL)
{
	bool isSpecialChar = false;			// is a backslash escape character

	shouldIndentLine = true;
	lineNumber++;

	// check for beginning of event table
	if (nextLineIsEventIndent)
	{
		isInEventTable = true;
		nextLineIsEventIndent = false;
	}

	// check for beginning of SQL declare section
	if (nextLineIsDeclareIndent)
	{
		isInDeclareSection = true;
		nextLineIsDeclareIndent = false;
	}

	// an empty line needs no work unless it may have to be indented
	if (line.length() == 0
	        && ! isInEventTable
	        && ! isInDeclareSection
	        && ! emptyLineFill)
		return;

	// test for unindent on attached brackets
	if (unindentNextLine)
	{
		sw.unindentDepth++;
		sw.unindentCase = true;
		unindentNextLine = false;
	}

	// parse characters in the current line.

	for (size_t i = 0; i < line.length(); i++)
	{
		char ch = line[i];

		// bypass whitespace
		if (isWhiteSpace(ch))
			continue;

		// handle special characters (i.e. backslash+character such as \n, \t, ...)
		if (isSpecialChar)
		{
			isSpecialChar = false;
			continue;
		}
		// two consecutive backslashes are skipped as a pair
		if (!(isInComment) && line.compare(i, 2, "\\\\") == 0)
		{
			i++;
			continue;
		}
		if (!(isInComment) && ch == '\\')
		{
			isSpecialChar = true;
			continue;
		}

		// handle quotes (such as 'x' and "Hello Dolly")
		if (!isInComment && (ch == '"' || ch == '\''))
		{
			if (!isInQuote)
			{
				quoteChar = ch;
				isInQuote = true;
			}
			else if (quoteChar == ch)
			{
				isInQuote = false;
				continue;
			}
		}

		if (isInQuote)
			continue;

		// handle comments

		if (!(isInComment) && line.compare(i, 2, "//") == 0)
		{
			// check for windows line markers
			if (line.compare(i + 2, 1, "\xf0") > 0)
				lineNumber--;
			break;                 // finished with the line
		}
		else if (!(isInComment) && line.compare(i, 2, "/*") == 0)
		{
			isInComment = true;
			i++;
			continue;
		}
		else if ((isInComment) && line.compare(i, 2, "*/") == 0)
		{
			isInComment = false;
			i++;
			continue;
		}

		if (isInComment)
			continue;

		// if we have reached this far then we are NOT in a comment or string of special characters

		if (line[i] == '{')
			bracketCount++;

		if (line[i] == '}')
			bracketCount--;

		bool isPotentialKeyword = isCharPotentialHeader(line, i);

		// ----------------  wxWidgets and MFC macros  ----------------------------------

		if (isPotentialKeyword)
		{
			// the indent starts on the line after the BEGIN_ macro
			if (findKeyword(line, i, "BEGIN_EVENT_TABLE")
			        || findKeyword(line, i, "BEGIN_DISPATCH_MAP")
			        || findKeyword(line, i, "BEGIN_EVENT_MAP")
			        || findKeyword(line, i, "BEGIN_MESSAGE_MAP")
			        || findKeyword(line, i, "BEGIN_PROPPAGEIDS"))
			{
				nextLineIsEventIndent = true;
				break;
			}
			if (findKeyword(line, i, "END_EVENT_TABLE")
			        || findKeyword(line, i, "END_DISPATCH_MAP")
			        || findKeyword(line, i, "END_EVENT_MAP")
			        || findKeyword(line, i, "END_MESSAGE_MAP")
			        || findKeyword(line, i, "END_PROPPAGEIDS"))
			{
				isInEventTable = false;
				break;
			}
		}

		// ----------------  process SQL  -----------------------------------------------

		if (isInSQL)
		{
			if (isBeginDeclareSectionSQL(line, i))
				nextLineIsDeclareIndent = true;
			if (isEndDeclareSectionSQL(line, i))
				isInDeclareSection = false;
			break;
		}

		// ----------------  process switch statements  ---------------------------------

		if (isPotentialKeyword && findKeyword(line, i, "switch"))
		{
			switchDepth++;
			switchStack.push_back(sw);                      // save current variables
			sw.switchBracketCount = 0;
			sw.unindentCase = false;                        // don't clear case until end of switch
			i += 5;                                         // bypass switch statement
			continue;
		}

		// just want unindented switch statements from this point

		if (caseIndent
		        || switchDepth == 0
		        || (isInPreprocessor && !preprocessorIndent))
		{
			// bypass the entire word
			if (isPotentialKeyword)
			{
				string name = getCurrentWord(line, i);
				i += name.length() - 1;
			}
			continue;
		}

		i = processSwitchBlock(line, i);

	}   // end of for loop * end of for loop * end of for loop * end of for loop

	// lines inside an event table or declare section get one extra indent,
	// except lines that start with '#'
	if (isInEventTable || isInDeclareSection)
	{
		if (line.length() == 0 || line[0] != '#')
			indentLine(line, 1);
	}

	if (shouldIndentLine && sw.unindentDepth > 0)
		unindentLine(line, sw.unindentDepth);
}
/** * process the character at the current index in a switch block. * * @param line a reference to the line to indent. * @param index the current line index. * @return the new line index. */ size_t ASEnhancer::processSwitchBlock(string& line, size_t index) { size_t i = index; bool isPotentialKeyword = isCharPotentialHeader(line, i); if (line[i] == '{') { sw.switchBracketCount++; if (lookingForCaseBracket) // if 1st after case statement { sw.unindentCase = true; // unindenting this case sw.unindentDepth++; lookingForCaseBracket = false; // not looking now } return i; } lookingForCaseBracket = false; // no opening bracket, don't indent if (line[i] == '}') // if close bracket { sw.switchBracketCount--; assert(sw.switchBracketCount <= bracketCount); if (sw.switchBracketCount == 0) // if end of switch statement { int lineUnindent = sw.unindentDepth; if (line.find_first_not_of(" \t") == i && switchStack.size() > 0) lineUnindent = switchStack[switchStack.size()-1].unindentDepth; if (shouldIndentLine) { if (lineUnindent > 0) i -= unindentLine(line, lineUnindent); shouldIndentLine = false; } switchDepth--; sw = switchStack.back(); switchStack.pop_back(); } return i; } if (isPotentialKeyword && (findKeyword(line, i, "case") || findKeyword(line, i, "default"))) { if (sw.unindentCase) // if unindented last case { sw.unindentCase = false; // stop unindenting previous case sw.unindentDepth--; } i = findCaseColon(line, i); i++; for (; i < line.length(); i++) // bypass whitespace { if (!isWhiteSpace(line[i])) break; } if (i < line.length()) { if (line[i] == '{') { bracketCount++; sw.switchBracketCount++; if (!isOneLineBlockReached(line, i)) unindentNextLine = true; return i; } } lookingForCaseBracket = true; i--; // need to process this char return i; } if (isPotentialKeyword) { string name = getCurrentWord(line, i); // bypass the entire name i += name.length() - 1; } return i; }
// Scans the next token from mIterator and returns its token code.
//
// Whitespace and comments are consumed first; comment text is collected
// through newComment()/pushCommentChar()/endComment(). The token text is
// collected through newToken()/pushTokenChar()/endToken().
//
// Returns END_OF_SLK_INPUT_ when there is no source or error buffer, when
// the input is exhausted, or when a block comment or quoted string is still
// open at end of input. Otherwise returns one of SCRIPT_CONTENT_, the value
// of getOperatorTokenValue(), DOT_, LPAREN_, RPAREN_, LBRACK_, RBRACK_,
// LBRACE_, RBRACE_, COMMA_, SEMI_, STRING_, NUMBER_, a non-zero result of
// handleStringOrScriptDelimiter(), or IDENTIFIER_.
short SlkToken::get(void)
{
    if (NULL == mSource || NULL == mErrorAndStringBuffer) {
        return END_OF_SLK_INPUT_;
    }

    for (;;) {
        if (*mIterator == '\0') {
            if (isCanFinish()) {
                newToken();
                endTokenWithEof();
                return END_OF_SLK_INPUT_;
            } else {
                // Ask the iterator for more input; a false result is treated as end of input.
                if (!mIterator.Load()) {
                    newToken();
                    endTokenWithEof();
                    return END_OF_SLK_INPUT_;
                }
            }
        }

        int isSkip = TRUE;
        // Skip comments and whitespace; repeat for as long as a pass consumed something.
        for (; isSkip && *mIterator != '\0';) {
            isSkip = FALSE;
            for (; isWhiteSpace(*mIterator); ++mIterator) {
                if (*mIterator == '\n') {
                    ++mLineNumber;
                    if (mCommentNum <= 0) {
                        mCommentOnNewLine = TRUE;
                    }
                }
                isSkip = TRUE;
            }
            // Single-line comment introduced by '#'
            if (*mIterator == '#') {
                newComment();
                for (; *mIterator != '\0' && *mIterator != '\n'; ++mIterator) {
                    // '\r' is dropped from the stored comment text
                    if (*mIterator != '\r')
                        pushCommentChar(*mIterator);
                }
                endComment();
                isSkip = TRUE;
            }
            // C++-style single-line and multi-line comments
            if (*mIterator == '/' && (*(mIterator + 1) == '/' || *(mIterator + 1) == '*')) {
                newComment();
                pushCommentChar(*mIterator);
                ++mIterator;
                if (*mIterator == '/') {
                    pushCommentChar(*mIterator);
                    ++mIterator;
                    for (; *mIterator != '\0' && *mIterator != '\n'; ++mIterator) {
                        if (*mIterator != '\r')
                            pushCommentChar(*mIterator);
                    }
                    isSkip = TRUE;
                } else if (*mIterator == '*') {
                    pushCommentChar(*mIterator);
                    ++mIterator;
                    for (;;) {
                        if (*mIterator != '\0') {
                            if (*mIterator == '\n') {
                                pushCommentChar(*mIterator);
                                ++mLineNumber;
                            } else if (*mIterator == '*' && *(mIterator + 1) == '/') {
                                // closing "*/": store both characters and leave the comment
                                pushCommentChar(*mIterator);
                                ++mIterator;
                                pushCommentChar(*mIterator);
                                ++mIterator;
                                break;
                            } else if (*mIterator != '\r') {
                                pushCommentChar(*mIterator);
                            }
                        } else {
                            // Buffer exhausted inside the comment: try to load more,
                            // otherwise finish with an end-of-input token.
                            if (mIterator.Load()) {
                                continue;
                            } else {
                                endComment();
                                newToken();
                                endTokenWithEof();
                                return END_OF_SLK_INPUT_;
                            }
                        }
                        ++mIterator;
                    }
                    isSkip = TRUE;
                }
                endComment();
            }
        }
        // A non-NUL character is the start of a token; otherwise loop to reload or finish.
        if (*mIterator != '\0')
            break;
    }
    newToken();
    if (isCanFinish())
        setCanFinish(FALSE);

    if (*mIterator == '{' && *(mIterator + 1) == ':') {
        // Embedded script block delimited by "{:" and ":}".
        ++mIterator;
        ++mIterator;
        int line = mLineNumber;
        // Search for the script terminator ":}"
        for (; *mIterator != '\0';) {
            while (*mIterator != '\0' && *mIterator != ':') {
                if (*mIterator == '\n')++mLineNumber;
                pushTokenChar(*mIterator);
                ++mIterator;
            }
            if (*mIterator == '\0')
                break;
            IScriptSource::Iterator next = mIterator + 1;
            if (*next == '}') {
                ++mIterator;
                ++mIterator;
                break;
            } else {
                // a ':' that is not followed by '}' is part of the script text
                pushTokenChar(*mIterator);
                ++mIterator;
            }
        }
        // NOTE(review): this also fires when ":}" is the very last input before '\0',
        // and no Load() is attempted inside this branch — confirm both are intended.
        if (*mIterator == '\0') {
            char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
            if (pInfo)
                tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:ExternScript can't finish!", line);
        }
        endToken();
        if (myhavelinefeed(mCurToken)){
            removeFirstAndLastEmptyLine();
        }
        return SCRIPT_CONTENT_;
    } else if (isOperator(*mIterator)) { // operator
        getOperatorToken();
        return getOperatorTokenValue();
    } else if (*mIterator == '.' && 0 == myisdigit(*(mIterator + 1), FALSE)) {
        // '.' not followed by a digit is a DOT_ token (otherwise it starts a number below)
        char c = *mIterator;
        ++mIterator;
        pushTokenChar(c);
        endToken();
        return DOT_;
    } else if (*mIterator == '(') {
        ++mIterator;
        pushTokenChar('(');
        endToken();
        return LPAREN_;
    } else if (*mIterator == ')') {
        ++mIterator;
        pushTokenChar(')');
        endToken();
        return RPAREN_;
    } else if (*mIterator == '[') {
        ++mIterator;
        pushTokenChar('[');
        endToken();
        return LBRACK_;
    } else if (*mIterator == ']') {
        ++mIterator;
        pushTokenChar(']');
        endToken();
        return RBRACK_;
    } else if (*mIterator == '{') {
        ++mIterator;
        pushTokenChar('{');
        endToken();
        return LBRACE_;
    } else if (*mIterator == '}') {
        ++mIterator;
        pushTokenChar('}');
        endToken();
        return RBRACE_;
    } else if (*mIterator == ',') {
        ++mIterator;
        pushTokenChar(',');
        endToken();
        return COMMA_;
    } else if (*mIterator == ';') {
        ++mIterator;
        pushTokenChar(';');
        endToken();
        return SEMI_;
    } else { // keyword, identifier or constant
        if (*mIterator == '"' || *mIterator == '\'') { // name or keyword enclosed in quotes
            int line = mLineNumber;
            char c = *mIterator; // the opening quote; the same character closes the string
            for (++mIterator; *mIterator != '\0' && *mIterator != c;) {
                if (*mIterator == '\n')++mLineNumber;
                if (*mIterator == '\\') {
                    // The backslash itself is not stored; the character after it is
                    // pushed verbatim by the statement below.
                    // NOTE(review): if the backslash is the last character before '\0',
                    // the '\0' is pushed and the iterator advances past it — confirm
                    // the iterator tolerates this.
                    //pushTokenChar(*mIterator);
                    ++mIterator;
                }
                pushTokenChar(*mIterator);
                ++mIterator;
                if (*mIterator == '\0') {
                    if (mIterator.Load()) {
                        continue;
                    } else {
                        char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
                        if (pInfo)
                            tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:String can't finish!", line);
                        endTokenWithEof();
                        return END_OF_SLK_INPUT_;
                    }
                }
            }
            if (*mIterator != '\0') {
                // step over the closing quote
                ++mIterator;
            } else {
                char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
                if (pInfo)
                    tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:String can't finish!", line);
            }
            endToken();
            if (myhavelinefeed(mCurToken)){
                removeFirstAndLastEmptyLine();
            }
            return STRING_;
        } else {
            // Unquoted word: a number as long as every character passes myisdigit(),
            // otherwise an identifier (or whatever handleStringOrScriptDelimiter() reports).
            int isNum = TRUE;
            int isHex = FALSE;
            if (*mIterator == '0' && *(mIterator + 1) == 'x') {
                isHex = TRUE;
                pushTokenChar(*mIterator);
                ++mIterator;
                pushTokenChar(*mIterator);
                ++mIterator;
            }
            for (; !isSpecialChar(*mIterator); ++mIterator) {
                if (*mIterator == '#')
                    break; // start of a '#' comment ends the word
                else if (*mIterator == '/') {
                    // "//" or "/*" starts a comment and ends the word; a lone '/' is kept
                    IScriptSource::Iterator next = mIterator + 1;
                    if (*next != '\0' && (*next == '/' || *next == '*')) {
                        break;
                    }
                } else if (*mIterator == '.') {
                    // '.' ends an identifier; inside a number it is kept only when a digit follows
                    if (!isNum) {
                        break;
                    } else {
                        IScriptSource::Iterator next = mIterator + 1;
                        if (0 == myisdigit(*next, isHex)) {
                            break;
                        }
                    }
                } else if (0 == myisdigit(*mIterator, isHex)) {
                    isNum = FALSE;
                }
                pushTokenChar(*mIterator);
            }
            endToken();
            if (isNum) {
                return NUMBER_;
            } else {
                int token = handleStringOrScriptDelimiter();
                if (token)
                    return token;
                return IDENTIFIER_;
            }
        }
    }
}
//creates the command struct based on tokens passed in command_t make_command(char *token, command_t subcommand) { //checks for more than one <, > containsCarrot(token); command_t cmdStruct = checked_malloc(sizeof(struct command)); //for subshell if(token == NULL){ cmdStruct->type = SUBSHELL_COMMAND; cmdStruct->input = 0; cmdStruct->output = 0; cmdStruct->u.subshell_command = subcommand; return cmdStruct; } int tokenTracker = 0; //if token starts with spaces... int i; for(i = 0; isWhiteSpace(token[i]); i++){ tokenTracker++; } int start = tokenTracker; char* command = checked_malloc(50); tokenTracker = getNextWord(tokenTracker, token, &command); if(command[0] == '&'){ cmdStruct->type = AND_COMMAND; cmdStruct->input = 0; cmdStruct->output = 0; } else if (command[0] == '|'){ if(command[1] == '|'){ cmdStruct->type = OR_COMMAND; } else { cmdStruct->type = PIPE_COMMAND; } cmdStruct->input = 0; cmdStruct->output = 0; } else if (command[0] == ';'){ cmdStruct->type = SEQUENCE_COMMAND; cmdStruct->input = 0; cmdStruct->output = 0; } else if (command[0] == '<' || command[0] == '>'){ fprintf(stderr, "%d: Incorrect syntax2 \n", line_number); exit(1); } else { cmdStruct->type = SIMPLE_COMMAND; int bufferSize = 10; char** wordArray = checked_malloc(sizeof(char*)*10); int numberOfWords = 0; wordArray[numberOfWords] = checked_malloc(tokenTracker+ 1); command[tokenTracker] = '\0'; if(containsCarrot(command)){ int buffSize = 50; char* com = checked_malloc(buffSize); int comstart = 0; while(command[comstart] != '<' && command[comstart]!= '>'){ if(comstart >= buffSize){ buffSize += 50; com = checked_realloc(com, sizeof(char*)*buffSize); } com[comstart] = command[comstart]; comstart++; } wordArray[numberOfWords] = com; tokenTracker = tokenTracker - 2 ; } else{ wordArray[numberOfWords] = command; } numberOfWords++; while(token[tokenTracker] != '\0'){ char* word = checked_malloc(50); tokenTracker++; start = tokenTracker; tokenTracker = getNextWord(tokenTracker, token, &word); if(word[0] != 
'<' && word[0] != '>' && (word[tokenTracker-start-1] == '<' || word[tokenTracker-start-1] == '>')){ word[tokenTracker-start-1] = '\0'; tokenTracker = tokenTracker-2; } else{ word[tokenTracker-start] = '\0'; } if(word[0] == '<'){ char* in = checked_malloc(50); tokenTracker = getInputOutput(tokenTracker, token, &in); cmdStruct->input = in; } else if (word[0] == '>'){ char* output = checked_malloc(50); tokenTracker = getInputOutput(tokenTracker, token, &output); cmdStruct->output = output; } else { if(numberOfWords >= bufferSize){ bufferSize += 50; wordArray = checked_realloc(wordArray, sizeof(char*)*bufferSize); } if(word[0] != '\0'){ wordArray[numberOfWords] = checked_malloc(tokenTracker-start); wordArray[numberOfWords] = word; numberOfWords++; cmdStruct->input = 0; cmdStruct->output = 0; } } } wordArray[numberOfWords] = '\0'; cmdStruct->u.word = wordArray; } return cmdStruct; }
void simplePreprocess( vector<string> &allFileNames, const std::string &fileName, const vector<string> &input, vector<string> &output, const vector<string> *inputConstants, ostream *errorStream, int recursionLevel) { if(!errorStream) errorStream = &cout; if(recursionLevel > 20) { (*errorStream) << "Too many levels of #include recursion in shader: " << fileName << endl; return; } unsigned int lineNumber = 0; // Add header if(inputConstants) { for(unsigned int i = 0; i < (*inputConstants).size(); i++) { output.push_back(string("#define ") + (*inputConstants)[i]); } } // Correct the line number. output.push_back( getFileLineNumber( allFileNames, fileName, 0)); while(lineNumber < input.size()) { int c = 0; // Read to the first non-whitespace thing in a line. while(input[lineNumber][c] && isWhiteSpace(input[lineNumber][c])) { c++; } // TODO: Extend this to understand "#pragma once" or at // the very least #ifdefs so we can make #include guards. if(input[lineNumber][c] == '#') { // Skip the '#' c++; // Tokenize it. std::vector<std::string> preProcTokens; stringTokenize(input[lineNumber].c_str() + c, " ", preProcTokens, false); if(preProcTokens.size()) { if(preProcTokens[0] == "include") { if(preProcTokens.size() > 1) { string includeFileName = preProcTokens[1]; // Knock off quotes or brackets around the // name. They don't really matter here. if(includeFileName[0] == '"' || includeFileName[0] == '<') { // Chop off the end. includeFileName[includeFileName.size() - 1] = 0; // Chop off the start. includeFileName = includeFileName.c_str() + 1; // Note, if this code changes, be // aware that C++ strings will happily // store the \0 in the string, and // screw everything up later. } // Load the included file. char *code = NULL; int codeLength = 0; code = FileSystem::loadFile(includeFileName, &codeLength, true); if(code) { // Process another file's contents // straight into our own output list. 
vector<string> inputLines; stringTokenize(code, "\n", inputLines, true); simplePreprocess( allFileNames, includeFileName, inputLines, output, NULL, errorStream, recursionLevel + 1); // Get back to the correct line // number. // // FIXME: Apparently this doesn't work // // with shaders on Intel's // // drivers. Works on nVidia, need to // // test ATI. // ostringstream str; // str << "#line " << lineNumber; // output.push_back(str.str()); output.push_back( getFileLineNumber( allFileNames, fileName, lineNumber)); } else { // Error: Bad #include (*errorStream) << "Couldn't open " << includeFileName.c_str() << "." << endl; } delete[] code; } else { // Error: Bad #include } } else { // If it's a directive we don't recognize // here, it's probably something that GLSL // already handles, so just pass it through. output.push_back(input[lineNumber]); } } else { // Error: Had just a '#' on the line. } } else { // Normal line of code. output.push_back(input[lineNumber]); } lineNumber++; } }
// Splits one authentication challenge into its scheme and key/value pairs.
//
// keys on even indexes, values on odd indexes. Reduces code expansion for the templated
// alternatives.
// If "ba" starts with empty content it will be removed from ba to simplify later calls
//
// ba       - the challenge text; leading whitespace/commas are stripped from it in place.
// scheme   - must be non-null (asserted); receives the first whitespace-delimited word.
// nextAuth - optional; when parsing stops because the start of another scheme is
//            detected, receives the rest of the text from that point on.
// Returns the list of keys and values. A trailing key without a value is dropped,
// except when it is the only entry.
static QList<QByteArray> parseChallenge(QByteArray &ba, QByteArray *scheme, QByteArray* nextAuth = 0)
{
    QList<QByteArray> values;
    const char *b = ba.constData();
    int len = ba.count();
    // start/end delimit the piece currently being scanned; pos remembers where the
    // '=' search stopped; pos2 is the first non-whitespace index of the key candidate.
    int start = 0, end = 0, pos = 0, pos2 = 0;

    // parse scheme
    while (start < len && isWhiteSpaceOrComma(b[start])) {
        start++;
    }
    end = start;
    while (end < len && !isWhiteSpace(b[end])) {
        end++;
    }

    // drop empty stuff from the given string, it would have to be skipped over and over again
    if (start != 0) {
        ba = ba.mid(start);
        end -= start;
        len -= start;
        start = 0;
        // ba was reassigned, so the raw pointer has to be fetched again
        b = ba.constData();
    }
    Q_ASSERT(scheme);
    *scheme = ba.left(end);

    // Each iteration consumes one "key = value" pair.
    while (end < len) {
        start = end;
        // the key runs up to the next '=' (or to the end of the data)
        while (end < len && b[end] != '=') {
            end++;
        }
        pos = end; // save the end position
        while (end - 1 > start && isWhiteSpace(b[end - 1])) { // trim whitespace
            end--;
        }
        pos2 = start;
        while (pos2 < end && isWhiteSpace(b[pos2])) { // skip whitespace
            pos2++;
        }
        // Stop when the candidate contains a scheme name, or when no '=' was found
        // before the end of the data and the candidate begins with a comma.
        if (containsScheme(b, start, end) || (b[pos2] == ',' && b[pos] != '=' && pos == len)) {
            if (nextAuth) {
                *nextAuth = QByteArray (b + start);
            }
            break;  // break on start of next scheme.
        }
        while (start < len && isWhiteSpaceOrComma(b[start])) {
            start++;
        }
        values.append(QByteArray (b + start, end - start));
        end = pos; // restore the end position
        if (end == len) {
            // key without '=': nothing more to parse
            break;
        }

        // parse value
        start = end + 1;    //skip '='
        while (start < len && isWhiteSpace(b[start])) {
            start++;
        }

        if (b[start] == '"') {
            //quoted string
            bool hasBs = false;   // at least one backslash escape was seen
            bool hasErr = false;  // data ended right after an escape
            end = ++start;
            while (end < len) {
                if (b[end] == '\\') {
                    end++;
                    // after the escaped character there must still be room for
                    // at least one more byte (the closing quote)
                    if (end + 1 >= len) {
                        hasErr = true;
                        break;
                    } else {
                        hasBs = true;
                        end++;
                    }
                } else if (b[end] == '"') {
                    break;
                } else {
                    end++;
                }
            }
            if (hasErr || (end == len)) {
                // remove the key we already inserted
                kDebug(7113) << "error in quoted text for key" << values.last();
                values.removeLast();
                break;
            }
            QByteArray value = QByteArray(b + start, end - start);
            if (hasBs) {
                // Remove each escaping backslash. After the removal the escaped
                // character sits at index i, and the next search starts at i + 1:
                // skip over the next character, it might be an escaped backslash
                int i = -1;
                while ( (i = value.indexOf('\\', i + 1)) >= 0 ) {
                    value.remove(i, 1);
                }
            }
            values.append(value);
            end++; // step over the closing quote
        } else {
            //unquoted string
            end = start;
            while (end < len && b[end] != ',' && !isWhiteSpace(b[end])) {
                end++;
            }
            values.append(QByteArray(b + start, end - start));
        }

        //the quoted string has ended, but only a comma ends a key-value pair
        while (end < len && isWhiteSpace(b[end])) {
            end++;
        }

        // garbage, here should be end or field delimiter (comma)
        if (end < len && b[end] != ',') {
            kDebug(7113) << "unexpected character" << b[end] << "found in WWW-authentication header where token boundary (,) was expected";
            break;
        }
    }
    // ensure every key has a value
    // WARNING: Do not remove the > 1 check or parsing a Type 1 NTLM
    // authentication challenge will surely fail.
    if (values.count() > 1 && values.count() % 2) {
        values.removeLast();
    }
    return values;
}
// Scans the next token from mIterator and returns its token code.
//
// Whitespace and comments are skipped (their text is discarded); the token
// text is collected through pushTokenChar() and closed with endToken().
//
// Returns END_OF_SLK_INPUT_ when there is no source or error buffer, when
// the input is exhausted, when a block comment or quoted string is still
// open at end of input, or for a delimiter that is not listed in the switch
// below. Otherwise returns SCRIPT_CONTENT_, the value of
// getOperatorTokenValue(), DOT_, a delimiter code, STRING_, NUMBER_ or
// IDENTIFIER_.
short SlkToken::get(void)
{
    if (NULL == mSource || NULL == mErrorAndStringBuffer) {
        return END_OF_SLK_INPUT_;
    }
    newToken();

    for (;;) {
        if (*mIterator == '\0') {
            if (isCanFinish()) {
                endTokenWithEof();
                return END_OF_SLK_INPUT_;
            } else {
                // Ask the iterator for more input; a false result is treated as end of input.
                if (!mIterator.Load()) {
                    endTokenWithEof();
                    return END_OF_SLK_INPUT_;
                }
            }
        }

        int isSkip = TRUE;
        // Skip comments and whitespace; repeat for as long as a pass consumed something.
        for (; isSkip && *mIterator != '\0';) {
            isSkip = FALSE;
            for (; isWhiteSpace(*mIterator); ++mIterator) {
                if (*mIterator == '\n')++mLineNumber;
                isSkip = TRUE;
            }
            // Single-line comment introduced by '#'
            if (*mIterator == '#') {
                for (; *mIterator != '\0' && *mIterator != '\n'; ++mIterator);
                isSkip = TRUE;
            }
            // C++-style single-line and multi-line comments
            if (*mIterator == '/' && (*(mIterator + 1) == '/' || *(mIterator + 1) == '*')) {
                ++mIterator;
                if (*mIterator == '/') {
                    ++mIterator;
                    for (; *mIterator != '\0' && *mIterator != '\n'; ++mIterator);
                    isSkip = TRUE;
                } else if (*mIterator == '*') {
                    ++mIterator;
                    for (;;) {
                        if (*mIterator != '\0') {
                            if (*mIterator == '\n')++mLineNumber;
                            if (*mIterator == '*' && *(mIterator + 1) == '/') {
                                // closing "*/": step over both characters
                                ++mIterator;
                                ++mIterator;
                                break;
                            }
                        } else {
                            // Buffer exhausted inside the comment: try to load more,
                            // otherwise finish with an end-of-input token.
                            if (mIterator.Load()) {
                                continue;
                            } else {
                                endTokenWithEof();
                                return END_OF_SLK_INPUT_;
                            }
                        }
                        ++mIterator;
                    }
                    isSkip = TRUE;
                }
            }
        }
        // A non-NUL character is the start of a token; otherwise loop to reload or finish.
        if (*mIterator != '\0')
            break;
    }

    if (isCanFinish())
        setCanFinish(FALSE);

    if (*mIterator == '{' && *(mIterator + 1) == ':') {
        // Embedded script block delimited by "{:" and ":}".
        ++mIterator;
        ++mIterator;
        int line = mLineNumber;
        // Search for the script terminator ":}"
        for (; *mIterator != '\0';) {
            while (*mIterator != '\0' && *mIterator != ':') {
                if (*mIterator == '\n')++mLineNumber;
                pushTokenChar(*mIterator);
                ++mIterator;
            }
            if (*mIterator == '\0')
                break;
            IScriptSource::Iterator next = mIterator + 1;
            if (*next == '}') {
                ++mIterator;
                ++mIterator;
                break;
            } else {
                // a ':' that is not followed by '}' is part of the script text
                pushTokenChar(*mIterator);
                ++mIterator;
            }
        }
        // NOTE(review): this also fires when ":}" is the very last input before '\0',
        // and no Load() is attempted inside this branch — confirm both are intended.
        if (*mIterator == '\0') {
            char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
            if (pInfo)
                tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:ExternScript can't finish!", line);
        }
        endToken();
        return SCRIPT_CONTENT_;
    } else if (isOperator(*mIterator)) // operator
    {
        getOperatorToken();
        return getOperatorTokenValue();
    } else if (*mIterator == '.' && 0 == myisdigit(*(mIterator + 1), FALSE)) {
        // '.' not followed by a digit is a DOT_ token (otherwise it starts a number below)
        char c = *mIterator;
        ++mIterator;
        pushTokenChar(c);
        endToken();
        return DOT_;
    } else if (isDelimiter(*mIterator)) // delimiter
    {
        char c = *mIterator;
        ++mIterator;
        pushTokenChar(c);
        endToken();

        switch (c) {
        case '(':
            return LPAREN_;
        case ')':
            return RPAREN_;
        case '[':
            return LBRACK_;
        case ']':
            return RBRACK_;
        case '{':
            return LBRACE_;
        case '}':
            return RBRACE_;
        case ',':
            return COMMA_;
        case ';':
            return SEMI_;
        default:
            // a character isDelimiter() accepts but that has no token code here
            return END_OF_SLK_INPUT_;
        }
    } else // keyword, identifier or constant
    {
        if (*mIterator == '"' || *mIterator == '\'') // name or keyword enclosed in quotes
        {
            int line = mLineNumber;
            char c = *mIterator; // the opening quote; the same character closes the string
            for (++mIterator; *mIterator != '\0' && *mIterator != c;) {
                if (*mIterator == '\n')++mLineNumber;
                if (*mIterator == '\\') {
                    // The backslash itself is not stored; the character after it is
                    // pushed verbatim by the statement below.
                    // NOTE(review): if the backslash is the last character before '\0',
                    // the '\0' is pushed and the iterator advances past it — confirm
                    // the iterator tolerates this.
                    //pushTokenChar(*mIterator);
                    ++mIterator;
                }
                pushTokenChar(*mIterator);
                ++mIterator;
                if (*mIterator == '\0') {
                    if (mIterator.Load()) {
                        continue;
                    } else {
                        char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
                        if (pInfo)
                            tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:String can't finish!", line);
                        endTokenWithEof();
                        return END_OF_SLK_INPUT_;
                    }
                }
            }
            if (*mIterator != '\0') {
                // step over the closing quote
                ++mIterator;
            } else {
                char* pInfo = mErrorAndStringBuffer->NewErrorInfo();
                if (pInfo)
                    tsnprintf(pInfo, MAX_ERROR_INFO_CAPACITY, "[line %d ]:String can't finish!", line);
            }
            endToken();
            return STRING_;
        } else {
            // Unquoted word: a number as long as every character passes myisdigit(),
            // otherwise an identifier.
            int isNum = TRUE;
            int isHex = FALSE;
            if (*mIterator == '0' && *(mIterator + 1) == 'x') {
                isHex = TRUE;
                pushTokenChar(*mIterator);
                ++mIterator;
                pushTokenChar(*mIterator);
                ++mIterator;
            }
            for (; *mIterator != '\0' && !isDelimiter(*mIterator) && !isWhiteSpace(*mIterator) && !isOperator(*mIterator); ++mIterator) {
                if (*mIterator == '#')
                    break; // start of a '#' comment ends the word
                else if (*mIterator == '/') {
                    // "//" or "/*" starts a comment and ends the word; a lone '/' is kept
                    IScriptSource::Iterator next = mIterator + 1;
                    if (*next != '\0' && (*next == '/' || *next == '*')) {
                        break;
                    }
                } else if (*mIterator == '.') {
                    // '.' ends an identifier; inside a number it is kept only when a digit follows
                    if (!isNum) {
                        break;
                    } else {
                        IScriptSource::Iterator next = mIterator + 1;
                        if (0 == myisdigit(*next, isHex)) {
                            break;
                        }
                    }
                } else if (0 == myisdigit(*mIterator, isHex)) {
                    isNum = FALSE;
                }
                pushTokenChar(*mIterator);
            }
            endToken();
            if (isNum)
                return NUMBER_;
            else
                return IDENTIFIER_;
        }
    }
}
// Advances over consecutive whitespace. Stops at end of input, at the first
// position that is not whitespace, or as soon as next() returns false.
void skipWhiteSpace()
{
    for (;;) {
        if (eof())
            return;
        if (!isWhiteSpace())
            return;
        if (!next())
            return;
    }
}
// Returns a pointer to the first position in `line` that is a line break
// ('\n' or '\r'), the terminating '\0', or something isWhiteSpace() rejects.
// Line breaks are never skipped, so the result stays on the same line.
static char* skipWhiteSpace(const char* line)
{
    const char* cursor = line;
    for (;;) {
        const char ch = *cursor;
        if (ch == '\n' || ch == '\r' || ch == '\0')
            break;
        if (!isWhiteSpace(cursor))
            break;
        cursor++;
    }
    return ((char*)cursor);
}
// Returns non-zero when c may appear in the input: a letter, a digit,
// whitespace, an operator character, one of the listed punctuation
// characters, a newline, or EOF.
int isValid (char c){
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF). Passing a negative plain char is undefined
  // behaviour, so classify through an unsigned char copy.
  const unsigned char uc = (unsigned char) c;

  return(c == '!' || c == '%' || c == '+' || c == ',' || c == '-' || c == '.' || c == '/' || c == ':' || c == '@' || c == '^' || c == '_'
         || isdigit(uc) || isalpha(uc) || isWhiteSpace(c) || isOperator(c)
         || c == '(' || c == ')' || c == '<' || c == '>' || c == '#' || c == '\n' || c == EOF);
}
//returns an entire command stream; points to head of tree
//grabs command struct 1, operator, command struct 2 and makes them a tree
//
// get_next_byte / get_next_byte_argument - byte source, handed on to read_token().
// flag - set to 1 when EOF is reached.
//
// Returns the root of the command tree, or 0 when EOF is found where the
// first command should start. Syntax errors print a numbered message to
// stderr and exit(1).
command_t
make_single_command_stream (int (*get_next_byte) (void *), void *get_next_byte_argument, int *flag)
{
  command_t a;   // left operand; becomes the accumulated tree
  command_t b;   // right operand of the current operator
  command_t o;   // the operator command
  int position = 0;
  int check_newline = 0;
  char *token_a = read_token(get_next_byte, get_next_byte_argument, &position, check_newline);

  //check for comments, if whiteSpace in front of comment
  // NOTE(review): i is computed here but not read afterwards in this function;
  // the loop below tests token_a[0] rather than token_a[i] - confirm intent.
  int i = 0;
  while(isWhiteSpace(token_a[i])){
    i++;
  }
  // keep reading while the token starts with a newline or a '#'
  while (token_a[0] == '\n' || token_a[0] == '#') {
    position = 0;
    token_a = read_token(get_next_byte, get_next_byte_argument, &position, check_newline);
  }
  if (token_a[position] == EOF){
    *flag = 1;
    return 0;
  }

  //Checking if token_a is an operator
  int k = 0;
  while(isWhiteSpace(token_a[k])){
    k++;
  }
  if(isOperator(token_a[k])){
    fprintf(stderr, "%d: Incorrect Syntax1 \n", line_number);
    exit(1);
  }

  char op;   // byte read after a '|'; only assigned in the '|' branch below
  char *operator = (char*)checked_malloc(5);
  if (token_a[position] == '(') {
    // subshell: parse the nested stream recursively, then wrap it
    paren_number++;
    a = make_single_command_stream(get_next_byte, get_next_byte_argument, flag);
    a = make_command(NULL, a);
    // NOTE(review): the 5-byte buffer allocated above is no longer referenced after this
    operator = read_token(get_next_byte, get_next_byte_argument, &position, 0);
  } else {
    // the character at `position` is taken as the operator; the token is cut there
    operator[0] = token_a[position];
    token_a[position] = '\0';
    a = make_command(token_a, NULL);
  }

  // Consume "operator command" pairs until EOF or a closing parenthesis.
  while (*operator != EOF && *operator != ')') {
    k = 0;
    while(isWhiteSpace(operator[k])){
      k++;
    }
    // NOTE(review): this moves the pointer itself forward past the whitespace
    operator += k;
    // outside parentheses a ';' or newline ends this stream
    if (paren_number == 0 && *operator == ';')
      break;
    if (paren_number == 0 && *operator == '\n')
      break;
    position = 0;
    if (operator[0] == '&') {
      // '&' must be followed by a second '&'
      operator[1] = get_next_byte(get_next_byte_argument);
      if(operator[1] == ';'){
        fprintf(stderr, "%d: Incorrect Syntax2 \n", line_number);
        exit(1);
      }else if(operator[1] != '&') {
        fprintf(stderr, "%d: Incorrect Syntax3 \n", line_number);
        exit(1);
      }
      operator[2] = '\0';
    } else if (operator[0] == '|') {
      // read one byte ahead to tell "||" from "|"
      op = get_next_byte(get_next_byte_argument);
      if (op == '|'){
        // NOTE(review): the next statement is a comparison whose result is
        // discarded, not an assignment - it has no effect.
        op == '\0';
        operator[1] = '|';
        operator[2] = '\0';
      } else if( op == ';'){
        fprintf(stderr, "%d: Incorrect Syntax4 \n", line_number);
        exit(1);
      } else {
        // single '|': op keeps the byte read ahead; it is put back in front
        // of token_b further down
        operator[1] = '\0';
      }
    } else if(operator[0] == ';' || operator[0] == EOF) {
      operator[1] = '\0';
    } else if(operator[0] == '\n'){
      // a newline operator is rewritten as ';'
      operator[0] = ';';
      operator[1] = '\0';
    }
    o = make_command(operator, NULL);
    char *token_b = read_token(get_next_byte, get_next_byte_argument, &position, check_newline);

    //check for #, even ones that start with spaces in front of #
    // re-read while the token is just a newline, unless the operator is a sequence
    while (position == 0 && check_newline == 0 && token_b[position] == '\n' && o->type != SEQUENCE_COMMAND) {
      token_b = read_token(get_next_byte, get_next_byte_argument, &position, check_newline);
    }

    if (o->type == PIPE_COMMAND && op != '\n') {
      // Shift token_b one place to the right and store the byte read after
      // the '|' at index 0; position grows by one accordingly.
      // NOTE(review): assumes token_b has room for one more byte - confirm
      // against read_token().
      int i = 0;
      char store = token_b[0];
      token_b[0] = op;
      op = token_b[1];
      i++;
      while (i <= position) {
        token_b[i] = store;
        store = op;
        i++;
        op = token_b[i];
      }
      position++;
      token_b[position] = store;
    }

    //check token_b for operator or EOF
    int w =0;
    while(isWhiteSpace(token_b[w])){
      w++;
    }
    if(isOperator(token_b[w]) || token_b[w] == EOF){
      fprintf(stderr, "%d: Incorrect Syntax6 \n", line_number);
      exit(1);
    }

    if (token_b[position] == '(') {
      // subshell on the right-hand side
      paren_number++;
      b = make_single_command_stream(get_next_byte, get_next_byte_argument, flag);
      b = make_command(NULL, b);
      operator = read_token(get_next_byte, get_next_byte_argument, &position, check_newline);
    } else {
      // the next operator is the newline just before `position` if there is
      // one, otherwise the character at `position`; the token is cut there
      if (token_b[position-1] == '\n') {
        operator[0] = token_b[position-1];
        token_b[position-1] = '\0';
      } else {
        operator[0] = token_b[position];
        token_b[position] = '\0';
      }
      b = make_command(token_b, NULL);
    }
    // fold the new operator and operand into the tree built so far
    a = create_command_tree(a, o, b);
  }
  if(*operator == EOF)
    *flag = 1;
  if(paren_number != 0) {
    fprintf(stderr, "%d: Invalid Syntax7", line_number);
    exit(1);
  }
  return a;
}
// Moves p forward to the first character that is not whitespace, or to the
// terminating '\0' if only whitespace remains.
void StringTokenizer::skipWhiteSpace()
{
    for (; *p; ++p) {
        if (!isWhiteSpace(*p))
            break;
    }
}
/****************************************************************************
Desc:	Parse a single line from the ini file into its name, value and
		comment parts.

		Accepted layout:  [name] [= value] [# comment]

		pszBuf      - the line; only the first uiNumBytes bytes are read.
		uiNumBytes  - number of bytes in the line (must be non-zero).

		Returns NE_FLM_SYNTAX when something other than '=' or '#' follows
		the name, or when something other than whitespace or a comment
		follows the value. Allocation failures from the pool are returned
		unchanged. A line holding a name or a comment is appended to the
		list of lines; a blank line is ignored.
****************************************************************************/
RCODE F_IniFile::parseBuffer(
	char *		pszBuf,
	FLMUINT		uiNumBytes)
{
	RCODE			rc = NE_FLM_OK;
	FLMUINT		uiCurrentChar = 0;
	char *		pszNameStart = NULL;
	char *		pszNameEnd = NULL;
	char *		pszValStart = NULL;
	char *		pszValEnd = NULL;
	char *		pszCommentStart = NULL;
	INI_LINE *	pLine = NULL;
	FLMUINT		uiStrLen = 0;

	f_assert( pszBuf);
	f_assert( uiNumBytes);

	// Start looking for the parameter name...

	while (uiCurrentChar < uiNumBytes)
	{
		if( !isWhiteSpace( pszBuf[uiCurrentChar]))
		{
			if (pszBuf[uiCurrentChar] == '#')
			{
				goto Comment;
			}
			else
			{
				pszNameStart = &pszBuf[uiCurrentChar];
				break;
			}
		}
		uiCurrentChar++;
	}

	// We've found a param name, now mark the end of it
	// We determine the end by looking for whitespace or '='
	// or '#'

	while (uiCurrentChar < uiNumBytes)
	{
		if( isWhiteSpace( pszBuf[uiCurrentChar]) ||
			 (pszBuf[uiCurrentChar] == '=') ||
			 (pszBuf[uiCurrentChar] == '#'))
		{
			pszNameEnd = &pszBuf[uiCurrentChar-1];
			break;
		}

		uiCurrentChar++;
	}

	// The name ran to the end of the line

	if( (uiCurrentChar == uiNumBytes) &&
		 (pszNameEnd == NULL) )
	{
		pszNameEnd = &pszBuf[uiCurrentChar - 1];
	}

	// Now, there may be a value part or a comment part next.  If there's a
	// value, it had better be preceeded by an '='

	while( (uiCurrentChar < uiNumBytes) &&
			 isWhiteSpace( pszBuf[uiCurrentChar]) )
	{
		uiCurrentChar++;
	}

	if( uiCurrentChar < uiNumBytes && pszBuf[ uiCurrentChar] == '#')
	{
		goto Comment;
	}

	if( uiCurrentChar < uiNumBytes && pszBuf[uiCurrentChar] != '=' )
	{
		rc = RC_SET( NE_FLM_SYNTAX);
		goto Exit;
	}

	// Ok - at this point pszBuf[uiCurrentChar] contains an =.  Skip over
	// the = and any whitespace that follows.

	while( uiCurrentChar < uiNumBytes)
	{
		uiCurrentChar++;

		// The line ended after the '=' (and optional whitespace). Do not
		// read pszBuf[uiNumBytes]: that byte lies outside the line. Record
		// an empty value instead, which is what happened before whenever
		// the byte behind the line was not whitespace.

		if( uiCurrentChar == uiNumBytes)
		{
			pszValStart = &pszBuf[uiCurrentChar];
			break;
		}

		if( !isWhiteSpace( pszBuf[uiCurrentChar]))
		{
			pszValStart = &pszBuf[uiCurrentChar];
			break;
		}
	}

	// Now mark the end of the value.
	// We determine the end by looking for whitespace or '#'

	while( uiCurrentChar < uiNumBytes)
	{
		if( isWhiteSpace( pszBuf[uiCurrentChar]) ||
			 (pszBuf[uiCurrentChar] == '#'))
		{
			pszValEnd = &pszBuf[uiCurrentChar-1];
			break;
		}

		uiCurrentChar++;
	}

	// The value ran to the end of the line

	if( uiCurrentChar == uiNumBytes && !pszValEnd)
	{
		pszValEnd = &pszBuf[uiCurrentChar-1];
	}

Comment:

	// Check out the rest of the line to see if there's a comment

	while( uiCurrentChar < uiNumBytes)
	{
		if( !isWhiteSpace( pszBuf[ uiCurrentChar]) &&
			 pszBuf[ uiCurrentChar] != '#')
		{
			rc = RC_SET( NE_FLM_SYNTAX);
			goto Exit;
		}
		else if( pszBuf[ uiCurrentChar] == '#')
		{
			// Comment found.  Set pszCommentStart to the next char

			pszCommentStart = &pszBuf[uiCurrentChar+1];
			break;
		}
		uiCurrentChar++;
	}

	// Done parsing.  Now, assuming the line had any info in it,
	// store all the strings...

	if( pszNameStart || pszCommentStart)
	{
		if( RC_BAD( rc = m_pool.poolCalloc( sizeof( INI_LINE),
										(void **)&pLine)))
		{
			goto Exit;
		}

		if( pszNameStart)
		{
			uiStrLen = pszNameEnd - pszNameStart + 1;
			if( RC_BAD( rc = m_pool.poolAlloc( uiStrLen + 1,
										(void **)&pLine->pszParamName)))
			{
				goto Exit;
			}

			f_memcpy( pLine->pszParamName, pszNameStart, uiStrLen);
			pLine->pszParamName[uiStrLen] = '\0';
		}

		if( pszValStart)
		{
			uiStrLen = pszValEnd - pszValStart + 1;
			if( RC_BAD( rc = m_pool.poolAlloc( uiStrLen + 1,
										(void **)&pLine->pszParamValue)))
			{
				goto Exit;
			}

			f_memcpy(pLine->pszParamValue, pszValStart, uiStrLen);
			pLine->pszParamValue[uiStrLen] = '\0';
		}

		if (pszCommentStart)
		{
			// The comment is everything from just after the '#' to the
			// end of the line

			uiStrLen = uiNumBytes-(pszCommentStart-pszBuf);
			if (RC_BAD( rc = m_pool.poolAlloc( uiStrLen + 1,
										(void **)&pLine->pszComment)))
			{
				goto Exit;
			}

			f_memcpy(pLine->pszComment, pszCommentStart, uiStrLen);
			pLine->pszComment[uiStrLen] = '\0';
		}

		// Insert this struct into the linked list

		if( m_pLastLine)
		{
			m_pLastLine->pNext = pLine;
		}

		pLine->pPrev = m_pLastLine;
		pLine->pNext = NULL;
		m_pLastLine = pLine;

		if( !m_pFirstLine)
		{
			m_pFirstLine = pLine;
		}
	}

Exit:

	return( rc);
}
bool MorkParser::parseTable() { bool Result = true; std::string TextId; int Id = 0, Scope = 0; char cur = nextChar(); // Get id while ( cur != '{' && cur != '[' && cur != '}' && cur ) { if ( !isWhiteSpace( cur ) ) { TextId += cur; } cur = nextChar(); } parseScopeId( TextId, &Id, &Scope ); // Parse the table while ( Result && cur != '}' && cur ) { if ( !isWhiteSpace( cur ) ) { switch ( cur ) { case '{': Result = parseMeta( '}' ); break; case '[': Result = parseRow( Id, Scope ); break; case '-': case '+': break; default: { std::string JustId; while ( !isWhiteSpace( cur ) && cur ) { JustId += cur; cur = nextChar(); if ( cur == '}' ) { return Result; } } int JustIdNum = 0, JustScopeNum = 0; parseScopeId( JustId, &JustIdNum, &JustScopeNum ); setCurrentRow( Scope, Id, JustScopeNum, JustIdNum ); } break; } } cur = nextChar(); } return Result; }
// Reads and returns the next token.
//
// Whitespace and line comments are skipped first. A newline yields a
// TK_Newline token only in interactive mode with no open braces. After
// that the token kind is decided by the character at the read position.
// Returns TK_EOF at end of input and TK_Error when no token can be formed.
Token DefaultLexer::readToken() {
  char c = lookChar();

  // Skip everything that does not start a token.
  for (;;) {
    // whitespace
    while (isWhiteSpace(c)) {
      skipChar();
      c = lookChar();
    }

    // newlines
    if (isNewline(c)) {
      readNewline(c);
      if (interactive_ && getCurrentBraceNesting() == 0)
        return Token(TK_Newline);
      c = lookChar();
      continue;
    }

    // line comments are treated like newlines
    if (c == '/' && lookChar(1) == '/') {
      readLineComment();
      c = lookChar();
      continue;
    }

    break;
  }

  SourceLocation sloc = getCurrentLocation();

  // punctuation
  switch (c) {
    case '(':
      skipChar();
      signalOpenBrace(TK_LParen);
      return Token(TK_LParen, "(", sloc);
    case ')':
      skipChar();
      signalCloseBrace(TK_LParen);
      return Token(TK_RParen, ")", sloc);
    case '{':
      skipChar();
      signalOpenBrace(TK_LCurlyBrace);
      return Token(TK_LCurlyBrace, "{", sloc);
    case '}':
      skipChar();
      signalCloseBrace(TK_LCurlyBrace);
      return Token(TK_RCurlyBrace, "}", sloc);
    case '[':
      skipChar();
      signalOpenBrace(TK_LSquareBrace);
      return Token(TK_LSquareBrace, "[", sloc);
    case ']':
      skipChar();
      signalCloseBrace(TK_LSquareBrace);
      return Token(TK_RSquareBrace, "]", sloc);
    case ',':
      skipChar();
      return Token(TK_Comma, ",", sloc);
    case ';':
      skipChar();
      return Token(TK_Semicolon, ";", sloc);
    case ':':
      // a colon followed by an operator character is left to the checks below
      if (!isOperatorChar(lookChar(1))) {
        skipChar();
        return Token(TK_Colon, ":", sloc);
      }
      break;
    case '.':
      skipChar();
      return Token(TK_Period, ".", sloc);
    default:
      break;
  }

  // identifiers (may turn out to be keywords)
  if (isLetter(c)) {
    readIdentifier(c);
    StringRef text = copyStr(finishToken());

    unsigned keywordId = lookupKeyword(text.c_str());
    if (keywordId)
      return Token(keywordId, text, sloc);
    return Token(TK_Identifier, text, sloc);
  }

  // generic operators (may turn out to be keywords)
  if (isOperatorChar(c)) {
    readOperator(c);
    StringRef text = copyStr(finishToken());

    unsigned keywordId = lookupKeyword(text.c_str());
    if (keywordId)
      return Token(keywordId, text, sloc);
    return Token(TK_Operator, text, sloc);
  }

  // numbers
  if (isDigit(c)) {
    readInteger(c);
    StringRef text = copyStr(finishToken());
    return Token(TK_LitInteger, text, sloc);
  }

  // characters
  if (c == '\'') {
    if (!readCharacter())
      return Token(TK_Error);

    StringRef text = copyStr(finishToken());
    return Token(TK_LitCharacter, text, sloc);
  }

  // strings
  if (c == '\"') {
    if (!readString())
      return Token(TK_Error);

    StringRef text = copyStr(finishToken());
    return Token(TK_LitString, text, sloc);
  }

  // if we're out of buffer, put in an EOF token.
  if (c == 0 || stream_eof())
    return Token(TK_EOF, "", sloc);

  // Can't get the next token -- signal an error and bail.
  signalLexicalError();
  return Token(TK_Error, "", sloc);
}
// Reads a TS document from this reader and fills `translator` with it.
//
// The expected structure, as enforced by the nested loops below, is
//   <TS> -> (<defaultcodec> | <extra-*> | <context>)*
//   <context> -> (<name> | <message>)*
//   <message> -> source/comment/location/translation/... children
// Anything else is reported through handleError().
//
// Returns true on success.  If hasError() is set after parsing, the error
// string is appended to m_cd and false is returned.
bool TSReader::read(Translator &translator)
{
    // Each STRING(x) is referenced below as `str<x>` (e.g. strTS, strname).
    // NOTE(review): the STRING macro itself is defined outside this block;
    // presumably it declares a string constant holding the tag/attribute name.
    STRING(both);
    STRING(byte);
    STRING(comment);
    STRING(context);
    STRING(defaultcodec);
    STRING(encoding);
    STRING(extracomment);
    STRING(filename);
    STRING(id);
    STRING(language);
    STRING(line);
    STRING(location);
    STRING(message);
    STRING(name);
    STRING(numerus);
    STRING(numerusform);
    STRING(obsolete);
    STRING(oldcomment);
    STRING(oldsource);
    STRING(source);
    STRING(sourcelanguage);
    STRING(translation);
    STRING(translatorcomment);
    STRING(true);
    STRING(TS);
    STRING(type);
    STRING(unfinished);
    STRING(userdata);
    STRING(utf8);
    STRING(value);
    //STRING(version);
    STRING(yes);

    // Prefix of user-defined extension tags and the "UTF-8" encoding name.
    static const QString strextrans(QLatin1String("extra-"));
    static const QString strUtf8(QLatin1String("UTF-8"));

    while (!atEnd()) {
        readNext();
        if (isStartDocument()) {
            // <!DOCTYPE TS>
            //qDebug() << attributes();
        } else if (isEndDocument()) {
            // <!DOCTYPE TS>
            //qDebug() << attributes();
        } else if (isDTD()) {
            // <!DOCTYPE TS>
            //qDebug() << tokenString();
        } else if (elementStarts(strTS)) {
            // <TS>
            //qDebug() << "TS " << attributes();

            // Running line number per file, used to resolve relative
            // (+N / -N) <location line="..."> values.
            QHash<QString, int> currentLine;
            // File name inherited by <location> elements that omit "filename".
            QString currentFile;

            QXmlStreamAttributes atts = attributes();
            //QString version = atts.value(strversion).toString();
            translator.setLanguageCode(atts.value(strlanguage).toString());
            translator.setSourceLanguageCode(atts.value(strsourcelanguage).toString());
            while (!atEnd()) {
                readNext();
                if (isEndElement()) {
                    // </TS> found, finish local loop
                    break;
                } else if (isWhiteSpace()) {
                    // ignore these, just whitespace
                } else if (elementStarts(strdefaultcodec)) {
                    // <defaultcodec>
                    const QString &codec = readElementText();
                    if (!codec.isEmpty())
                        translator.setCodecName(codec.toLatin1());
                    // </defaultcodec>
                } else if (isStartElement()
                        && name().toString().startsWith(strextrans)) {
                    // <extra-...>
                    QString tag = name().toString();
                    // mid(6) drops the 6-character "extra-" prefix.
                    translator.setExtra(tag.mid(6), readContents());
                    // </extra-...>
                } else if (elementStarts(strcontext)) {
                    // <context>
                    QString context;
                    while (!atEnd()) {
                        readNext();
                        if (isEndElement()) {
                            // </context> found, finish local loop
                            break;
                        } else if (isWhiteSpace()) {
                            // ignore these, just whitespace
                        } else if (elementStarts(strname)) {
                            // <name>
                            context = readElementText();
                            // </name>
                        } else if (elementStarts(strmessage)) {
                            // <message>
                            TranslatorMessage::References refs;
                            // Start from the file name carried over from
                            // earlier messages in this <TS>.
                            QString currentMsgFile = currentFile;

                            TranslatorMessage msg;
                            msg.setId(attributes().value(strid).toString());
                            msg.setContext(context);
                            // Default type; a <translation type="..."> may
                            // override it below.
                            msg.setType(TranslatorMessage::Finished);
                            msg.setPlural(attributes().value(strnumerus) == stryes);
                            const QStringRef &utf8Attr = attributes().value(strutf8);
                            // utf8="both" marks the message as non-UTF-8 and,
                            // through the next line, as UTF-8 as well.
                            msg.setNonUtf8(utf8Attr == strboth);
                            msg.setUtf8(msg.isNonUtf8() || utf8Attr == strtrue
                                 || attributes().value(strencoding) == strUtf8);
                            while (!atEnd()) {
                                readNext();
                                if (isEndElement()) {
                                    // </message> found, finish local loop
                                    msg.setReferences(refs);
                                    translator.append(msg);
                                    break;
                                } else if (isWhiteSpace()) {
                                    // ignore these, just whitespace
                                } else if (elementStarts(strsource)) {
                                    // <source>...</source>
                                    msg.setSourceText(readContents());
                                } else if (elementStarts(stroldsource)) {
                                    // <oldsource>...</oldsource>
                                    msg.setOldSourceText(readContents());
                                } else if (elementStarts(stroldcomment)) {
                                    // <oldcomment>...</oldcomment>
                                    msg.setOldComment(readContents());
                                } else if (elementStarts(strextracomment)) {
                                    // <extracomment>...</extracomment>
                                    msg.setExtraComment(readContents());
                                } else if (elementStarts(strtranslatorcomment)) {
                                    // <translatorcomment>...</translatorcomment>
                                    msg.setTranslatorComment(readContents());
                                } else if (elementStarts(strlocation)) {
                                    // <location/>
                                    QXmlStreamAttributes atts = attributes();
                                    QString fileName = atts.value(strfilename).toString();
                                    if (fileName.isEmpty()) {
                                        // No file given: reuse the most recent one.
                                        fileName = currentMsgFile;
                                    } else {
                                        // Only the first location of a message
                                        // updates the file carried over to
                                        // following messages.
                                        if (refs.isEmpty())
                                            currentFile = fileName;
                                        currentMsgFile = fileName;
                                    }
                                    const QString lin = atts.value(strline).toString();
                                    if (lin.isEmpty()) {
                                        // No line attribute: record line -1 and
                                        // flag the locations as relative.
                                        translator.setLocationsType(Translator::RelativeLocations);
                                        refs.append(TranslatorMessage::Reference(fileName, -1));
                                    } else {
                                        bool bOK;
                                        int lineNo = lin.toInt(&bOK);
                                        // A line value that is not an integer is
                                        // dropped without raising an error.
                                        if (bOK) {
                                            if (lin.startsWith(QLatin1Char('+')) || lin.startsWith(QLatin1Char('-'))) {
                                                // Signed value: offset from the
                                                // running line number of this file.
                                                lineNo = (currentLine[fileName] += lineNo);
                                                translator.setLocationsType(Translator::RelativeLocations);
                                            } else {
                                                translator.setLocationsType(Translator::AbsoluteLocations);
                                            }
                                            refs.append(TranslatorMessage::Reference(fileName, lineNo));
                                        }
                                    }
                                    // Consume the remainder of the element; the
                                    // result is discarded.
                                    readContents();
                                } else if (elementStarts(strcomment)) {
                                    // <comment>...</comment>
                                    msg.setComment(readContents());
                                } else if (elementStarts(struserdata)) {
                                    // <userdata>...</userdata>
                                    msg.setUserData(readContents());
                                } else if (elementStarts(strtranslation)) {
                                    // <translation>
                                    QXmlStreamAttributes atts = attributes();
                                    QStringRef type = atts.value(strtype);
                                    if (type == strunfinished)
                                        msg.setType(TranslatorMessage::Unfinished);
                                    else if (type == strobsolete)
                                        msg.setType(TranslatorMessage::Obsolete);
                                    if (msg.isPlural()) {
                                        // Plural messages hold one <numerusform>
                                        // child per form.
                                        QStringList translations;
                                        while (!atEnd()) {
                                            readNext();
                                            if (isEndElement()) {
                                                break;
                                            } else if (isWhiteSpace()) {
                                                // ignore these, just whitespace
                                            } else if (elementStarts(strnumerusform)) {
                                                translations.append(readTransContents());
                                            } else {
                                                handleError();
                                                break;
                                            }
                                        }
                                        msg.setTranslations(translations);
                                    } else {
                                        msg.setTranslation(readTransContents());
                                    }
                                    // </translation>
                                } else if (isStartElement()
                                        && name().toString().startsWith(strextrans)) {
                                    // <extra-...>
                                    QString tag = name().toString();
                                    // mid(6) drops the 6-character "extra-" prefix.
                                    msg.setExtra(tag.mid(6), readContents());
                                    // </extra-...>
                                } else {
                                    handleError();
                                }
                            }
                            // </message>
                        } else {
                            handleError();
                        }
                    }
                    // </context>
                } else {
                    handleError();
                }
            }
            // </TS>
        } else {
            handleError();
        }
    }
    // Report any error accumulated while parsing.
    if (hasError()) {
        m_cd.appendError(errorString());
        return false;
    }
    return true;
}
OV_RESULT ov_readConfigFile(ov_options* opts, char* configFile){ char lineStr[256]; char* startRead = NULL; char* temp = NULL; char* temp2 = NULL; char* temp3 = NULL; FILE* cfFile; OV_UINT j; OV_UINT line = 0; /* * parse config file * some memory is allocated on the heap. these variables are used while the server is running * hence this is no memleak. they are freed by the operating system on termination of the server * * * options from the commandline are NOT overwritten * lines starting with # and empty lines are ignored * Space and tab serve as whitespaces * lines are parsed as a whole * lines may be 256 characters long * no multi-line entries * * lines have the pattern * KEY VALUE * e.g. SERVERNAME ov_server1 * * recognized options are: * DBFILE path to database file. set to '-' to use no database * SERVERNAME name of this ov_server * HEAPSIZE size of TLSF heap pool * ID Ticket Identification for server access * PORT server port number * LIBRARY Start server with library. one library per line * LOGFILE logfile name for runtimeserver, you may use stdout, stderr or (on NT-systems) ntlog * ACTIVITYLOCK Locks OV activities (scheduler and accessorfnc). No argument * OPTION text Appends the option text to the * cmdline_options variable in Vendortree * text is mandatory * NOSTARTUP Do not startup the database. No argument * EXIT Exit immediately (test if database loads). No argument * EXEC IDENTIFIER CLASS LIBRARY Executes the first event in the schedulers * queue that matches concerning IDENTIFIER * CLASS and LIBRARY before starting the server. * All parameters are mandatory. Use '/' as wildcard. 
* ALLOWEDJITTER number of microseconds the scheduler may jitter before incrementing numExceeds in vondortree (the latter documents the realtime behaviour of the system) * KSMAXSTRLENGTH maximum length of strings to process with ks * KSMAXVECLENGTH maximum length of vectors to process with ks * KSMAXITEMSPERREQ maximum number of items per ks-request * DBSIZE db size when need to create * DBUTILLOG logfile name for dbutil, you may use stdout, stderr or (on NT-systems) ntlog * DBNOMAP read database but do not map * DBFORCECREATE force creation of a new database * old database file is removed */ if(!configFile || !(*configFile)){ return EXIT_FAILURE; } cfFile = fopen(configFile, "r"); if(!cfFile) { fprintf(stderr, "Could not open config file"); return EXIT_FAILURE; } clearerr(cfFile); /* get base path from path part of configFile */ #if OV_SYSTEM_NT for(j = strlen(configFile); configFile[j] != '\\' && configFile[j] != '/' && (j>0); j--); #else for(j = strlen(configFile); (configFile[j] != '/') && (j>0); j--); #endif if((j>0)) { opts->configBasePath = malloc(j+2); if(!opts->configBasePath) { ov_logfile_error("Could not reserve memory for basePath. 
Aborting."); return EXIT_FAILURE; } strncpy(opts->configBasePath, configFile, j+1); opts->configBasePath[j+1] = '\0'; //ov_logfile_debug("BasePath: %s", configBasePath); } /* * loop over lines */ while(fgets(lineStr, sizeof(lineStr), cfFile)) { line++; /* check if line complete */ if(!strchr(lineStr, '\n') && !feof(cfFile)) { ov_logfile_error("Error reading config file: line %u too long", line); return EXIT_FAILURE; } if(isComment(lineStr)) { /* skip line if comment */ continue; } startRead = skipWhiteSpace(lineStr); if(*startRead == '\0') break; /* probably EOF */ if(*startRead == '\n' || *startRead == '\r') continue; /* empty line */ /* set terminating '\0' at occurrence of newline or '#' */ terminateLine(startRead); /********************************************************************************** * parse parameters *********************************************************************************/ /* DBFILE */ if(strstr(startRead, "DBFILE")==startRead) { opt_clear_string(&opts->dbFilename); opts->dbFilename = readValue(startRead); if(!opts->dbFilename || !*opts->dbFilename){ return EXIT_FAILURE; } } /* SERVERNAME */ else if(strstr(startRead, "SERVERNAME")==startRead) { opt_clear_string(&opts->servername); opts->servername = readValue(startRead); if(!opts->servername || !*opts->servername){ return EXIT_FAILURE; } if(!serverNameOk(opts->servername)) { ov_logfile_error("Error parsing SERVERNAME in line %u: only A-Z, a-z, 0-9 and _ allowed in server name.", line); return EXIT_FAILURE; } } /* HEAPSIZE */ else if(strstr(startRead, "HEAPSIZE")==startRead) { #if TLSF_HEAP //ov_logfile_info("TLSF is activated"); temp = readValue(startRead); if(!temp || !*temp) return EXIT_FAILURE; if(!opts->poolsize){ opts->poolsize = strtoul(temp, NULL, 0); } else{ return OV_ERR_HEAPOUTOFMEMORY; } free(temp); #endif } /* ID */ else if(strstr(startRead, "ID")==startRead) { opt_clear_string(&opts->password); opts->password = readValue(startRead); if(!opts->password || !*opts->password){ 
return EXIT_FAILURE; } } /* PORT */ else if(strstr(startRead, "PORT")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->port = strtol(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for PORT.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* LIBRARY */ else if(strstr(startRead, "LIBRARY")==startRead) { if (opts->libcount<MAX_STARTUP_LIBRARIES) { opts->libraries[opts->libcount] = readValue(startRead); if(!opts->libraries[opts->libcount] || !*opts->libraries[opts->libcount]){ return EXIT_FAILURE; } opts->libcount++; } else ov_logfile_error("Too many libraries in start command and configfile.\n"); } /* LOGFILE */ else if((opts->ctx==ov_runtimeserver && strstr(startRead, "LOGFILE")==startRead )|| (opts->ctx==ov_dbutil && strstr(startRead, "DBUTILLOG")==startRead )) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } set_logfile(opts, temp); free(temp); } /* ACTIVITYLOCK */ else if(strstr(startRead, "ACTIVITYLOCK")==startRead) { opts->activitylock = TRUE; if(*(startRead+12)) { ov_logfile_error("Error parsing line %u: ACTIVITYLOCK does not accept parameters.", line); return EXIT_FAILURE; } } /* OPTION */ else if(strstr(startRead, "OPTION")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } temp2 = temp; while((!isWhiteSpace(temp2)) && *temp2) temp2++; if(*temp2!='\0') { /* whitespaces at line end are stripped: nothing may follow here */ ov_logfile_error("Error parsing OPTION in line %u: no whitespaces allowed in options.", line); free(temp); return EXIT_FAILURE; } if(opts->commandline_options) { temp2 = realloc(opts->commandline_options, strlen(opts->commandline_options)+strlen(temp)+2); //oldlength + new option length + ' ' + '\0' if(temp2) { strcat(temp2, " "); strcat(temp2, temp); opts->commandline_options = temp2; } } else //first time commandline_options is a NULL-pointer --> strlen would crash { 
opts->commandline_options = malloc(strlen(temp)+1); if(opts->commandline_options) strcpy(opts->commandline_options, temp); } free(temp); } /* NOSTARTUP */ else if(strstr(startRead, "NOSTARTUP")==startRead) { opts->startup = FALSE; if(*(startRead+9)) { ov_logfile_error("Error parsing line %u: NOSTARTUP does not accept parameters.", line); return EXIT_FAILURE; } } /* EXIT */ else if(strstr(startRead, "EXIT")==startRead) { opts->exit = TRUE; if(*(startRead+4)) { ov_logfile_error("Error parsing line %u: EXIT does not accept parameters.", line); return EXIT_FAILURE; } } /* EXEC */ else if(strstr(startRead, "EXEC")==startRead) { if(!opts->exec) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } temp2 = temp; while(!isWhiteSpace(temp2)){ if(!*temp2){ EXECFEWARGUMENTSERROR: ov_logfile_error("Error parsing line %u: not enough arguments to EXEC option.", line); free(temp); return EXIT_FAILURE; } temp2++; } *temp2 = '\0'; if(*temp!='/'){ opt_reassign_string(&opts->execIdent, temp); if(!opts->execIdent){ free(temp); return EXIT_FAILURE; } } else { opt_clear_string(&opts->execIdent); } temp3 = skipWhiteSpace(++temp2); temp2 = temp3; while(!isWhiteSpace(temp2)){ if(!temp2) goto EXECFEWARGUMENTSERROR; temp2++; } *temp2 = '\0'; if(*temp3!='/'){ opt_reassign_string(&opts->execClass, temp3); if(!opts->execClass){ free(temp); return EXIT_FAILURE; } } else { opt_clear_string(&opts->execClass); } temp3 = skipWhiteSpace(++temp2); temp2 = temp3; while(*temp2 && !isWhiteSpace(temp2)) temp2++; if(*temp2){ ov_logfile_error("Error parsing line %u: too many arguments to EXEC option.", line); free(temp); return EXIT_FAILURE; } if(*temp3!='/'){ opt_reassign_string(&opts->execLib, temp3); if(!opts->execLib){ free(temp); return EXIT_FAILURE; } } else { opt_clear_string(&opts->execLib); } opts->exec = TRUE; free(temp); } } /* ALLOWEDJITTER */ else if(strstr(startRead, "ALLOWEDJITTER")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } 
opts->maxAllowedJitter = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for ALLOWEDJITTER.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* MAXSTRLENGTH */ else if(strstr(startRead, "MAXSTRLENGTH")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->maxStringLength = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for MAXSTRLENGTH.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* MAXVECLENGTH */ else if(strstr(startRead, "MAXVECLENGTH")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->maxVectorLength = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for MAXVECLENGTH.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* MAXNAMELENGTH */ else if(strstr(startRead, "MAXNAMELENGTH")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->maxNameLength = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for MAXNAMELENGTH.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* MAXHIERARCHYDEPTH */ else if(strstr(startRead, "MAXHIERARCHYDEPTH")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->maxHierarchyDepth = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for MAXHIERARCHYDEPTH.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* KSMAXITEMSPERREQ */ else if(strstr(startRead, "KSMAXITEMSPERREQ")==startRead) { temp = readValue(startRead); if(!temp || !*temp){ return EXIT_FAILURE; } opts->ks_maxItemsPerRequest = strtoul(temp, &temp2, 0); if(*temp2) { ov_logfile_error("Error parsing line %u: too many arguments for KSMAXITEMSPERREQ.", line); free(temp); return EXIT_FAILURE; } free(temp); } /* DBSIZE */ else 
if(strstr(startRead, "DBSIZE")==startRead) { temp = readValue(startRead); if(!temp || !*temp) return EXIT_FAILURE; opts->dbSize = strtoul(temp, NULL, 0); free(temp); } /* DBNOMAP */ else if(strstr(startRead, "DBNOMAP")==startRead) { opts->dbflags |= OV_DBOPT_NOMAP; } /* DBFORCECREATE */ else if(strstr(startRead, "DBFORCECREATE")==startRead) { opts->dbflags |= OV_DBOPT_FORCECREATE; } /* * default: option unknown */ else { ov_logfile_error("Error parsing line %u: unknown option", line); return EXIT_FAILURE; } } /* getline returns -1 on error or eof. eof is ok, error aborts program */ if(ferror(cfFile)) { fprintf(stderr, "Error reading config file"); fclose(cfFile); return EXIT_FAILURE; } fclose(cfFile); return OV_ERR_OK; }
// Returns true when ch is a comma or is classified as whitespace by
// isWhiteSpace().
static bool isWhiteSpaceOrComma(char ch)
{
    if (ch == ',')
        return true;
    return isWhiteSpace(ch);
}
/*
    # define identifier                               replacement-list new-line
    # define identifier lparen identifier-list[opt] ) replacement-list new-line
    # define identifier lparen ... )                  replacement-list new-line
    # define identifier lparen identifier-list, ... ) replacement-list new-line

    Parses one #define line, builds a MacroDefinition or
    MacroFunctionDefinition node for it and adds the node to group.
    Returns false when the line has fewer than three non-whitespace tokens.
*/
bool Preprocessor::parseDefineDirective(Item *group)
{
    Q_ASSERT(group->toItemComposite());
    const TokenSection line = readLine();
    const QVector<int> cleanedLine = cleanTokenRange(line);
    if (cleanedLine.count() < 3)
        return false;

    // Tokens 0 and 1 are "#" and "define"; token 2 is the macro name.
    const int identifier = cleanedLine.at(2);

    DefineDirective *defineDirective = 0;
    int replacementListStart;

    // A function-like macro has a "(" directly after the name, with no
    // whitespace token in front of it.
    const bool isFunctionLike = cleanedLine.count() >= 4
        && m_tokenContainer.text(cleanedLine.at(3)) == "("
        && !isWhiteSpace(cleanedLine.at(3) - 1);

    if (isFunctionLike) {
        MacroFunctionDefinition *functionMacro =
            createNode<MacroFunctionDefinition>(m_memoryPool, group);

        // Collect parameters up to and including the closing ")".
        QVector<int> parameters;
        int cursor = 4; // first argument or ')'
        while (cursor < cleanedLine.count()) {
            const int token = cleanedLine.at(cursor);
            ++cursor;
            QByteArray tokenText = m_tokenContainer.text(token);
            if (tokenText == ")")
                break;
            if (tokenText != ",")
                parameters.append(token);
        }
        functionMacro->setParameters(TokenList(m_tokenContainer, parameters));
        defineDirective = functionMacro;
        replacementListStart = cursor;
    } else {
        MacroDefinition *objectMacro =
            createNode<MacroDefinition>(m_memoryPool, group);
        defineDirective = objectMacro;
        replacementListStart = 3;
    }
    Q_ASSERT(defineDirective);

    // This is a bit hackish: the replacement list must keep its whitespace
    // tokens, but cleanedLine has already dropped them, and the original line
    // may still contain escaped newline tokens. So strip only the escaped
    // newlines and look up the token number found at
    // cleanedLine.at(replacementListStart) in that sequence.
    QVector<int> replacementList;
    const QVector<int> noEscNewline = cleanEscapedNewLines(line);
    if (replacementListStart < cleanedLine.count()) {
        const int firstReplacementToken = cleanedLine.at(replacementListStart);
        const int startPos = noEscNewline.indexOf(firstReplacementToken);
        if (startPos != -1) {
            // The final token is the terminating newline; leave it out.
            const int endPos = noEscNewline.count() - 1;
            for (int pos = startPos; pos < endPos; ++pos) {
                const int tokenContainerIndex = noEscNewline.at(pos);
                const Type type = m_tokenTypeList.at(tokenContainerIndex);
                // Comment tokens are not part of the replacement list.
                const bool isComment = (type == Token_line_comment
                                        || type == Token_multiline_comment);
                if (!isComment)
                    replacementList.append(tokenContainerIndex);
            }
        }
    }

    defineDirective->setTokenSection(line);
    defineDirective->setIdentifier(TokenList(m_tokenContainer, QVector<int>() << identifier));
    defineDirective->setReplacementList(TokenList(m_tokenContainer, replacementList));
    group->toItemComposite()->add(defineDirective);
    return true;
}
// ---------------------------------- void XML::Node::setAttributes(const char *n) { char c; attrData = strdup(n); // count maximum amount of attributes int maxAttr = 1; // 1 for tag name bool inQ = false; int i=0; while ((c=attrData[i++])!=0) { if (c=='\"') inQ ^= true; if (!inQ) if (c=='=') maxAttr++; } attr = new Attribute[maxAttr]; attr[0].namePos = 0; attr[0].valuePos = 0; numAttr=1; i=0; // skip until whitespace while (c=attrData[i++]) if (isWhiteSpace(c)) break; if (!c) return; // no values attrData[i-1]=0; while ((c=attrData[i])!=0) { if (!isWhiteSpace(c)) { if (numAttr>=maxAttr) throw StreamException("Too many attributes"); // get start of tag name attr[numAttr].namePos = i; // skip whitespaces until next '=' // terminate name on next whitespace or '=' while (attrData[i]) { c = attrData[i++]; if ((c == '=') || isWhiteSpace(c)) { attrData[i-1] = 0; // null term. name if (c == '=') break; } } // skip whitespaces while (attrData[i]) { if (isWhiteSpace(attrData[i])) i++; else break; } // check for valid start of attribute value - '"' if (attrData[i++] != '\"') throw StreamException("Bad tag value"); attr[numAttr++].valuePos = i; // terminate attribute value at next '"' while (attrData[i]) if (attrData[i++] == '\"') break; attrData[i-1] = 0; // null term. value }else{ i++; } } }
// Appends a filtered copy of src to dst in which runs of different
// "languages" (ASCII letters/digits, Korean, Japanese, Hanja, other symbols)
// are separated by a single ASCII space.
//
// Blanks in src are never copied; a space is emitted instead whenever the
// language class changes. Unless _INGORE_SYMBOLS_ is defined, every symbol is
// emitted with a space in front of it. Finally a leading blank in dst is
// removed.
//
// An empty src leaves dst untouched apart from that final trim, and a
// two-byte lead element without its trail element at the very end of src is
// dropped.
void filterSentence(String &dst, const String &src)
{
    if(!src.empty())
    {
        // The initial language is derived from the first two elements; a
        // one-element input has no second element, so 0 is passed instead.
        const auto first = src.begin();
        auto second = *first;
        second = (src.end() - first > 1) ? *(first + 1) : 0;

        int prev_lang = initCurrentLanguage(*first, second);
        int curr_lang = prev_lang;

        for(auto iter = src.begin(); iter != src.end(); ++iter)
        {
            if(!isTwoBytes(*iter))
            {
                // Is a space or tab
                // NOTE(review): isWhiteSpace() is treated as returning a
                // negative value for non-blank input -- confirm against its
                // definition.
                if(!(isWhiteSpace(*iter) < 0))
                {
                    curr_lang = BLANK;
                    if(prev_lang == curr_lang)
                        continue;
                }
                // ASCII letters and numbers
                else if(isASCIInumber(*iter) || isASCIIalphabet(*iter))
                {
                    curr_lang = ENGLISH;
                    if(prev_lang != curr_lang)
                        dst.push_back(ASCII_SPACE);
                    dst.push_back(*iter);
                }
                // Any other single-byte symbol
                else
                {
                    curr_lang = OTHERS;
#ifndef _INGORE_SYMBOLS_
                    dst.push_back(ASCII_SPACE);
                    dst.push_back(*iter);
#endif
                }
                prev_lang = curr_lang;
            }
            else
            {
                // A lead element with nothing after it cannot be classified;
                // stop here instead of reading past the end of src.
                if(iter + 1 == src.end())
                    break;

                auto c1 = *iter;
                auto c2 = *(iter + 1);

                if(!isKoreanSymbols(c1, c2))    // Korean, Japanese, Hanja
                {
                    if(isJapaness(c1, c2))
                        curr_lang = JAPANAESS;
                    else if(isHanja(c1, c2))
                        curr_lang = HANJA;
                    else
                        curr_lang = KOREAN;

                    if(prev_lang != curr_lang)
                        dst.push_back(ASCII_SPACE);
                    dst.push_back(c1);
                    dst.push_back(c2);
                }
                else    // Korean symbols
                {
                    curr_lang = OTHERS;
#ifndef _INGORE_SYMBOLS_
                    dst.push_back(ASCII_SPACE);
                    dst.push_back(c1);
                    dst.push_back(c2);
#endif
                }
                ++iter;     // the trail element has been consumed as well
                prev_lang = curr_lang;
            }
        }
    }

    // Drop a leading blank, if there is anything in dst at all.
    if(!dst.empty() && isWhiteSpace(*(dst.begin())) == BLANK)
        dst.erase(dst.begin());
}