/**
 * Reads up to @p length bytes into @p buffer by calling read() repeatedly.
 *
 * Each individual request is capped at getPipeReadBestRequest() bytes. The
 * loop ends when the requested amount has been collected or when isEOS()
 * reports the end of the stream after a read.
 *
 * @param buffer  destination memory; treated as an array of uint8
 * @param length  number of bytes the caller wants
 * @return number of bytes actually stored in @p buffer
 *
 * NOTE(review): if read() returns 0 while isEOS() stays false the loop keeps
 * retrying - presumably read() blocks in that case; confirm.
 */
uint basicInput::pipeRead(void *buffer, const uint length)
{
    uint8 *cursor = static_cast<uint8 *>(buffer); // next free position in the buffer
    uint total = 0;                                // bytes collected so far
    uint remaining = length;                       // bytes still wanted

    while (remaining > 0) {
        // Ask for at most one "best request" sized chunk
        const uint chunk = read(cursor, t_min(remaining, getPipeReadBestRequest()));

        // Account for what was delivered
        cursor += chunk;
        remaining -= chunk;
        total += chunk;

        // Nothing more will arrive once the stream has ended
        if (isEOS()) {
            break;
        }
    }

    return total;
}
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::pair // <identifier> <separator> [ <string> | <word> ] // separator '::' | ':=' | ':' | '=' bool Lexer::isPair (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string ignoredToken; Lexer::Type ignoredType; if (isIdentifier (ignoredToken, ignoredType)) { // Look for a valid separator. std::string separator = _text.substr (_cursor, 2); if (separator == "::" || separator == ":=") _cursor += 2; else if (separator[0] == ':' || separator[0] == '=') _cursor++; else { _cursor = marker; return false; } // String, word or nothing are all valid. if (readWord (_text, "'\"", _cursor, ignoredToken) || readWord (_text, _cursor, ignoredToken) || isEOS () || isWhitespace (_text[_cursor])) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::pair; return true; } } _cursor = marker; return false; }
/// Scans a term list for an end-of-sentence term.
///
/// Walks every entry of @p tList, looks up the term for the entry's term ID
/// in @p idx and hands it to isEOS().
///
/// @param idx    index used to translate term IDs into terms
/// @param tList  term list to scan; its iteration state is restarted
/// @return non-zero (true) as soon as one term satisfies isEOS(), 0 otherwise
int hasEOS(const lemur::api::Index* idx,
           const lemur::api::TermInfoList* tList)
{
  for (tList->startIteration(); tList->hasMore(); )
  {
    lemur::api::TermInfo* entry = tList->nextEntry();
    if (isEOS(idx->term(entry->termID())))
      return true;
  }

  return false;
}
uint8_t Tokenizer::tokenize() { if(buf_==nullptr) return 0; // sanitization while( isSeparator(*buf_) ) ++buf_; // skip leading separators if( isEOS(*buf_) ) // nothing inside? return 0; // process until the end of the string char* s =buf_; uint8_t count=1; while(true) { // end of string means end of processing if( isEOS(*s) ) return count; // field separators if( isSeparator(*s) ) { // skip all of the separators while( isSeparator(*s) ) { *s=0; // mark this as the end of the token ++s; } // mark next token, only if not the end of the sequence if( !isEOS(*s) ) ++count; } // proceed to the next character ++s; } return count; }
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::pattern // / <unquoted-string> / <EOS> | <isWhitespace> bool Lexer::isPattern (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string word; if (readWord (_text, "/", _cursor, word) && (isEOS () || isWhitespace (_text[_cursor]))) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::pattern; return true; } _cursor = marker; return false; }
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::set // a single number: 1 // a list of numbers: 1,3,5 // a range: 5-10 // or a combination: 1,3,5-10 // // <id> [ - <id> ] [ , <id> [ - <id> ] ] ... bool Lexer::isSet (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; int count = 0; std::string dummyToken; Lexer::Type dummyType; do { if (isInteger (dummyToken, dummyType)) { ++count; if (isLiteral ("-", false, false)) { if (isInteger (dummyToken, dummyType)) ++count; else { _cursor = marker; return false; } } } else { _cursor = marker; return false; } } while (isLiteral (",", false, false)); // Success is multiple numbers, matching the pattern. if (count > 1 && (isEOS () || isWhitespace (_text[_cursor]) || isHardBoundary (_text[_cursor], _text[_cursor + 1]))) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::set; return true; } _cursor = marker; return false; }
//////////////////////////////////////////////////////////////////////////////// // When a Lexer object is constructed with a string, this method walks through // the stream of low-level tokens. bool Lexer::token (std::string& token, Lexer::Type& type) { // Eat white space. while (isWhitespace (_text[_cursor])) utf8_next_char (_text, _cursor); // Terminate at EOS. if (isEOS ()) return false; // The sequence is specific, and must follow these rules: // - date < duration < uuid < identifier // - dom < uuid // - uuid < hex < number // - url < pair < identifier // - hex < number // - separator < tag < operator // - path < substitution < pattern // - set < number // - word last if (isString (token, type, "'\"") || isDate (token, type) || isDuration (token, type) || isURL (token, type) || isPair (token, type) || isUUID (token, type, true) || isSet (token, type) || isDOM (token, type) || isHexNumber (token, type) || isNumber (token, type) || isSeparator (token, type) || isTag (token, type) || isPath (token, type) || isSubstitution (token, type) || isPattern (token, type) || isOperator (token, type) || isIdentifier (token, type) || isWord (token, type)) return true; return false; }