/**
 * Converts a raw byte array into preprocessed contents.
 *
 * Every identifier-like run (started by a letter or '_', continued by letters,
 * numbers or '_') is collapsed into the index of a single KDevelop::IndexedString.
 * Every other byte is stored individually through indexFromCharacter().
 *
 * @param array the bytes to tokenize
 * @return the resulting sequence of indices
 */
PreprocessedContents tokenizeFromByteArray(const QByteArray& array) {
  PreprocessedContents to;

  // Characters of the identifier currently being collected, together with its running hash
  KDevVarLengthArray<char, 100> word;
  KDevelop::IndexedString::RunningHash wordHash;
  bool inWord = false;

  const char* const end = array.constData() + array.size();

  for (const char* cursor = array.constData(); cursor < end; ++cursor) {
    const char ch = *cursor;

    // An identifier may only be started by a letter or an underscore
    if (!inWord && (isLetter(ch) || ch == '_'))
      inWord = true;

    if (inWord) {
      if (isLetterOrNumber(ch) || ch == '_') {
        // Still inside the identifier: just collect the character
        wordHash.append(ch);
        word.append(ch);
        continue;
      }

      // The identifier ended right before this character: emit it and reset the collectors
      to.append( KDevelop::IndexedString(word.constData(), word.size(), wordHash.hash).index() );
      wordHash.clear();
      word.clear();
      inWord = false;
    }

    // Not part of an identifier, so the character is stored on its own
    to.append( indexFromCharacter(ch) );
  }

  // The input ended in the middle of an identifier, emit what was collected
  if (inWord)
    to.append( KDevelop::IndexedString(word.constData(), word.size(), wordHash.hash).index() );

  return to;
}
/**
 * Consumes an identifier from the stream and returns the index of the
 * KDevelop::IndexedString that holds its text.
 *
 * Plain characters (as judged by isCharacter()) are collected one by one. As soon
 * as an entry is met that is not a plain character, the function switches to a
 * merge mode in which such entries are joined with what was collected so far.
 *
 * @param input the stream to read from; it is advanced past the consumed entries
 * @return the index of the IndexedString built from the consumed entries
 */
uint pp_skip_identifier::operator()(Stream& input) {
  KDevVarLengthArray<char, 100> chars;
  KDevelop::IndexedString::RunningHash runningHash;

  for (; !input.atEnd(); ++input) {
    if (isCharacter(input.current())) {
      // Simple case: gather plain characters so one IndexedString can be built from them
      if (!isLetterOrNumber(input.current()) && input != '_')
        break;

      char c = characterFromIndex(input);
      runningHash.append(c);
      chars.append(c);
      continue;
    }

    // Complex case: a non-character entry was found, so merge entry by entry,
    // which also allows already tokenized identifiers to be joined
    KDevelop::IndexedString merged;
    if (!chars.isEmpty())
      merged = KDevelop::IndexedString(chars.constData(), chars.size(), runningHash.hash);

    while (!input.atEnd()) {
      const uint current = input.current();

      // Only a plain character that cannot be part of an identifier terminates the merge
      if (!isLetterOrNumber(current) && input != '_' && isCharacter(current))
        break;

      const KDevelop::IndexedString piece = KDevelop::IndexedString::fromIndex(current);

      if (merged.isEmpty()) {
        // The most common fast path: nothing collected yet, take the entry as it is
        merged = piece;
      } else {
        ///@todo It would be better to build up a complete buffer and append it all at once, so the intermediate strings do not end up in the repository
        merged = KDevelop::IndexedString(merged.byteArray() + piece.byteArray());
      }

      ++input;
    }

    return merged.index();
  }

  // Only plain characters were consumed (possibly none at all)
  return KDevelop::IndexedString(chars.constData(), chars.size(), runningHash.hash).index();
}