/**
 * Converts a raw byte array into preprocessed contents.
 *
 * A run of bytes that starts with a letter (per isLetter) or '_' and continues with
 * bytes accepted by isLetterOrNumber or '_' is collapsed into one entry: the index of
 * the KDevelop::IndexedString built from that run. Every other byte is appended on its
 * own, converted through indexFromCharacter().
 *
 * @param array the bytes to tokenize
 * @return the produced entries, in the order the input was read
 */
PreprocessedContents tokenizeFromByteArray(const QByteArray& array) {
  PreprocessedContents result;

  // Bytes of the word currently being collected, together with its incrementally computed hash
  KDevVarLengthArray<char, 100> word;
  KDevelop::IndexedString::RunningHash wordHash;
  bool insideWord = false;

  const char* cursor = array.constData();
  const char* const end = cursor + array.size();

  for(; cursor < end; ++cursor) {
    const char ch = *cursor;

    // A letter or underscore opens a new word when none is being collected
    if(!insideWord && (isLetter(ch) || ch == '_'))
      insideWord = true;

    if(insideWord) {
      if(isLetterOrNumber(ch) || ch == '_') {
        // Still inside the word: extend it and move on to the next byte
        wordHash.append(ch);
        word.append(ch);
        continue;
      }

      // The current byte terminates the word: emit it and reset the collection state
      result.append( KDevelop::IndexedString(word.constData(), word.size(), wordHash.hash).index() );
      wordHash.clear();
      word.clear();
      insideWord = false;
    }

    // Anything that is not part of a word is emitted as a single character entry
    result.append( indexFromCharacter(ch) );
  }

  // The input ended in the middle of a word, so that word has not been emitted yet
  if(insideWord)
    result.append( KDevelop::IndexedString(word.constData(), word.size(), wordHash.hash).index() );

  // Debugging aid (disabled): Q_ASSERT(stringFromContents(result) == array);

  return result;
}
/**
 * Consumes an identifier from the stream and returns the index of the
 * KDevelop::IndexedString that represents it.
 *
 * While the stream only yields entries for which isCharacter() holds, the characters
 * are gathered in a local buffer and turned into one IndexedString at the end. As soon
 * as an entry is met for which isCharacter() does not hold (presumably an already
 * tokenized identifier), the function switches to merging the entries into an
 * IndexedString step by step and returns from there.
 *
 * @param input the stream to read from; it is advanced past everything that was consumed
 * @return index of the resulting IndexedString
 */
uint pp_skip_identifier::operator()(Stream& input)
{
  // Plain characters gathered so far, and the hash that is computed alongside them
  KDevVarLengthArray<char, 100> collected;
  KDevelop::IndexedString::RunningHash collectedHash;

  while (!input.atEnd()) {
    if(isCharacter(input.current())) {
      // Simple case: gather single characters, they are joined into an IndexedString later

      if (!isLetterOrNumber(input.current()) && input != '_')
        break; // Not part of an identifier any more

      const char plain = characterFromIndex(input);
      collectedHash.append(plain);
      collected.append(plain);
      ++input;
      continue;
    }

    // Complex case: do a merge in which tokenized identifiers can take part as well.
    // Whatever has been gathered as plain characters so far becomes the start of the result.
    KDevelop::IndexedString merged;
    if(!collected.isEmpty())
      merged = KDevelop::IndexedString(collected.constData(), collected.size(), collectedHash.hash);

    for (; !input.atEnd(); ++input) {
      uint entry = input.current();

      // Only an entry that is a character, but neither a letter/number nor '_', stops the merge
      const bool stopsIdentifier = !isLetterOrNumber(entry) && input != '_' && isCharacter(entry);
      if (stopsIdentifier)
        break;

      if(!merged.isEmpty()) {
        ///@todo Be better to build up a complete buffer and then append it all, so we don't get the intermediate strings into the repository
        merged = KDevelop::IndexedString(merged.byteArray() + KDevelop::IndexedString::fromIndex(input.current()).byteArray());
      } else {
        // The most common fast path: nothing to merge with yet, so the entry is taken over directly
        merged = KDevelop::IndexedString::fromIndex(entry);
      }
    }

    return merged.index();
  }

  // Only plain characters were consumed (possibly none at all)
  return KDevelop::IndexedString(collected.constData(), collected.size(), collectedHash.hash).index();
}