void ZLStatisticsXMLReader::startElementHandler(const char *tag, const char **attributes) { if (STATISTICS_TAG == tag) { size_t volume = atoi(attributeValue(attributes, "volume")); unsigned long long squaresVolume = atoll(attributeValue(attributes, "squaresVolume")); //std::cerr << "XMLReader: frequencies sum & ^2: " << volume << ":" << squaresVolume << "\n"; myStatisticsPtr = new ZLArrayBasedStatistics( atoi(attributeValue(attributes, "charSequenceSize")), atoi(attributeValue(attributes, "size")), volume, squaresVolume); } else if (ITEM_TAG == tag) { const char *sequence = attributeValue(attributes, "sequence"); const char *frequency = attributeValue(attributes, "frequency"); if ((sequence != 0) && (frequency != 0)) { std::string hexString(sequence); myStatisticsPtr->insert(ZLCharSequence(hexString), atoi(frequency)); } } }
// Counts character sequences of length charSequenceSize found in
// buffer[0 .. length) and stores the resulting counts in `statistics`.
//
// A countdown tracks how many more non-break bytes are needed before a full
// sequence ending at the current byte is available. Every byte flagged in
// myBreakSymbolsTable resets that countdown, so no counted sequence contains
// a break byte.
void ZLStatisticsGenerator::generate(const char* buffer, std::size_t length, std::size_t charSequenceSize, ZLMapBasedStatistics &statistics) {
	std::map<ZLCharSequence, std::size_t> counts;
	const char *const bufferEnd = buffer + length;

	// Number of further non-break bytes required before a window is complete.
	std::size_t remaining = charSequenceSize;

	for (const char *cursor = buffer; cursor < bufferEnd; ++cursor) {
		const unsigned char symbol = static_cast<unsigned char>(*cursor);
		if (myBreakSymbolsTable[symbol] == 1) {
			// Break byte: start collecting a fresh window.
			remaining = charSequenceSize;
		} else if (remaining > 0) {
			--remaining;
		}

		if (remaining != 0) {
			continue;
		}

		// The window is the last charSequenceSize bytes, ending at cursor.
		const char *windowStart = cursor + 1 - charSequenceSize;
		++counts[ZLCharSequence(windowStart, charSequenceSize)];
	}

	statistics = ZLMapBasedStatistics(counts);
}
// Returns a ZLCharSequence constructed from this item's sequence pointer
// and sequence length.
ZLCharSequence ZLArrayBasedStatisticsItem::sequence() const {
	ZLCharSequence result(mySequencePtr, mySequenceLength);
	return result;
}