bool BulkHeaderGroup::BulkHeaderGroupBody() { // belt and braces key.set_data(keymem); data.set_data(datamem); if (m_cancel) { emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq); return false; } emit updateJob(JobList::BulkHeaderGroup, JobList::Running, job->seq); NewsGroup* ng = job->ng; Db* db = ng->getDb(); MultiPartHeader mph; SinglePartHeader sph; HeaderBase* hb = 0; HeaderGroup* headerGroup = 0; HeaderGroup* advancedHeaderGroup = 0; // typedef QMap<QString, QString> HeaderGroupIndexes; // subj, headerGroup index // typedef QMap<QString, HeaderGroup*> HeaderGroups; // headerGroup index, headerGroup * HeaderGroupIndexes headerGroupIndexes; HeaderGroups headerGroups; DBC *dbcp = 0; DBT ckey, cdata; memset(&ckey, 0, sizeof(ckey)); memset(&cdata, 0, sizeof(cdata)); size_t retklen, retdlen; void *retkey = 0, *retdata = 0; int ret, t_ret; void *p = 0; quint64 count=0; cdata.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH]; cdata.ulen = HEADER_BULK_BUFFER_LENGTH; cdata.flags = DB_DBT_USERMEM; ckey.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH]; ckey.ulen = HEADER_BULK_BUFFER_LENGTH; ckey.flags = DB_DBT_USERMEM; /* Acquire a cursor for the database. */ if ((ret = db->get_DB()->cursor(db->get_DB(), NULL, &dbcp, DB_CURSOR_BULK)) != 0) { db->err(ret, "DB->cursor"); char* ptr = 0; ptr = (char*)(ckey.data); Q_DELETE_ARRAY(ptr); ptr = (char*)(cdata.data); Q_DELETE_ARRAY(ptr); return false; } // To save the group records ng->articlesNeedDeleting(false); // Store the data in the database - flush first ... u_int32_t delCount; uchar keymem[KEYMEM_SIZE]; uchar datamem[DATAMEM_SIZE]; Dbt key, data; char* p2 = 0; QByteArray ba; const char *k = 0; key.set_flags(DB_DBT_USERMEM); key.set_data(&keymem); key.set_ulen(KEYMEM_SIZE); data.set_flags(DB_DBT_USERMEM); data.set_ulen(DATAMEM_SIZE); data.set_data(&datamem); QString subj = "MDQuban", from = "MDQuban"; //QString rs1 = "^(.*)(\".*\")"; //QString rs2 = "^(.*)\\s-\\s(.*)$"; //QString rs3 = "^(\\S+.*)\\[.*\\].*(\".*\")"; //QString rs3 = "^(.*)\\s-\\s.*\\s-\\s(.*)$"; QRegExp rx[3]; bool rxPosBack[3]; bool noRegexpGrouping; QString recKey, storeIndex; QString prevSubj = "MDQuban", prevFrom = "MDQuban"; int pos; bool newGroup = false; bool mphFound = false; quint32 grouped = 0, single = 0, numGroups = 0; qint16 stringDiff = -1; bool prevGroup = false; bool advancedPlacement = false; bool skipAdvanced = false; noRegexpGrouping = ng->isThereNoRegexOnGrouping(); if (noRegexpGrouping == false) // need regex for grouping { rx[0].setPattern(ng->getGroupRE1()); rx[1].setPattern(ng->getGroupRE2()); rx[2].setPattern(ng->getGroupRE3()); rxPosBack[0] = ng->getGroupRE1Back(); rxPosBack[1] = ng->getGroupRE2Back(); rxPosBack[2] = ng->getGroupRE3Back(); } ng->getGroupingDb()->truncate(0, &delCount, 0); qDebug() << "Deleted " << delCount << " records from group db"; QMapIterator<QString, QString> it(headerGroupIndexes); QString advancedIndex; for (;;) { /* * Acquire the next set of key/data pairs. This code * does not handle single key/data pairs that won't fit * in a BUFFER_LENGTH size buffer, instead returning * DB_BUFFER_SMALL to our caller. */ if ((ret = dbcp->get(dbcp, &ckey, &cdata, DB_MULTIPLE_KEY | DB_NEXT)) != 0) { if (ret != DB_NOTFOUND) db->err(ret, "DBcursor->get"); break; } for (DB_MULTIPLE_INIT(p, &cdata);;) { DB_MULTIPLE_KEY_NEXT(p, &cdata, retkey, retklen, retdata, retdlen); if (p == NULL) break; if (retdlen){;} // MD TODO compiler .... unused variable recKey = QString::fromLocal8Bit((char*)retkey, retklen); if (*((char *)retdata) == 'm') { MultiPartHeader::getMultiPartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &mph); hb = (HeaderBase*)&mph; mphFound = true; } else if (*((char *)retdata) == 's') { SinglePartHeader::getSinglePartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &sph); hb = (HeaderBase*)&sph; mphFound = false; } else { // What have we found ????? qDebug() << "Found unexpected identifier for header : " << (char)*((char *)retdata); continue; } ++count; prevSubj = subj; prevFrom = from; subj = hb->getSubj(); from = hb->getFrom(); if (noRegexpGrouping == false) // need regex for grouping { for (int i=0; i<3; ++i) { if (rx[i].isEmpty() == false) { if (rxPosBack[i] == true) // from the back { pos = subj.lastIndexOf(rx[i]); if (pos != -1) subj.truncate(pos); } else // from the front { pos = rx[i].indexIn(subj); if (pos > -1) subj = rx[i].cap(0); } } } } //qDebug() << "Stripped down to: " << subj; stringDiff = -1; if (prevFrom != from) // change of contributor { newGroup = true; } else // same contributor { if ((stringDiff = levenshteinDistance(prevSubj, subj)) > ng->getMatchDistance()) // no match ... newGroup = true; else newGroup = false; //qDebug() << "Diff between " << prevSubj << " and " << subj << " is " << stringDiff; } if (newGroup) { if (ng->isThereAdvancedGrouping()) { it.toFront(); // decide if we can match to a previous group while (it.hasNext()) { it.next(); if ((stringDiff = levenshteinDistance(it.key(), subj)) <= ng->getMatchDistance()) // match ... { // The index for this group is in it.value() // See if we have the HeaderGroup in our cache headerGroups) if (headerGroups.contains(it.value())) { advancedHeaderGroup = headerGroups.value(it.value()); } else // not in cache { advancedIndex = it.value(); advancedHeaderGroup = getGroup(ng, advancedIndex); if (advancedHeaderGroup) { headerGroups.insert(advancedIndex, advancedHeaderGroup); } else // db read failed .. { skipAdvanced = true; } } if (skipAdvanced == false) { if (mphFound) advancedHeaderGroup->addMphKey(recKey); else advancedHeaderGroup->addSphKey(recKey); advancedPlacement = true; subj = prevSubj; // ignore this header as it's been placed out of sequence from = prevFrom; newGroup = false; // as we managed to relocate to an existing group break; // stop looking at previous groups } else skipAdvanced = false; } } } } if (newGroup) { if (prevGroup) // save before moving on { ba = storeIndex.toLocal8Bit(); k= ba.constData(); memcpy(keymem, k, storeIndex.length()); key.set_size(storeIndex.length()); p2=headerGroup->data(); data.set_data(p2); data.set_size(headerGroup->getRecordSize()); ret=ng->getGroupingDb()->put(NULL, &key, &data, 0); if (ret!=0) qDebug("Error updating record: %d", ret); if (ng->isThereAdvancedGrouping()) headerGroupIndexes.insert(storeIndex.section('\n', 0, 0), storeIndex); Q_DELETE_ARRAY(p2); Q_DELETE(headerGroup); numGroups++; } prevGroup = true; storeIndex = subj % "\n" % from; headerGroup = new HeaderGroup(); headerGroup->setDisplayName(subj); headerGroup->setPostingDate(hb->getPostingDate()); headerGroup->setDownloadDate(hb->getDownloadDate()); headerGroup->setStatus(hb->getStatus()); headerGroup->setNextDistance(stringDiff); } // if we've found somewhere else to place this header then don't add again if (!advancedPlacement) { if (mphFound) headerGroup->addMphKey(recKey); else headerGroup->addSphKey(recKey); } else advancedPlacement = false; if (count % 250 == 0) { QCoreApplication::processEvents(); emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " + QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq); } if (m_cancel) { emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq); return false; } } if (m_cancel) { emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq); return false; } } if ((t_ret = dbcp->close(dbcp)) != 0) { db->err(ret, "DBcursor->close"); if (ret == 0) ret = t_ret; } char* ptr = ((char*)ckey.data); Q_DELETE_ARRAY(ptr); ptr = ((char*)cdata.data); Q_DELETE_ARRAY(ptr); if (headerGroups.count()) { qDeleteAll(headerGroups); headerGroups.clear(); } qDebug() << "Multi = " << grouped << ", single = " << single; ng->setHeadersNeedGrouping(false); // Finally update the newsgroup emit saveGroup(ng); emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " + QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq); if (m_cancel) { emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq); return false; } emit logEvent(tr("Bulk grouping of ") + ng->getTotal() + tr(" articles completed successfully.")); emit updateJob(JobList::BulkHeaderGroup, JobList::Finished_Ok, job->seq); ng->setTotalGroups(numGroups); Q_DELETE(headerGroup); return true; }