Esempio n. 1
0
bool BulkHeaderGroup::BulkHeaderGroupBody()
{
    // belt and braces
    key.set_data(keymem);
    data.set_data(datamem);

    if (m_cancel)
    {
        emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
        return false;
    }

    emit updateJob(JobList::BulkHeaderGroup, JobList::Running, job->seq);

    NewsGroup* ng = job->ng;
    Db* db = ng->getDb();

    MultiPartHeader mph;
    SinglePartHeader sph;
    HeaderBase* hb = 0;
    HeaderGroup* headerGroup = 0;
    HeaderGroup* advancedHeaderGroup = 0;

    // typedef QMap<QString, QString> HeaderGroupIndexes; // subj, headerGroup index
    // typedef QMap<QString, HeaderGroup*> HeaderGroups; //  headerGroup index, headerGroup *

    HeaderGroupIndexes headerGroupIndexes;
    HeaderGroups       headerGroups;

    DBC *dbcp = 0;
    DBT ckey, cdata;

    memset(&ckey, 0, sizeof(ckey));
    memset(&cdata, 0, sizeof(cdata));

    size_t retklen, retdlen;
    void *retkey = 0, *retdata = 0;
    int ret, t_ret;
    void *p = 0;

    quint64 count=0;

    cdata.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH];
    cdata.ulen = HEADER_BULK_BUFFER_LENGTH;
    cdata.flags = DB_DBT_USERMEM;

    ckey.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH];
    ckey.ulen = HEADER_BULK_BUFFER_LENGTH;
    ckey.flags = DB_DBT_USERMEM;

    /* Acquire a cursor for the database. */
    if ((ret = db->get_DB()->cursor(db->get_DB(), NULL, &dbcp, DB_CURSOR_BULK)) != 0)
    {
        db->err(ret, "DB->cursor");
        char* ptr = 0;

        ptr = (char*)(ckey.data);
        Q_DELETE_ARRAY(ptr);
        ptr = (char*)(cdata.data);
        Q_DELETE_ARRAY(ptr);
        return false;
    }

    // To save the group records
    ng->articlesNeedDeleting(false);

    // Store the data in the database - flush first ...

    u_int32_t delCount;

    uchar keymem[KEYMEM_SIZE];
    uchar datamem[DATAMEM_SIZE];
    Dbt key, data;
    char* p2 = 0;
    QByteArray ba;
    const char *k = 0;

    key.set_flags(DB_DBT_USERMEM);
    key.set_data(&keymem);
    key.set_ulen(KEYMEM_SIZE);

    data.set_flags(DB_DBT_USERMEM);
    data.set_ulen(DATAMEM_SIZE);
    data.set_data(&datamem);

    QString subj =  "MDQuban", from = "MDQuban";

    //QString rs1 = "^(.*)(\".*\")";
    //QString rs2 = "^(.*)\\s-\\s(.*)$";
    //QString rs3 = "^(\\S+.*)\\[.*\\].*(\".*\")";

    //QString rs3 = "^(.*)\\s-\\s.*\\s-\\s(.*)$";
    QRegExp rx[3];
    bool    rxPosBack[3];
    bool    noRegexpGrouping;

    QString recKey, storeIndex;
    QString prevSubj = "MDQuban", prevFrom = "MDQuban";

    int pos;
    bool newGroup = false;

    bool mphFound = false;

    quint32 grouped = 0,
            single = 0,
            numGroups = 0;

    qint16 stringDiff = -1;

    bool prevGroup = false;
    bool advancedPlacement = false;
    bool skipAdvanced = false;

    noRegexpGrouping = ng->isThereNoRegexOnGrouping();

    if (noRegexpGrouping == false) // need regex for grouping
    {
        rx[0].setPattern(ng->getGroupRE1());
        rx[1].setPattern(ng->getGroupRE2());
        rx[2].setPattern(ng->getGroupRE3());

        rxPosBack[0] = ng->getGroupRE1Back();
        rxPosBack[1] = ng->getGroupRE2Back();
        rxPosBack[2] = ng->getGroupRE3Back();
    }

    ng->getGroupingDb()->truncate(0, &delCount, 0);
    qDebug() << "Deleted " << delCount << " records from group db";

    QMapIterator<QString, QString> it(headerGroupIndexes);
    QString advancedIndex;

    for (;;)
    {
        /*
         * Acquire the next set of key/data pairs.  This code
         * does not handle single key/data pairs that won't fit
         * in a BUFFER_LENGTH size buffer, instead returning
         * DB_BUFFER_SMALL to our caller.
         */
        if ((ret = dbcp->get(dbcp, &ckey, &cdata, DB_MULTIPLE_KEY | DB_NEXT)) != 0)
        {
            if (ret != DB_NOTFOUND)
                db->err(ret, "DBcursor->get");
            break;
        }

        for (DB_MULTIPLE_INIT(p, &cdata);;)
        {
            DB_MULTIPLE_KEY_NEXT(p, &cdata, retkey, retklen, retdata, retdlen);
            if (p == NULL)
                break;

            if (retdlen){;} // MD TODO compiler .... unused variable

            recKey = QString::fromLocal8Bit((char*)retkey, retklen);

            if (*((char *)retdata) == 'm')
            {
                MultiPartHeader::getMultiPartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &mph);
                hb = (HeaderBase*)&mph;
                mphFound = true;
            }
            else if (*((char *)retdata) == 's')
            {
                SinglePartHeader::getSinglePartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &sph);
                hb = (HeaderBase*)&sph;
                mphFound = false;
            }
            else
            {
                // What have we found ?????
                qDebug() << "Found unexpected identifier for header : " << (char)*((char *)retdata);
                continue;
            }

            ++count;

            prevSubj = subj;

            prevFrom = from;

            subj = hb->getSubj();
            from = hb->getFrom();

            if (noRegexpGrouping == false) // need regex for grouping
            {
                for (int i=0; i<3; ++i)
                {
                    if (rx[i].isEmpty() == false)
                    {
                        if (rxPosBack[i] == true) // from the back
                        {
                            pos = subj.lastIndexOf(rx[i]);
                            if (pos != -1)
                                subj.truncate(pos);
                        }
                        else // from the front
                        {
                            pos = rx[i].indexIn(subj);
                            if (pos > -1)
                                subj = rx[i].cap(0);
                        }
                    }
                }
            }

            //qDebug() << "Stripped down to: " << subj;

            stringDiff = -1;

            if (prevFrom != from) // change of contributor
            {
                newGroup = true;
            }
            else // same contributor
            {
               if ((stringDiff = levenshteinDistance(prevSubj, subj)) > ng->getMatchDistance()) // no match ...
                   newGroup = true;
               else
                   newGroup = false;

               //qDebug() << "Diff between " << prevSubj << " and " << subj << " is " << stringDiff;
            }

            if (newGroup)
            {
                if (ng->isThereAdvancedGrouping())
                {
                    it.toFront();

                    // decide if we can match to a previous group
                    while (it.hasNext())
                    {
                        it.next();
                        if ((stringDiff = levenshteinDistance(it.key(), subj)) <= ng->getMatchDistance()) // match ...
                        {
                            // The index for this group is in it.value()
                            // See if we have the HeaderGroup in our cache headerGroups)

                            if (headerGroups.contains(it.value()))
                            {
                                advancedHeaderGroup = headerGroups.value(it.value());
                            }
                            else // not in cache
                            {
                                advancedIndex = it.value();
                                advancedHeaderGroup = getGroup(ng, advancedIndex);
                                if (advancedHeaderGroup)
                                {
                                    headerGroups.insert(advancedIndex, advancedHeaderGroup);
                                }
                                else // db read failed ..
                                {
                                    skipAdvanced = true;
                                }
                            }

                            if (skipAdvanced == false)
                            {
                                if (mphFound)
                                    advancedHeaderGroup->addMphKey(recKey);
                                else
                                    advancedHeaderGroup->addSphKey(recKey);

                                advancedPlacement = true;
                                subj = prevSubj; // ignore this header as it's been placed out of sequence
                                from = prevFrom;
                                newGroup = false; // as we managed to relocate to an existing group

                                break; // stop looking at previous groups
                            }
                            else
                                skipAdvanced = false;
                        }
                    }
                }
            }

            if (newGroup)
            {
                if (prevGroup) // save before moving on
                {
                    ba = storeIndex.toLocal8Bit();
                    k= ba.constData();
                    memcpy(keymem, k, storeIndex.length());
                    key.set_size(storeIndex.length());

                    p2=headerGroup->data();
                    data.set_data(p2);
                    data.set_size(headerGroup->getRecordSize());
                    ret=ng->getGroupingDb()->put(NULL, &key, &data, 0);
                    if (ret!=0)
                        qDebug("Error updating record: %d", ret);

                    if (ng->isThereAdvancedGrouping())
                        headerGroupIndexes.insert(storeIndex.section('\n', 0, 0), storeIndex);

                    Q_DELETE_ARRAY(p2);
                    Q_DELETE(headerGroup);
                    numGroups++;
                }

                prevGroup = true;

                storeIndex = subj % "\n" % from;

                headerGroup = new HeaderGroup();

                headerGroup->setDisplayName(subj);
                headerGroup->setPostingDate(hb->getPostingDate());
                headerGroup->setDownloadDate(hb->getDownloadDate());
                headerGroup->setStatus(hb->getStatus());
                headerGroup->setNextDistance(stringDiff);
            }

            // if we've found somewhere else to place this header then don't add again
            if (!advancedPlacement)
            {
                if (mphFound)
                    headerGroup->addMphKey(recKey);
                else
                    headerGroup->addSphKey(recKey);
            }
            else
                advancedPlacement = false;

            if (count % 250 == 0)
            {
                QCoreApplication::processEvents();

                emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " +
                    QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq);
            }

            if (m_cancel)
            {
                emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
                return false;
            }
        }

        if (m_cancel)
        {
            emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
            return false;
        }
    }

    if ((t_ret = dbcp->close(dbcp)) != 0)
    {
        db->err(ret, "DBcursor->close");
        if (ret == 0)
            ret = t_ret;
    }

    char* ptr = ((char*)ckey.data);
    Q_DELETE_ARRAY(ptr);
    ptr = ((char*)cdata.data);
    Q_DELETE_ARRAY(ptr);
    if (headerGroups.count())
    {
        qDeleteAll(headerGroups);
        headerGroups.clear();
    }

    qDebug() << "Multi = " << grouped << ", single = " << single;

    ng->setHeadersNeedGrouping(false);
    // Finally update the newsgroup
    emit saveGroup(ng);

    emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " +
                  QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq);

    if (m_cancel)
    {
        emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
        return false;
    }

    emit logEvent(tr("Bulk grouping of ") + ng->getTotal() + tr(" articles completed successfully."));

    emit updateJob(JobList::BulkHeaderGroup, JobList::Finished_Ok, job->seq);

    ng->setTotalGroups(numGroups);

    Q_DELETE(headerGroup);

    return true;
}