Beispiel #1
0
int TagSet::distance(const TagSet *other)
{
    int res = 0;
    QString str1 = this->diskTag(TAG_PERFORMER).toUpper().replace("THE ", "");
    QString str2 = other->diskTag(TAG_PERFORMER).toUpper().replace("THE ", "");
    res += levenshteinDistance(str1, str2) * 3;

    str1 = this->diskTag(TAG_ALBUM).toUpper().replace("THE ", "");
    str2 = other->diskTag(TAG_ALBUM).toUpper().replace("THE ", "");
    res += levenshteinDistance(str1, str2);

    return res;
}
void levenshteinDistanceFunction(sqlite3_context *aCtx,
                                 int aArgc,
                                 sqlite3_value **aArgv)
{
  NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");

  // If either argument is a SQL NULL, then return SQL NULL.
  if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
      ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
    ::sqlite3_result_null(aCtx);
    return;
  }

  int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(PRUnichar);
  const PRUnichar *a = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[0]));

  int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(PRUnichar);
  const PRUnichar *b = static_cast<const PRUnichar *>(::sqlite3_value_text16(aArgv[1]));

  // Compute the Levenshtein Distance, and return the result (or error).
  int distance = -1;
  const nsDependentString A(a, aLen);
  const nsDependentString B(b, bLen);
  int status = levenshteinDistance(A, B, &distance);
  if (status == SQLITE_OK) {
    ::sqlite3_result_int(aCtx, distance);    
  }
  else if (status == SQLITE_NOMEM) {
    ::sqlite3_result_error_nomem(aCtx);
  }
  else {
    ::sqlite3_result_error(aCtx, "User function returned error code", -1);
  }
}
Beispiel #3
0
int CoprocEditD(int origLenL, int origLenR, char *chL, char *chR) {
   string strL, strR;
   // convert to a string with a max length of COPROC_STR_LEN
   strL.assign(chL,STR_CMP_LEN);
   strR.assign(chR,STR_CMP_LEN);
   return (int)levenshteinDistance(strL, strR);
}
Beispiel #4
0
int WithinEditN_CP( unsigned char *l, int origlenL, unsigned char *r, int origlenR, int d, int filter_width )
{
   int lenL = min(origlenL, filter_width);
   int lenR = min(origlenR, filter_width);
 
   string strL, strR;
   strL.assign((char *)l, lenL);
   strR.assign((char *)r, lenR);
 
   return (int)levenshteinDistance(strL, strR) <= d;
}
Beispiel #5
0
std::string
GestureRecognizer::matchGesture()
{
    if (_moves.empty())
        return "";

    int distance = INT_MAX;
    std::string match = "";

    for (GestureMap::iterator i = _gestures.begin(); i != _gestures.end(); ++i)
    {
        int temp = levenshteinDistance(i->second, _moves);
        if (temp <= _sensitivity && temp < distance)
        {
            distance = temp;
            match = i->first;
        }
    }
    return match;
}
Beispiel #6
0
bool BulkHeaderGroup::BulkHeaderGroupBody()
{
    // belt and braces
    key.set_data(keymem);
    data.set_data(datamem);

    if (m_cancel)
    {
        emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
        return false;
    }

    emit updateJob(JobList::BulkHeaderGroup, JobList::Running, job->seq);

    NewsGroup* ng = job->ng;
    Db* db = ng->getDb();

    MultiPartHeader mph;
    SinglePartHeader sph;
    HeaderBase* hb = 0;
    HeaderGroup* headerGroup = 0;
    HeaderGroup* advancedHeaderGroup = 0;

    // typedef QMap<QString, QString> HeaderGroupIndexes; // subj, headerGroup index
    // typedef QMap<QString, HeaderGroup*> HeaderGroups; //  headerGroup index, headerGroup *

    HeaderGroupIndexes headerGroupIndexes;
    HeaderGroups       headerGroups;

    DBC *dbcp = 0;
    DBT ckey, cdata;

    memset(&ckey, 0, sizeof(ckey));
    memset(&cdata, 0, sizeof(cdata));

    size_t retklen, retdlen;
    void *retkey = 0, *retdata = 0;
    int ret, t_ret;
    void *p = 0;

    quint64 count=0;

    cdata.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH];
    cdata.ulen = HEADER_BULK_BUFFER_LENGTH;
    cdata.flags = DB_DBT_USERMEM;

    ckey.data = (void *) new char[HEADER_BULK_BUFFER_LENGTH];
    ckey.ulen = HEADER_BULK_BUFFER_LENGTH;
    ckey.flags = DB_DBT_USERMEM;

    /* Acquire a cursor for the database. */
    if ((ret = db->get_DB()->cursor(db->get_DB(), NULL, &dbcp, DB_CURSOR_BULK)) != 0)
    {
        db->err(ret, "DB->cursor");
        char* ptr = 0;

        ptr = (char*)(ckey.data);
        Q_DELETE_ARRAY(ptr);
        ptr = (char*)(cdata.data);
        Q_DELETE_ARRAY(ptr);
        return false;
    }

    // To save the group records
    ng->articlesNeedDeleting(false);

    // Store the data in the database - flush first ...

    u_int32_t delCount;

    uchar keymem[KEYMEM_SIZE];
    uchar datamem[DATAMEM_SIZE];
    Dbt key, data;
    char* p2 = 0;
    QByteArray ba;
    const char *k = 0;

    key.set_flags(DB_DBT_USERMEM);
    key.set_data(&keymem);
    key.set_ulen(KEYMEM_SIZE);

    data.set_flags(DB_DBT_USERMEM);
    data.set_ulen(DATAMEM_SIZE);
    data.set_data(&datamem);

    QString subj =  "MDQuban", from = "MDQuban";

    //QString rs1 = "^(.*)(\".*\")";
    //QString rs2 = "^(.*)\\s-\\s(.*)$";
    //QString rs3 = "^(\\S+.*)\\[.*\\].*(\".*\")";

    //QString rs3 = "^(.*)\\s-\\s.*\\s-\\s(.*)$";
    QRegExp rx[3];
    bool    rxPosBack[3];
    bool    noRegexpGrouping;

    QString recKey, storeIndex;
    QString prevSubj = "MDQuban", prevFrom = "MDQuban";

    int pos;
    bool newGroup = false;

    bool mphFound = false;

    quint32 grouped = 0,
            single = 0,
            numGroups = 0;

    qint16 stringDiff = -1;

    bool prevGroup = false;
    bool advancedPlacement = false;
    bool skipAdvanced = false;

    noRegexpGrouping = ng->isThereNoRegexOnGrouping();

    if (noRegexpGrouping == false) // need regex for grouping
    {
        rx[0].setPattern(ng->getGroupRE1());
        rx[1].setPattern(ng->getGroupRE2());
        rx[2].setPattern(ng->getGroupRE3());

        rxPosBack[0] = ng->getGroupRE1Back();
        rxPosBack[1] = ng->getGroupRE2Back();
        rxPosBack[2] = ng->getGroupRE3Back();
    }

    ng->getGroupingDb()->truncate(0, &delCount, 0);
    qDebug() << "Deleted " << delCount << " records from group db";

    QMapIterator<QString, QString> it(headerGroupIndexes);
    QString advancedIndex;

    for (;;)
    {
        /*
         * Acquire the next set of key/data pairs.  This code
         * does not handle single key/data pairs that won't fit
         * in a BUFFER_LENGTH size buffer, instead returning
         * DB_BUFFER_SMALL to our caller.
         */
        if ((ret = dbcp->get(dbcp, &ckey, &cdata, DB_MULTIPLE_KEY | DB_NEXT)) != 0)
        {
            if (ret != DB_NOTFOUND)
                db->err(ret, "DBcursor->get");
            break;
        }

        for (DB_MULTIPLE_INIT(p, &cdata);;)
        {
            DB_MULTIPLE_KEY_NEXT(p, &cdata, retkey, retklen, retdata, retdlen);
            if (p == NULL)
                break;

            if (retdlen){;} // MD TODO compiler .... unused variable

            recKey = QString::fromLocal8Bit((char*)retkey, retklen);

            if (*((char *)retdata) == 'm')
            {
                MultiPartHeader::getMultiPartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &mph);
                hb = (HeaderBase*)&mph;
                mphFound = true;
            }
            else if (*((char *)retdata) == 's')
            {
                SinglePartHeader::getSinglePartHeader((unsigned int)retklen, (char *)retkey, (char *)retdata, &sph);
                hb = (HeaderBase*)&sph;
                mphFound = false;
            }
            else
            {
                // What have we found ?????
                qDebug() << "Found unexpected identifier for header : " << (char)*((char *)retdata);
                continue;
            }

            ++count;

            prevSubj = subj;

            prevFrom = from;

            subj = hb->getSubj();
            from = hb->getFrom();

            if (noRegexpGrouping == false) // need regex for grouping
            {
                for (int i=0; i<3; ++i)
                {
                    if (rx[i].isEmpty() == false)
                    {
                        if (rxPosBack[i] == true) // from the back
                        {
                            pos = subj.lastIndexOf(rx[i]);
                            if (pos != -1)
                                subj.truncate(pos);
                        }
                        else // from the front
                        {
                            pos = rx[i].indexIn(subj);
                            if (pos > -1)
                                subj = rx[i].cap(0);
                        }
                    }
                }
            }

            //qDebug() << "Stripped down to: " << subj;

            stringDiff = -1;

            if (prevFrom != from) // change of contributor
            {
                newGroup = true;
            }
            else // same contributor
            {
               if ((stringDiff = levenshteinDistance(prevSubj, subj)) > ng->getMatchDistance()) // no match ...
                   newGroup = true;
               else
                   newGroup = false;

               //qDebug() << "Diff between " << prevSubj << " and " << subj << " is " << stringDiff;
            }

            if (newGroup)
            {
                if (ng->isThereAdvancedGrouping())
                {
                    it.toFront();

                    // decide if we can match to a previous group
                    while (it.hasNext())
                    {
                        it.next();
                        if ((stringDiff = levenshteinDistance(it.key(), subj)) <= ng->getMatchDistance()) // match ...
                        {
                            // The index for this group is in it.value()
                            // See if we have the HeaderGroup in our cache headerGroups)

                            if (headerGroups.contains(it.value()))
                            {
                                advancedHeaderGroup = headerGroups.value(it.value());
                            }
                            else // not in cache
                            {
                                advancedIndex = it.value();
                                advancedHeaderGroup = getGroup(ng, advancedIndex);
                                if (advancedHeaderGroup)
                                {
                                    headerGroups.insert(advancedIndex, advancedHeaderGroup);
                                }
                                else // db read failed ..
                                {
                                    skipAdvanced = true;
                                }
                            }

                            if (skipAdvanced == false)
                            {
                                if (mphFound)
                                    advancedHeaderGroup->addMphKey(recKey);
                                else
                                    advancedHeaderGroup->addSphKey(recKey);

                                advancedPlacement = true;
                                subj = prevSubj; // ignore this header as it's been placed out of sequence
                                from = prevFrom;
                                newGroup = false; // as we managed to relocate to an existing group

                                break; // stop looking at previous groups
                            }
                            else
                                skipAdvanced = false;
                        }
                    }
                }
            }

            if (newGroup)
            {
                if (prevGroup) // save before moving on
                {
                    ba = storeIndex.toLocal8Bit();
                    k= ba.constData();
                    memcpy(keymem, k, storeIndex.length());
                    key.set_size(storeIndex.length());

                    p2=headerGroup->data();
                    data.set_data(p2);
                    data.set_size(headerGroup->getRecordSize());
                    ret=ng->getGroupingDb()->put(NULL, &key, &data, 0);
                    if (ret!=0)
                        qDebug("Error updating record: %d", ret);

                    if (ng->isThereAdvancedGrouping())
                        headerGroupIndexes.insert(storeIndex.section('\n', 0, 0), storeIndex);

                    Q_DELETE_ARRAY(p2);
                    Q_DELETE(headerGroup);
                    numGroups++;
                }

                prevGroup = true;

                storeIndex = subj % "\n" % from;

                headerGroup = new HeaderGroup();

                headerGroup->setDisplayName(subj);
                headerGroup->setPostingDate(hb->getPostingDate());
                headerGroup->setDownloadDate(hb->getDownloadDate());
                headerGroup->setStatus(hb->getStatus());
                headerGroup->setNextDistance(stringDiff);
            }

            // if we've found somewhere else to place this header then don't add again
            if (!advancedPlacement)
            {
                if (mphFound)
                    headerGroup->addMphKey(recKey);
                else
                    headerGroup->addSphKey(recKey);
            }
            else
                advancedPlacement = false;

            if (count % 250 == 0)
            {
                QCoreApplication::processEvents();

                emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " +
                    QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq);
            }

            if (m_cancel)
            {
                emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
                return false;
            }
        }

        if (m_cancel)
        {
            emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
            return false;
        }
    }

    if ((t_ret = dbcp->close(dbcp)) != 0)
    {
        db->err(ret, "DBcursor->close");
        if (ret == 0)
            ret = t_ret;
    }

    char* ptr = ((char*)ckey.data);
    Q_DELETE_ARRAY(ptr);
    ptr = ((char*)cdata.data);
    Q_DELETE_ARRAY(ptr);
    if (headerGroups.count())
    {
        qDeleteAll(headerGroups);
        headerGroups.clear();
    }

    qDebug() << "Multi = " << grouped << ", single = " << single;

    ng->setHeadersNeedGrouping(false);
    // Finally update the newsgroup
    emit saveGroup(ng);

    emit updateJob(JobList::BulkHeaderGroup, tr("Header bulk grouping for newsgroup ") + job->ng->getAlias() + ": " +
                  QString::number(count) + " out of " + QString::number(ng->getTotal()) + tr(" grouped"), job->seq);

    if (m_cancel)
    {
        emit updateJob(JobList::BulkHeaderGroup, JobList::Cancelled, job->seq);
        return false;
    }

    emit logEvent(tr("Bulk grouping of ") + ng->getTotal() + tr(" articles completed successfully."));

    emit updateJob(JobList::BulkHeaderGroup, JobList::Finished_Ok, job->seq);

    ng->setTotalGroups(numGroups);

    Q_DELETE(headerGroup);

    return true;
}