Ejemplo n.º 1
0
/*!
 *  recogAppend()
 *
 *      Input:  recog1
 *              recog2 (gets added to recog1)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is used to make a training recognizer from more than
 *          one trained recognizer source.  It should only be used
 *          when the bitmaps for corresponding character classes are
 *          very similar.  That constraint does not arise when
 *          the character classes are disjoint; e.g., if recog1 is
 *          digits and recog2 is alphabetical.
 *      (2) This is done by appending recog2 to recog1.  Averages are
 *          computed for each recognizer, if necessary, before appending.
 *      (3) Non-array fields are combined using the appropriate min and max.
 */
l_int32
recogAppend(L_RECOG  *recog1,
            L_RECOG  *recog2)
{
    PROCNAME("recogAppend");

    if (!recog1)
        return ERROR_INT("recog1 not defined", procName, 1);
    if (!recog2)
        return ERROR_INT("recog2 not defined", procName, 1);

        /* Make sure both are finalized with all arrays computed */
    recogAverageSamples(recog1, 0);
    recogAverageSamples(recog2, 0);

        /* Combine non-array field values */
    recog1->minwidth_u = L_MIN(recog1->minwidth_u, recog2->minwidth_u);
    recog1->maxwidth_u = L_MAX(recog1->maxwidth_u, recog2->maxwidth_u);
    recog1->minheight_u = L_MIN(recog1->minheight_u, recog2->minheight_u);
    recog1->maxheight_u = L_MAX(recog1->maxheight_u, recog2->maxheight_u);
    recog1->minwidth = L_MIN(recog1->minwidth, recog2->minwidth);
    recog1->maxwidth = L_MAX(recog1->maxwidth, recog2->maxwidth);
    recog1->min_splitw = L_MIN(recog1->min_splitw, recog2->min_splitw);
    recog1->min_splith = L_MIN(recog1->min_splith, recog2->min_splith);
    recog1->max_splith = L_MAX(recog1->max_splith, recog2->max_splith);

        /* Combine array field values */
    recog1->setsize += recog2->setsize;
    sarrayAppendRange(recog1->sa_text, recog2->sa_text, 0, -1);
    l_dnaJoin(recog1->dna_tochar, recog2->dna_tochar, 0, -1);
    pixaaJoin(recog1->pixaa_u, recog2->pixaa_u, 0, -1);
    pixaJoin(recog1->pixa_u, recog2->pixa_u, 0, -1);
    ptaaJoin(recog1->ptaa_u, recog2->ptaa_u, 0, -1);
    ptaJoin(recog1->pta_u, recog2->pta_u, 0, -1);
    numaaJoin(recog1->naasum_u, recog2->naasum_u, 0, -1);
    numaJoin(recog1->nasum_u, recog2->nasum_u, 0, -1);
    pixaaJoin(recog1->pixaa, recog2->pixaa, 0, -1);
    pixaJoin(recog1->pixa, recog2->pixa, 0, -1);
    ptaaJoin(recog1->ptaa, recog2->ptaa, 0, -1);
    ptaJoin(recog1->pta, recog2->pta, 0, -1);
    numaaJoin(recog1->naasum, recog2->naasum, 0, -1);
    numaJoin(recog1->nasum, recog2->nasum, 0, -1);
    return 0;
}
Ejemplo n.º 2
0
/*!
 * \brief   l_dnaUnionByAset()
 *
 * \param[in]    da1, da2
 * \return  dad with the union of the set of numbers, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See sarrayUnionByAset() for the approach.
 *      (2) Here, the key in building the sorted tree is the number itself.
 *      (3) Operations using an underlying tree are O(nlogn), which is
 *          typically less efficient than hashing, which is O(n).
 * </pre>
 */
L_DNA *
l_dnaUnionByAset(L_DNA  *da1,
                 L_DNA  *da2)
{
L_DNA  *da3, *dad;

    PROCNAME("l_dnaUnionByAset");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Join */
    da3 = l_dnaCopy(da1);
    l_dnaJoin(da3, da2, 0, -1);

        /* Eliminate duplicates */
    dad = l_dnaRemoveDupsByAset(da3);
    l_dnaDestroy(&da3);
    return dad;
}
Ejemplo n.º 3
0
/*!
 * \brief   l_dnaaFlattenToDna()
 *
 * \param[in]    daa
 * \return  dad, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This 'flattens' the dnaa to a dna, by joining successively
 *          each dna in the dnaa.
 *      (2) It leaves the input dnaa unchanged.
 * </pre>
 */
L_DNA *
l_dnaaFlattenToDna(L_DNAA  *daa)
{
l_int32  i, nalloc;
L_DNA   *da, *dad;
L_DNA  **array;

    PROCNAME("l_dnaaFlattenToDna");

    if (!daa)
        return (L_DNA *)ERROR_PTR("daa not defined", procName, NULL);

    nalloc = daa->nalloc;
    array = daa->dna;
    dad = l_dnaCreate(0);
    for (i = 0; i < nalloc; i++) {
        da = array[i];
        if (!da) continue;
        l_dnaJoin(dad, da, 0, -1);
    }

    return dad;
}
Ejemplo n.º 4
0
l_int32 main(int    argc,
             char **argv)
{
    L_ASET     *set;
    L_DNA      *da1, *da2, *da3, *da4, *da5, *da6, *da7, *da8, *dav, *dac;
    L_DNAHASH  *dahash;
    NUMA       *nav, *nac;
    PTA        *pta1, *pta2, *pta3;
    SARRAY     *sa1, *sa2, *sa3, *sa4;

    lept_mkdir("lept/hash");

#if 1
    /* Test string hashing with aset */
    fprintf(stderr, "Set results with string hashing:\n");
    sa1 = BuildShortStrings(3, 0);
    sa2 = BuildShortStrings(3, 1);
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    set = l_asetCreateFromSarray(sa2);
    fprintf(stderr, "  time to make set: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size of set without dups: %d\n", l_asetSize(set));
    l_asetDestroy(&set);
    startTimer();
    sa3 = sarrayRemoveDupsByAset(sa2);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByAset(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);

    /* Test sarray set operations with dna hash.
     * We use the same hash function as is used with aset. */
    fprintf(stderr, "\nDna hash results for sarray:\n");
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    dahash = l_dnaHashCreateFromSarray(sa2);
    fprintf(stderr, "  time to make hashmap: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  entries in hashmap with dups: %d\n",
            l_dnaHashGetTotalCount(dahash));
    l_dnaHashDestroy(&dahash);
    startTimer();
    sarrayRemoveDupsByHash(sa2, &sa3, NULL);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByHash(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);
    sarrayDestroy(&sa1);
    sarrayDestroy(&sa2);
#endif

#if 1
    /* Test point hashing with aset.
     * Enter all points within a 1500 x 1500 image in pta1, and include
     * 450,000 duplicates in pta2.  With this pt hashing function,
     * there are no hash collisions among any of the 400 million pixel
     * locations in a 20000 x 20000 image. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nSet results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    pta3 = ptaRemoveDupsByAset(pta2);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByAset(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

#if 1
    /* Test pta set operations with dna hash, using the same pt hashing
     * function.  Although there are no collisions in 20K x 20K images,
     * the dna hash implementation works properly even if there are some. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nDna hash results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    ptaRemoveDupsByHash(pta2, &pta3, NULL);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByHash(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

    /* Test dna set and histo operations using dna hash */
#if 1
    fprintf(stderr, "\nDna hash results for dna:\n");
    da1 = l_dnaMakeSequence(0.0, 0.125, 8000);
    da2 = l_dnaMakeSequence(300.0, 0.125, 8000);
    da3 = l_dnaMakeSequence(600.0, 0.125, 8000);
    da4 = l_dnaMakeSequence(900.0, 0.125, 8000);
    da5 = l_dnaMakeSequence(1200.0, 0.125, 8000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);
    l_dnaJoin(da1, da4, 0, -1);
    l_dnaJoin(da1, da5, 0, -1);
    l_dnaRemoveDupsByHash(da1, &da6, &dahash);
    l_dnaHashDestroy(&dahash);
    fprintf(stderr, "  dna size with dups = %d\n", l_dnaGetCount(da1));
    fprintf(stderr, "  dna size of unique numbers = %d\n", l_dnaGetCount(da6));
    l_dnaMakeHistoByHash(da1, &dahash, &dav, &dac);
    nav = l_dnaConvertToNuma(dav);
    nac = l_dnaConvertToNuma(dac);
    fprintf(stderr, "  dna number of histo points = %d\n", l_dnaGetCount(dac));
    gplotSimpleXY1(nav, nac, GPLOT_IMPULSES, GPLOT_PNG,
                   "/tmp/lept/hash/histo", "Histo");
    da7 = l_dnaIntersectionByHash(da2, da3);
    fprintf(stderr, "  dna number of points: da2 = %d, da3 = %d\n",
            l_dnaGetCount(da2), l_dnaGetCount(da3));
    fprintf(stderr, "  dna number of da2/da3 intersection points = %d\n",
            l_dnaGetCount(da7));
    l_fileDisplay("/tmp/lept/hash/histo.png", 700, 100, 1.0);
    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&dac);
    l_dnaDestroy(&dav);
    l_dnaHashDestroy(&dahash);
    numaDestroy(&nav);
    numaDestroy(&nac);
#endif

#if 1
    da1 = l_dnaMakeSequence(0, 3, 10000);
    da2 = l_dnaMakeSequence(0, 5, 10000);
    da3 = l_dnaMakeSequence(0, 7, 10000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);

    fprintf(stderr, "\nDna results using set:\n");
    fprintf(stderr, "  da1 count: %d\n", l_dnaGetCount(da1));
    set = l_asetCreateFromDna(da1);
    fprintf(stderr, "  da1 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da4 = l_dnaUnionByAset(da2, da3);
    fprintf(stderr, "  da4 count: %d\n", l_dnaGetCount(da4));
    set = l_asetCreateFromDna(da4);
    fprintf(stderr, "  da4 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da5 = l_dnaIntersectionByAset(da1, da2);
    fprintf(stderr, "  da5 count: %d\n", l_dnaGetCount(da5));
    set = l_asetCreateFromDna(da5);
    fprintf(stderr, "  da5 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da6 = l_dnaMakeSequence(100000, 11, 5000);
    l_dnaJoin(da6, da1, 0, -1);
    fprintf(stderr, "  da6 count: %d\n", l_dnaGetCount(da6));
    set = l_asetCreateFromDna(da6);
    fprintf(stderr, "  da6 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da7 = l_dnaIntersectionByAset(da6, da3);
    fprintf(stderr, "  da7 count: %d\n", l_dnaGetCount(da7));
    set = l_asetCreateFromDna(da7);
    fprintf(stderr, "  da7 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da8 = l_dnaRemoveDupsByAset(da1);
    fprintf(stderr, "  da8 count: %d\n\n", l_dnaGetCount(da8));

    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&da8);
#endif

    return 0;
}