Beispiel #1
0
/*
 *  mergeFiles(temp_file_idx)
 *
 *    Merge the temporary files numbered from 0 to 'temp_file_idx'
 *    inclusive into the output stream 'out_rwios', maintaining sorted
 *    order.  A record that compares equal (rwrecCompare() returns 0)
 *    to the record most recently written is treated as a duplicate
 *    and is not written.
 *
 *    When the files cannot all be opened at once---either because
 *    MAX_MERGE_FILES files are open or because an open fails with
 *    EMFILE or ENOMEM---the files that were opened are merged into a
 *    new intermediate temporary file, that file becomes the last
 *    file in the range to merge, and the process repeats with the
 *    files that remain.
 *
 *    Exits the application if an error occurs.
 */
static void
mergeFiles(
    int                 temp_file_idx)
{
    char errbuf[2 * PATH_MAX];
    skstream_t *fps[MAX_MERGE_FILES];
    uint8_t recs[MAX_MERGE_FILES][NODE_SIZE];
    uint8_t lowest_rec[NODE_SIZE];
    int j;
    uint16_t open_count;
    uint16_t i;
    uint16_t lowest;
    uint16_t *top_heap;
    int tmp_idx_a;
    int tmp_idx_b;
    skstream_t *fp_intermediate = NULL;
    int tmp_idx_intermediate;
    skheap_t *heap;
    uint32_t heap_count;
    int opened_all_temps = 0;
    ssize_t rv;

    /* the index of the first temp file to merge */
    tmp_idx_a = 0;

    TRACEMSG(("Merging #%d through #%d to '%s'",
              tmp_idx_a, temp_file_idx, skStreamGetPathname(out_rwios)));

    /* The heap holds indexes into fps[] and recs[]; 'recs' is handed
     * to the heap as the comparison function's context. */
    heap = skHeapCreate2(compHeapNodes, MAX_MERGE_FILES, sizeof(uint16_t),
                         NULL, recs);
    if (NULL == heap) {
        skAppPrintOutOfMemory("heap");
        appExit(EXIT_FAILURE);
    }

    /* This loop repeats as long as we haven't read all of the temp
     * files generated in the qsort stage. */
    do {
        /* the heap must be empty at the start of every pass */
        assert(SKHEAP_ERR_EMPTY==skHeapPeekTop(heap,(skheapnode_t*)&top_heap));

        /* the index of the last temp file to merge */
        tmp_idx_b = temp_file_idx;

        /* open an intermediate temp file.  The merge-sort will have
         * to write records here if there are not enough file handles
         * available to open all the existing temporary files. */
        fp_intermediate = skTempFileCreateStream(tmpctx, &tmp_idx_intermediate);
        if (fp_intermediate == NULL) {
            skAppPrintSyserror("Error creating new temporary file");
            appExit(EXIT_FAILURE);
        }

        /* count number of files we open */
        open_count = 0;

        /* Attempt to open up to MAX_MERGE_FILES, though an open may
         * fail due to lack of resources (EMFILE or ENOMEM) */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            fps[open_count] = skTempFileOpenStream(tmpctx, j);
            if (fps[open_count] == NULL) {
                if ((open_count > 0)
                    && ((errno == EMFILE) || (errno == ENOMEM)))
                {
                    /* We cannot open any more files.  Rewind counter
                     * by one to catch this file on the next merge */
                    tmp_idx_b = j - 1;
                    TRACEMSG((("FILE limit hit--"
                               "merging #%d through #%d into #%d: %s"),
                              tmp_idx_a, tmp_idx_b, tmp_idx_intermediate,
                              strerror(errno)));
                    break;
                } else {
                    skAppPrintSyserror(("Error opening existing"
                                        " temporary file '%s'"),
                                       skTempFileGetName(tmpctx, j));
                    appExit(EXIT_FAILURE);
                }
            }

            /* read the first record */
            rv = skStreamRead(fps[open_count], recs[open_count], NODE_SIZE);
            if (NODE_SIZE == rv) {
                /* insert the file index into the heap */
                skHeapInsert(heap, &open_count);
                ++open_count;
                if (open_count == MAX_MERGE_FILES) {
                    /* We've reached the limit for this pass.  Set
                     * tmp_idx_b to the file we just opened. */
                    tmp_idx_b = j;
                    TRACEMSG((("MAX_MERGE_FILES limit hit--"
                           "merging #%d through #%d to #%d"),
                              tmp_idx_a, tmp_idx_b, tmp_idx_intermediate));
                    break;
                }
            } else if (0 == rv) {
                /* an empty file contributes nothing; its slot in
                 * fps[] is reused by the next file */
                TRACEMSG(("Ignoring empty temporary file '%s'",
                          skTempFileGetName(tmpctx, j)));
                skStreamDestroy(&fps[open_count]);
            } else {
                if (rv > 0) {
                    snprintf(errbuf, sizeof(errbuf),
                             "Short read %" SK_PRIdZ "/%" PRIu32 " from '%s'",
                             rv, NODE_SIZE,
                             skStreamGetPathname(fps[open_count]));
                } else {
                    skStreamLastErrMessage(
                        fps[open_count], rv, errbuf, sizeof(errbuf));
                }
                skAppPrintErr(
                    "Error reading first record from temporary file: %s",
                    errbuf);
                appExit(EXIT_FAILURE);
            }
        }

        /* Here, we check to see if we've opened all temp files.  If
         * so, set a flag so we write data to final destination and
         * break out of the loop after we're done. */
        if (tmp_idx_b == temp_file_idx) {
            opened_all_temps = 1;
            /* no longer need the intermediate temp file */
            skStreamDestroy(&fp_intermediate);
        } else {
            /* we could not open all temp files, so merge all opened
             * temp files into the intermediate file.  Add the
             * intermediate file to the list of files to merge */
            temp_file_idx = tmp_idx_intermediate;
        }

        TRACEMSG((("Merging %" PRIu16 " temporary files"), open_count));

        heap_count = skHeapGetNumberEntries(heap);
        assert(heap_count == open_count);

        /* get the index of the file with the lowest record; which is
         * at the top of the heap */
        if (skHeapPeekTop(heap, (skheapnode_t*)&top_heap) != SKHEAP_OK) {
            skAppPrintErr("Unable to open and read any temporary files.");
            appExit(EXIT_FAILURE);
        }
        lowest = *top_heap;

        /* exit this do...while() once all records for all opened
         * files have been read */
        do {
            /* lowest_rec is the record pointed to by the index at the
             * top of the heap */
            memcpy(lowest_rec, recs[lowest], NODE_SIZE);

            /* write the record */
            if (fp_intermediate) {
                /* write the record to intermediate tmp file */
                rv = skStreamWrite(fp_intermediate, lowest_rec, NODE_SIZE);
                if (NODE_SIZE != rv) {
                    skAppPrintSyserror(
                        "Error writing record to temporary file '%s'",
                        skTempFileGetName(tmpctx, tmp_idx_intermediate));
                    appExit(EXIT_FAILURE);
                }
            } else {
                /* we successfully opened all (remaining) temp files,
                 * write the record to the final destination */
                rv = skStreamWriteRecord(out_rwios, (rwRec*)lowest_rec);
                if (0 != rv) {
                    skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr);
                    if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                        appExit(EXIT_FAILURE);
                    }
                }
            }

            /* replace the record we just processed and loop over all
             * files until we get a record that is not a duplicate */
            do {
                if ((rv = skStreamRead(fps[lowest], recs[lowest], NODE_SIZE))
                    != NODE_SIZE)
                {
                    /* read failed.  there is no more data for this
                     * file; remove it from the heap; if the heap is
                     * empty, exit the loop */
                    /* NOTE(review): a short read or a read error is
                     * handled the same as EOF here and is reported
                     * only when tracing is enabled -- confirm that
                     * is intended. */
                    skHeapExtractTop(heap, NULL);
                    --heap_count;
#if TRACEMSG_LEVEL > 0
                    if (rv == 0) {
                        TRACEMSG(
                            ("Finished reading file #%u: EOF; %u files remain",
                             (tmp_idx_a + lowest), heap_count));
                    } else if (rv > 0) {
                        TRACEMSG(
                            ("Finished reading file #%u: Short read "
                             "%" SK_PRIdZ "/%" PRIu32 "; %u files remain",
                             tmp_idx_a + lowest, rv, NODE_SIZE, heap_count));
                    } else {
                        /* get the message from the stream whose read
                         * failed; fps[open_count] is one past the
                         * last stream opened in this pass */
                        skStreamLastErrMessage(
                            fps[lowest], rv, errbuf, sizeof(errbuf));
                        TRACEMSG(
                            ("Finished reading file #%u: %s; %u files remain",
                             (tmp_idx_a + lowest), errbuf, heap_count));
                    }
#endif  /* TRACEMSG_LEVEL */
                    if (0 == heap_count) {
                        break;
                    }

                } else if (rwrecCompare(lowest_rec, recs[lowest])) {
                    /* read succeeded.  new record is not a
                     * duplicate and we insert it into the heap */
                    /* FIXME: This comparison reduces work when the
                     * keys are the same, but it adds another
                     * comparison when the keys are different; is this
                     * an overall win or lose? */
                    skHeapReplaceTop(heap, &lowest, NULL);

                } else {
                    /* read succeeded.  record is a duplicate; ignore
                     * the record and leave the heap unchanged.  The
                     * 'continue' jumps to the loop condition, which
                     * is true for a duplicate, so the next record is
                     * read from the same file. */
                    continue;
                }

                /* get the record at the top of the heap and see if it
                 * is a duplicate; if it is, ignore it. */
                skHeapPeekTop(heap, (skheapnode_t*)&top_heap);
                lowest = *top_heap;
            } while (0 == rwrecCompare(lowest_rec, recs[lowest]));
        } while (heap_count > 0);

        TRACEMSG((("Finished processing #%d through #%d"),
                  tmp_idx_a, tmp_idx_b));

        /* Close all open temp files */
        for (i = 0; i < open_count; ++i) {
            skStreamDestroy(&fps[i]);
        }
        /* Delete all temp files we opened (or attempted to open) this
         * time */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            skTempFileRemove(tmpctx, j);
        }

        /* Close the intermediate temp file. */
        if (fp_intermediate) {
            rv = skStreamClose(fp_intermediate);
            if (rv) {
                skStreamLastErrMessage(
                    fp_intermediate, rv, errbuf, sizeof(errbuf));
                skAppPrintErr("Error closing temporary file: %s", errbuf);
                skStreamDestroy(&fp_intermediate);
                appExit(EXIT_FAILURE);
            }
            skStreamDestroy(&fp_intermediate);
        }

        /* Start the next merge with the next input temp file */
        tmp_idx_a = tmp_idx_b + 1;

    } while (!opened_all_temps);

    skHeapFree(heap);
}
Beispiel #2
0
/*
 *  main()
 *
 *    Test driver for the skheap API.  It makes two runs:
 *
 *    1. A heap of 'heap_init_size' two-int entries whose storage
 *       ('heaps_data') is supplied by the caller.  Every pair in
 *       'data' except the one that begins with 206 is offered to the
 *       heap.  When the heap reports it is full, the pair is compared
 *       with the top entry and is either rejected, swapped in with
 *       skHeapReplaceTop() (the first time only), or inserted after
 *       extracting the top.  The heap is then sorted, iterated in
 *       both directions, and emptied.
 *
 *    2. A heap of single-int entries created by skHeapCreate2() with
 *       no caller-supplied storage.  Every int in 'data' is inserted
 *       REPEATS times, the heap is sorted and emptied, and the
 *       number of entries extracted is checked against the number
 *       the heap reported holding.
 *
 *    Progress is written to the standard output.  Exits with
 *    EXIT_FAILURE when a heap or a heap iterator cannot be created;
 *    otherwise exits with status 0.
 */
int main(void)
{
#define DATA_SIZE 15
    skheap_t *heap;
    int data[2*DATA_SIZE] = {
        201, 34, 202, 56, 203,  2,
        204, 65, 205,  3, 206,  5,
        207,  8, 208, 74, 209, 32,
        210, 78, 211, 79, 212, 80,
        213,  5, 214,  5, 215,  1};
    int heaps_data[2*DATA_SIZE];
    int i;
    int j;
    int *iptr;
    uint32_t k;
    int* top;
    int top_value[2];
    int status;
    int replace_tested = 0;
    int heap_init_size = 10;
    skheapiterator_t *iter_down;
    skheapiterator_t *iter_up;


    /* first run uses a heap where caller provides the memory */

    heap = skHeapCreate(&compare, heap_init_size, 2*sizeof(int),
                        (skheapnode_t*)heaps_data);
    if (NULL == heap) {
        printf("Cannot create heap\n");
        exit(EXIT_FAILURE);
    }

    for (i = 0, iptr = data; i < DATA_SIZE; ++i, iptr += 2) {
        /* deliberately leave one pair out of the heap */
        if (*iptr == 206) {
            continue;
        }
        printf("\n** adding %d/%d...", data[2*i], data[2*i+1]);
        status = skHeapInsert(heap, (skheapnode_t)iptr);
        if (SKHEAP_OK == status) {
            printf("OK\n");
        } else if (SKHEAP_ERR_FULL != status) {
            printf("NOPE. Got wierd error status %d\n", status);
        } else {
            printf("NOPE. Heap full.  Comparing with the top.\n");
            skHeapPeekTop(heap, (skheapnode_t*)&top);
            if (0 >= compare(top, iptr)) {
                printf("Not added to heap since <= top (%d/%d) [%d]\n",
                       *top, *(top+1), compare(top, iptr));
            } else if (!replace_tested) {
                /* exercise skHeapReplaceTop() exactly once */
                replace_tested = 1;
                printf("Replacing top of heap (%d/%d)...", *top, *(top+1));
                if (skHeapReplaceTop(heap, (skheapnode_t)iptr, NULL)
                    == SKHEAP_OK)
                {
                    printf("OK\n");
                } else {
                    printf("Problem adding '%d/%d' to heap\n",
                           data[2*i], data[2*i+1]);
                }
            } else {
                /* afterwards make room with extract-then-insert */
                printf("Removing top of heap (%d/%d)...", *top, *(top+1));
                skHeapExtractTop(heap, NULL);
                if (SKHEAP_OK == skHeapInsert(heap, (skheapnode_t)iptr)) {
                    printf("OK\n");
                } else {
                    printf("Problem adding '%d/%d' to heap\n",
                           data[2*i], data[2*i+1]);
                }
            }
        }
        /* the casts make the arguments match the %u conversions */
        printf("heap %u/%u\n",
               (unsigned int)skHeapGetNumberEntries(heap),
               (unsigned int)skHeapGetCapacity(heap));
        for (k = 0; k < skHeapGetNumberEntries(heap); ++k) {
            printf("%5u  %d/%d\n", (unsigned int)k,
                   heaps_data[2*k], heaps_data[2*k+1]);
        }

        /* also sort the heap while it holds a single entry */
        if (i == 0) {
            printf("\n** Sorting the heap...");
            if (SKHEAP_OK == skHeapSortEntries(heap)) {
                printf("OK\n");
            }
        }
    }

    printf("\n** Sorting the heap...");
    if (SKHEAP_OK == skHeapSortEntries(heap)) {
        printf("OK\n");
    } else {
        printf("Got error\n");
    }
    printf("heap %u/%u\n",
           (unsigned int)skHeapGetNumberEntries(heap),
           (unsigned int)skHeapGetCapacity(heap));
    for (k = 0; k < skHeapGetNumberEntries(heap); ++k) {
        printf("%5u  %d/%d\n", (unsigned int)k,
               heaps_data[2*k], heaps_data[2*k+1]);
    }

    printf("\n** Iterating over the heap...\n");
    iter_down = skHeapIteratorCreate(heap, 1);
    iter_up = skHeapIteratorCreate(heap, -1);
    if (NULL == iter_down || NULL == iter_up) {
        /* do not hand a NULL iterator to skHeapIteratorNext() */
        printf("Cannot create heap iterator\n");
        if (iter_down) {
            skHeapIteratorFree(iter_down);
        }
        if (iter_up) {
            skHeapIteratorFree(iter_up);
        }
        skHeapFree(heap);
        exit(EXIT_FAILURE);
    }
    while (skHeapIteratorNext(iter_down, (skheapnode_t*)&iptr) == SKHEAP_OK) {
        printf("Down: %d/%d\t\t", *iptr, *(iptr+1));
        if (skHeapIteratorNext(iter_up, (skheapnode_t*)&iptr) != SKHEAP_OK) {
            /* 'iptr' still refers to the "down" entry; do not print
             * it as though it were the "up" entry */
            printf("Up: no entry\n");
            break;
        }
        printf("Up: %d/%d\n", *iptr, *(iptr+1));
    }
    skHeapIteratorFree(iter_down);
    skHeapIteratorFree(iter_up);

    printf("\n** Removing sorted data from the heap:\n");
    while (SKHEAP_OK == skHeapExtractTop(heap, (skheapnode_t)top_value)) {
        printf("%d/%d\n", top_value[0], top_value[1]);
    }

    skHeapFree(heap);


    /* second run uses a heap where heap manages its own memory */

    printf("\n** Creating growable heap with initial size %d...",
           heap_init_size);
    heap = skHeapCreate2(&compare2, heap_init_size, sizeof(int),
                         NULL, &cmpfun_data);
    if (NULL == heap) {
        printf("Cannot create heap\n");
        exit(EXIT_FAILURE);
    }
    printf("OK\n");

#define REPEATS  4

    for (j = 0; j < REPEATS; ++j) {
        printf("\n** Inserting %d entries...",
               (int)(sizeof(data)/sizeof(data[0])));
        /* entries are single ints here, so step through every
         * element of 'data' */
        for (i = 0, iptr = data;
             i < (int)(sizeof(data)/sizeof(data[0]));
             ++i, ++iptr)
        {
            status = skHeapInsert(heap, (skheapnode_t)iptr);
            if (SKHEAP_OK == status) {
                /* okay */
            } else if (SKHEAP_ERR_FULL != status) {
                printf("NOPE. Got wierd error status %d\n", status);
            } else {
                printf("NOPE. Heap full.  Contains %u entries\n",
                       (unsigned int)skHeapGetCapacity(heap));
            }
        }
        printf("OK\n");
        printf("heap %u/%u\n",
               (unsigned int)skHeapGetNumberEntries(heap),
               (unsigned int)skHeapGetCapacity(heap));
    }

    skHeapSortEntries(heap);

    printf("\n** Removing data from the heap...");
    /* remember how many entries the heap claims to hold */
    j = skHeapGetNumberEntries(heap);
    i = 0;
    while (SKHEAP_OK == skHeapExtractTop(heap, (skheapnode_t)top_value)) {
        /* printf("%d\n", top_value[0]); */
        ++i;
    }
    printf("got %d entries\n", i);

    if (i != j) {
        printf("error extracting from heap: expected %d; got %d\n", j, i);
    }
    i = skHeapGetNumberEntries(heap);
    if (i != 0) {
        printf("error in heap contents: expected 0; got %d\n", i);
    }

    skHeapFree(heap);

    exit(0);
}