/*
 *  status = appOptionsHandler(cData, opt_index, opt_arg);
 *
 *    This function is passed to skOptionsRegister(); it will be called
 *    by skOptionsParse() for each user-specified switch that the
 *    application has registered; it should handle the switch as
 *    required---typically by setting global variables---and return 1
 *    if the switch processing failed or 0 if it succeeded.  Returning
 *    a non-zero from the handler causes skOptionsParse() to return
 *    a negative value.
 *
 *    The clientData in 'cData' is typically ignored; 'opt_index' is
 *    the index number that was specified as the last value for each
 *    struct option in appOptions[]; 'opt_arg' is the user's argument
 *    to the switch for options that have a REQUIRED_ARG or an
 *    OPTIONAL_ARG.
 *
 *    Every switch that stores a value (the output stream, the log
 *    destination, the log flags) may be given only once; a repeated
 *    switch is reported and rejected with a return value of 1.
 *
 *    Failure to create or bind the output stream is reported and
 *    terminates the process via exit().
 */
static int
appOptionsHandler(
    clientData   UNUSED(cData),
    int          opt_index,
    char        *opt_arg)
{
    size_t sz;
    int rv;

    switch ((appOptionsEnum)opt_index) {
      case OPT_SILK_OUTPUT:
        if (silk_output) {
            skAppPrintErr("Invalid %s: Switch used multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        if ((rv =skStreamCreate(&silk_output,SK_IO_WRITE,SK_CONTENT_SILK_FLOW))
            || (rv = skStreamBind(silk_output, opt_arg)))
        {
            skStreamPrintLastErr(silk_output, rv, &skAppPrintErr);
            exit(EXIT_FAILURE);
        }
        break;

      case OPT_PRINT_STATISTICS:
        print_statistics = 1;
        break;

      case OPT_LOG_DESTINATION:
        if ('\0' != log_destination[0]) {
            skAppPrintErr("Invalid %s: Switch used multiple times",
                          appOptions[opt_index].name);
            /* reject the repeated switch instead of silently
             * overwriting the destination that was set earlier */
            return 1;
        }
        if ('\0' == opt_arg[0]) {
            skAppPrintErr("Invalid %s: Path name is required",
                          appOptions[opt_index].name);
            return 1;
        }
        /* the special destination keywords are stored verbatim */
        if (0 == strcmp("stdout", opt_arg)
            || 0 == strcmp("stderr", opt_arg)
            || 0 == strcmp("none", opt_arg))
        {
            strncpy(log_destination, opt_arg, sizeof(log_destination));
            break;
        }
        /* an absolute path is stored as given once we know it fits
         * (including its terminating NUL) */
        if ('/' == opt_arg[0]) {
            if (strlen(opt_arg) >= sizeof(log_destination)) {
                skAppPrintErr("Invalid %s: Name is too long",
                              appOptions[opt_index].name);
                return 1;
            }
            strncpy(log_destination, opt_arg, sizeof(log_destination));
            break;
        }
        /* a relative path is made absolute by prefixing the current
         * working directory and a '/' separator */
        if (NULL == getcwd(log_destination, sizeof(log_destination))) {
            skAppPrintSyserror("Unable to get current directory");
            return 1;
        }
        sz = strlen(log_destination);
        /* need room for cwd + '/' + opt_arg + NUL */
        if (sz + strlen(opt_arg) + 1 >= sizeof(log_destination)) {
            skAppPrintErr("Invalid %s: Name is too long",
                          appOptions[opt_index].name);
            return 1;
        }
        snprintf(log_destination + sz, sizeof(log_destination) - sz,
                 "/%s", opt_arg);
        break;

      case OPT_LOG_FLAGS:
        if (log_flags) {
            skAppPrintErr("Invalid %s: Switch used multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        log_flags = opt_arg;
        break;
    }

    return 0;  /* OK */
}
/* * mergeFiles(temp_file_idx) * * Merge the temporary files numbered from 0 to 'temp_file_idx' * inclusive into the output file 'out_ios', maintaining sorted * order. Exits the application if an error occurs. */ static void mergeFiles( int temp_file_idx) { char errbuf[2 * PATH_MAX]; skstream_t *fps[MAX_MERGE_FILES]; uint8_t recs[MAX_MERGE_FILES][NODE_SIZE]; uint8_t lowest_rec[NODE_SIZE]; int j; uint16_t open_count; uint16_t i; uint16_t lowest; uint16_t *top_heap; int tmp_idx_a; int tmp_idx_b; skstream_t *fp_intermediate = NULL; int tmp_idx_intermediate; skheap_t *heap; uint32_t heap_count; int opened_all_temps = 0; ssize_t rv; /* the index of the first temp file to the merge */ tmp_idx_a = 0; TRACEMSG(("Merging #%d through #%d to '%s'", tmp_idx_a, temp_file_idx, skStreamGetPathname(out_rwios))); heap = skHeapCreate2(compHeapNodes, MAX_MERGE_FILES, sizeof(uint16_t), NULL, recs); if (NULL == heap) { skAppPrintOutOfMemory("heap"); appExit(EXIT_FAILURE); } /* This loop repeats as long as we haven't read all of the temp * files generated in the qsort stage. */ do { assert(SKHEAP_ERR_EMPTY==skHeapPeekTop(heap,(skheapnode_t*)&top_heap)); /* the index of the list temp file to merge */ tmp_idx_b = temp_file_idx; /* open an intermediate temp file. The merge-sort will have * to write records here if there are not enough file handles * available to open all the existing tempoary files. */ fp_intermediate = skTempFileCreateStream(tmpctx, &tmp_idx_intermediate); if (fp_intermediate == NULL) { skAppPrintSyserror("Error creating new temporary file"); appExit(EXIT_FAILURE); } /* count number of files we open */ open_count = 0; /* Attempt to open up to MAX_MERGE_FILES, though we an open * may fail due to lack of resources (EMFILE or ENOMEM) */ for (j = tmp_idx_a; j <= tmp_idx_b; ++j) { fps[open_count] = skTempFileOpenStream(tmpctx, j); if (fps[open_count] == NULL) { if ((open_count > 0) && ((errno == EMFILE) || (errno == ENOMEM))) { /* We cannot open any more files. 
Rewind counter * by one to catch this file on the next merge */ tmp_idx_b = j - 1; TRACEMSG((("FILE limit hit--" "merging #%d through #%d into #%d: %s"), tmp_idx_a, tmp_idx_b, tmp_idx_intermediate, strerror(errno))); break; } else { skAppPrintSyserror(("Error opening existing" " temporary file '%s'"), skTempFileGetName(tmpctx, j)); appExit(EXIT_FAILURE); } } /* read the first record */ rv = skStreamRead(fps[open_count], recs[open_count], NODE_SIZE); if (NODE_SIZE == rv) { /* insert the file index into the heap */ skHeapInsert(heap, &open_count); ++open_count; if (open_count == MAX_MERGE_FILES) { /* We've reached the limit for this pass. Set * tmp_idx_b to the file we just opened. */ tmp_idx_b = j; TRACEMSG((("MAX_MERGE_FILES limit hit--" "merging #%d through #%d to #%d"), tmp_idx_a, tmp_idx_b, tmp_idx_intermediate)); break; } } else if (0 == rv) { TRACEMSG(("Ignoring empty temporary file '%s'", skTempFileGetName(tmpctx, j))); skStreamDestroy(&fps[open_count]); } else { if (rv > 0) { snprintf(errbuf, sizeof(errbuf), "Short read %" SK_PRIdZ "/%" PRIu32 " from '%s'", rv, NODE_SIZE, skStreamGetPathname(fps[open_count])); } else { skStreamLastErrMessage( fps[open_count], rv, errbuf, sizeof(errbuf)); } skAppPrintErr( "Error reading first record from temporary file: %s", errbuf); appExit(EXIT_FAILURE); } } /* Here, we check to see if we've opened all temp files. If * so, set a flag so we write data to final destination and * break out of the loop after we're done. */ if (tmp_idx_b == temp_file_idx) { opened_all_temps = 1; /* no longer need the intermediate temp file */ skStreamDestroy(&fp_intermediate); } else { /* we could not open all temp files, so merge all opened * temp files into the intermediate file. 
Add the * intermediate file to the list of files to merge */ temp_file_idx = tmp_idx_intermediate; } TRACEMSG((("Merging %" PRIu16 " temporary files"), open_count)); heap_count = skHeapGetNumberEntries(heap); assert(heap_count == open_count); /* get the index of the file with the lowest record; which is * at the top of the heap */ if (skHeapPeekTop(heap, (skheapnode_t*)&top_heap) != SKHEAP_OK) { skAppPrintErr("Unable to open and read any temporary files."); appExit(EXIT_FAILURE); } lowest = *top_heap; /* exit this do...while() once all records for all opened * files have been read */ do { /* lowest_rec is the record pointed to by the index at the * top of the heap */ memcpy(lowest_rec, recs[lowest], NODE_SIZE); /* write the record */ if (fp_intermediate) { /* write the record to intermediate tmp file */ rv = skStreamWrite(fp_intermediate, lowest_rec, NODE_SIZE); if (NODE_SIZE != rv) { skAppPrintSyserror( "Error writing record to temporary file '%s'", skTempFileGetName(tmpctx, tmp_idx_intermediate)); appExit(EXIT_FAILURE); } } else { /* we successfully opened all (remaining) temp files, * write to record to the final destination */ rv = skStreamWriteRecord(out_rwios, (rwRec*)lowest_rec); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { appExit(EXIT_FAILURE); } } } /* replace the record we just processed and loop over all * files until we get a record that is not a duplicate */ do { if ((rv = skStreamRead(fps[lowest], recs[lowest], NODE_SIZE)) != NODE_SIZE) { /* read failed. 
there is no more data for this * file; remove it from the heap; if the heap is * empty, exit the loop */ skHeapExtractTop(heap, NULL); --heap_count; #if TRACEMSG_LEVEL > 0 if (rv == 0) { TRACEMSG( ("Finished reading file #%u: EOF; %u files remain", (tmp_idx_a + lowest), heap_count)); } else if (rv > 0) { TRACEMSG( ("Finished reading file #%u: Short read " "%" SK_PRIdZ "/%" PRIu32 "; %u files remain", tmp_idx_a + lowest, rv, NODE_SIZE, heap_count)); } else { skStreamLastErrMessage( fps[open_count], rv, errbuf, sizeof(errbuf)); TRACEMSG( ("Finished reading file #%u: %s; %u files remain", (tmp_idx_a + lowest), errbuf, heap_count)); } #endif /* TRACEMSG_LEVEL */ if (0 == heap_count) { break; } } else if (rwrecCompare(lowest_rec, recs[lowest])) { /* read succeeded. new record is not a * duplicate and we insert it into the heap */ /* FIXME: This comparison reduces work when the * keys are the same, but it adds another * comparison when the keys are different; is this * an overall win or lose? */ skHeapReplaceTop(heap, &lowest, NULL); } else { /* read succeeded. record is a duplicate; ignore * the record and leave the heap unchanged */ continue; } /* get the record at the top of the heap and see if it * is a duplicate; if it is, ignore it. */ skHeapPeekTop(heap, (skheapnode_t*)&top_heap); lowest = *top_heap; } while (0 == rwrecCompare(lowest_rec, recs[lowest])); } while (heap_count > 0); TRACEMSG((("Finished processing #%d through #%d"), tmp_idx_a, tmp_idx_b)); /* Close all open temp files */ for (i = 0; i < open_count; ++i) { skStreamDestroy(&fps[i]); } /* Delete all temp files we opened (or attempted to open) this * time */ for (j = tmp_idx_a; j <= tmp_idx_b; ++j) { skTempFileRemove(tmpctx, j); } /* Close the intermediate temp file. 
*/ if (fp_intermediate) { rv = skStreamClose(fp_intermediate); if (rv) { skStreamLastErrMessage( fp_intermediate, rv, errbuf, sizeof(errbuf)); skAppPrintErr("Error closing temporary file: %s", errbuf); skStreamDestroy(&fp_intermediate); appExit(EXIT_FAILURE); } skStreamDestroy(&fp_intermediate); } /* Start the next merge with the next input temp file */ tmp_idx_a = tmp_idx_b + 1; } while (!opened_all_temps); skHeapFree(heap); }
/* * sortRandom(); * * Don't make any assumptions about the input. Store the input * records in a large buffer, and sort those in-core records once * all records are processed or the buffer is full. If the buffer * fills up, store the sorted records into temporary files. Once * all records are read, use mergeFiles() above to merge-sort the * temporary files. * * Exits the application if an error occurs. */ static void sortRandom( void) { int temp_file_idx = -1; skstream_t *input_rwios = NULL; /* input stream */ uint8_t *record_buffer = NULL; /* Region of memory for records */ uint8_t *cur_node = NULL; /* Ptr into record_buffer */ uint8_t *next_node = NULL; /* Ptr into record_buffer */ uint32_t buffer_max_recs; /* max buffer size (in number of recs) */ uint32_t buffer_recs; /* current buffer size (# records) */ uint32_t buffer_chunk_recs; /* how to grow from current to max buf */ uint32_t num_chunks; /* how quickly to grow buffer */ uint32_t record_count = 0; /* Number of records read */ int rv; /* Determine the maximum number of records that will fit into the * buffer if it grows the maximum size */ buffer_max_recs = buffer_size / NODE_SIZE; TRACEMSG((("buffer_size = %" PRIu64 "\nnode_size = %" PRIu32 "\nbuffer_max_recs = %" PRIu32), buffer_size, NODE_SIZE, buffer_max_recs)); /* We will grow to the maximum size in chunks */ num_chunks = NUM_CHUNKS; if (num_chunks <= 0) { num_chunks = 1; } /* Attempt to allocate the initial chunk. If we fail, increment * the number of chunks---which will decrease the amount we * attempt to allocate at once---and try again. 
*/ for (;;) { buffer_chunk_recs = buffer_max_recs / num_chunks; TRACEMSG((("num_chunks = %" PRIu32 "\nbuffer_chunk_recs = %" PRIu32), num_chunks, buffer_chunk_recs)); record_buffer = (uint8_t*)malloc(NODE_SIZE * buffer_chunk_recs); if (record_buffer) { /* malloc was successful */ break; } else if (buffer_chunk_recs < MIN_IN_CORE_RECORDS) { /* give up at this point */ skAppPrintErr("Error allocating space for %d records", MIN_IN_CORE_RECORDS); appExit(EXIT_FAILURE); } else { /* reduce the amount we allocate at once by increasing the * number of chunks and try again */ TRACEMSG(("malloc() failed")); ++num_chunks; } } buffer_recs = buffer_chunk_recs; TRACEMSG((("buffer_recs = %" PRIu32), buffer_recs)); /* open first file */ rv = appNextInput(&input_rwios); if (rv < 0) { free(record_buffer); appExit(EXIT_FAILURE); } record_count = 0; cur_node = record_buffer; while (input_rwios != NULL) { /* read record */ if ((rv = skStreamReadRecord(input_rwios, (rwRec*)cur_node)) != SKSTREAM_OK) { if (rv != SKSTREAM_ERR_EOF) { skStreamPrintLastErr(input_rwios, rv, &skAppPrintErr); } /* end of file: close current and open next */ skStreamDestroy(&input_rwios); rv = appNextInput(&input_rwios); if (rv < 0) { free(record_buffer); appExit(EXIT_FAILURE); } continue; } ++record_count; cur_node += NODE_SIZE; if (record_count == buffer_recs) { /* Filled the current buffer */ /* If buffer not at max size, see if we can grow it */ if (buffer_recs < buffer_max_recs) { uint8_t *old_buf = record_buffer; /* add a chunk of records. 
if we are near the max, * set the size to the max */ buffer_recs += buffer_chunk_recs; if (buffer_recs + buffer_chunk_recs > buffer_max_recs) { buffer_recs = buffer_max_recs; } TRACEMSG((("Buffer full---attempt to grow to %" PRIu32 " records, %" PRIu32 " bytes"), buffer_recs, NODE_SIZE * buffer_recs)); /* attempt to grow */ record_buffer = (uint8_t*)realloc(record_buffer, NODE_SIZE * buffer_recs); if (record_buffer) { /* Success, make certain cur_node points into the * new buffer */ cur_node = (record_buffer + (record_count * NODE_SIZE)); } else { /* Unable to grow it */ TRACEMSG(("realloc() failed")); record_buffer = old_buf; buffer_max_recs = buffer_recs = record_count; } } /* Either buffer at maximum size or attempt to grow it * failed. */ if (record_count == buffer_max_recs) { /* Sort */ skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare); /* Write to temp file */ if (skTempFileWriteBufferStream( tmpctx, &temp_file_idx, record_buffer, NODE_SIZE, record_count)) { skAppPrintSyserror( "Error writing sorted buffer to temporary file"); free(record_buffer); appExit(EXIT_FAILURE); } /* Reset record buffer to 'empty' */ record_count = 0; cur_node = record_buffer; } } } /* Sort (and maybe store) last batch of records */ if (record_count > 0) { skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare); if (temp_file_idx >= 0) { /* Write last batch to temp file */ if (skTempFileWriteBufferStream( tmpctx, &temp_file_idx, record_buffer, NODE_SIZE, record_count)) { skAppPrintSyserror( "Error writing sorted buffer to temporary file"); free(record_buffer); appExit(EXIT_FAILURE); } } } /* Generate the output */ if (record_count == 0 && temp_file_idx == -1) { /* No records were read at all; write the header to the output * file */ rv = skStreamWriteSilkHeader(out_rwios); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); } } else if (temp_file_idx == -1) { /* No temp files written, just output batch of records */ uint32_t c; TRACEMSG((("Writing 
%" PRIu32 " records to '%s'"), record_count, skStreamGetPathname(out_rwios))); /* get first two records from the sorted buffer */ cur_node = record_buffer; next_node = record_buffer + NODE_SIZE; for (c = 1; c < record_count; ++c, next_node += NODE_SIZE) { if (0 != rwrecCompare(cur_node, next_node)) { /* records differ. print earlier record */ rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { free(record_buffer); appExit(EXIT_FAILURE); } } cur_node = next_node; } /* else records are duplicates: ignore latter record */ } /* print remaining record */ rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { free(record_buffer); appExit(EXIT_FAILURE); } } } else { /* no longer have a need for the record buffer */ free(record_buffer); record_buffer = NULL; /* now merge all the temp files */ mergeFiles(temp_file_idx); } if (record_buffer) { free(record_buffer); } }
int main(int argc, char **argv) { char pathname[PATH_MAX]; struct stat statbuf; unsigned int numFiles = 0; unsigned int numOnTape = 0; appSetup(argc, argv); switch ((no_block_check << 1) | (no_file_names)) { case 3: /* do not stat() the files; do not print file names */ while (fglobNext(pathname, sizeof(pathname)) != NULL) { ++numFiles; } break; case 2: /* do not stat() the files; print file names */ while (fglobNext(pathname, sizeof(pathname)) != NULL) { fprintf(OUTPUT_FH, "%s\n", pathname); ++numFiles; } break; case 1: /* stat the files; do not print the file names */ while (fglobNext(pathname, sizeof(pathname)) != NULL) { if (-1 == stat(pathname, &statbuf)) { /* should never happen; fglob wouldn't have returned it */ skAppPrintSyserror("Cannot stat '%s'", pathname); exit(EXIT_FAILURE); } if (0 == statbuf.st_blocks && statbuf.st_size > 0) { ++numOnTape; } ++numFiles; } break; case 0: /* stat the files; print the file names */ while (fglobNext(pathname, sizeof(pathname)) != NULL) { if (-1 == stat(pathname, &statbuf)) { /* should never happen; fglob wouldn't have returned it */ skAppPrintSyserror("Cannot stat '%s'", pathname); exit(EXIT_FAILURE); } if (0 == statbuf.st_blocks && statbuf.st_size > 0) { fprintf(OUTPUT_FH, "%s%s\n", BLOCK_CHECK_ZERO_MSG, pathname); ++numOnTape; } else { fprintf(OUTPUT_FH, "%s\n", pathname); } ++numFiles; } break; default: skAbortBadCase((no_block_check << 1) | (no_file_names)); } if (!no_summary) { if (no_block_check) { fprintf(OUTPUT_FH, "globbed %u files\n", numFiles); } else { fprintf(OUTPUT_FH, "globbed %u files; %u on tape\n", numFiles, numOnTape); } } return 0; }