/* * count = pdu2silk(filename); * * Read PDUs from 'filename' and write records to the global * 'silk_output' file. Return number of records processed, or -1 * on error. */ static int64_t pdu2silk( const char *filename) { static unsigned int file_count = 0; char probe_name[128]; skPDUSource_t *pdu_src; skFlowSourceParams_t params; int64_t count; rwRec rwrec; int rv; ++file_count; snprintf(probe_name, sizeof(probe_name), "input%04u", file_count); params.path_name = filename; skpcProbeSetName(probe, probe_name); pdu_src = skPDUSourceCreate(probe, ¶ms); if (pdu_src == NULL) { return -1; } count = 0; while (-1 != skPDUSourceGetGeneric(pdu_src, &rwrec)) { ++count; rv = skStreamWriteRecord(silk_output, &rwrec); if (rv) { skStreamPrintLastErr(silk_output, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { exit(EXIT_FAILURE); } } } skPDUSourceLogStatsAndClear(pdu_src); skPDUSourceDestroy(pdu_src); return count; }
/* * sortRandom(); * * Don't make any assumptions about the input. Store the input * records in a large buffer, and sort those in-core records once * all records are processed or the buffer is full. If the buffer * fills up, store the sorted records into temporary files. Once * all records are read, use mergeFiles() above to merge-sort the * temporary files. * * Exits the application if an error occurs. */ static void sortRandom( void) { int temp_file_idx = -1; skstream_t *input_rwios = NULL; /* input stream */ uint8_t *record_buffer = NULL; /* Region of memory for records */ uint8_t *cur_node = NULL; /* Ptr into record_buffer */ uint8_t *next_node = NULL; /* Ptr into record_buffer */ uint32_t buffer_max_recs; /* max buffer size (in number of recs) */ uint32_t buffer_recs; /* current buffer size (# records) */ uint32_t buffer_chunk_recs; /* how to grow from current to max buf */ uint32_t num_chunks; /* how quickly to grow buffer */ uint32_t record_count = 0; /* Number of records read */ int rv; /* Determine the maximum number of records that will fit into the * buffer if it grows the maximum size */ buffer_max_recs = buffer_size / NODE_SIZE; TRACEMSG((("buffer_size = %" PRIu64 "\nnode_size = %" PRIu32 "\nbuffer_max_recs = %" PRIu32), buffer_size, NODE_SIZE, buffer_max_recs)); /* We will grow to the maximum size in chunks */ num_chunks = NUM_CHUNKS; if (num_chunks <= 0) { num_chunks = 1; } /* Attempt to allocate the initial chunk. If we fail, increment * the number of chunks---which will decrease the amount we * attempt to allocate at once---and try again. */ for (;;) { buffer_chunk_recs = buffer_max_recs / num_chunks; TRACEMSG((("num_chunks = %" PRIu32 "\nbuffer_chunk_recs = %" PRIu32), num_chunks, buffer_chunk_recs)); record_buffer = (uint8_t*)malloc(NODE_SIZE * buffer_chunk_recs); if (record_buffer) { /* malloc was successful */ break; } else if (buffer_chunk_recs < MIN_IN_CORE_RECORDS) { /* give up at this point */ skAppPrintErr("Error allocating space for %d records", MIN_IN_CORE_RECORDS); appExit(EXIT_FAILURE); } else { /* reduce the amount we allocate at once by increasing the * number of chunks and try again */ TRACEMSG(("malloc() failed")); ++num_chunks; } } buffer_recs = buffer_chunk_recs; TRACEMSG((("buffer_recs = %" PRIu32), buffer_recs)); /* open first file */ rv = appNextInput(&input_rwios); if (rv < 0) { free(record_buffer); appExit(EXIT_FAILURE); } record_count = 0; cur_node = record_buffer; while (input_rwios != NULL) { /* read record */ if ((rv = skStreamReadRecord(input_rwios, (rwRec*)cur_node)) != SKSTREAM_OK) { if (rv != SKSTREAM_ERR_EOF) { skStreamPrintLastErr(input_rwios, rv, &skAppPrintErr); } /* end of file: close current and open next */ skStreamDestroy(&input_rwios); rv = appNextInput(&input_rwios); if (rv < 0) { free(record_buffer); appExit(EXIT_FAILURE); } continue; } ++record_count; cur_node += NODE_SIZE; if (record_count == buffer_recs) { /* Filled the current buffer */ /* If buffer not at max size, see if we can grow it */ if (buffer_recs < buffer_max_recs) { uint8_t *old_buf = record_buffer; /* add a chunk of records. if we are near the max, * set the size to the max */ buffer_recs += buffer_chunk_recs; if (buffer_recs + buffer_chunk_recs > buffer_max_recs) { buffer_recs = buffer_max_recs; } TRACEMSG((("Buffer full---attempt to grow to %" PRIu32 " records, %" PRIu32 " bytes"), buffer_recs, NODE_SIZE * buffer_recs)); /* attempt to grow */ record_buffer = (uint8_t*)realloc(record_buffer, NODE_SIZE * buffer_recs); if (record_buffer) { /* Success, make certain cur_node points into the * new buffer */ cur_node = (record_buffer + (record_count * NODE_SIZE)); } else { /* Unable to grow it */ TRACEMSG(("realloc() failed")); record_buffer = old_buf; buffer_max_recs = buffer_recs = record_count; } } /* Either buffer at maximum size or attempt to grow it * failed. */ if (record_count == buffer_max_recs) { /* Sort */ skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare); /* Write to temp file */ if (skTempFileWriteBufferStream( tmpctx, &temp_file_idx, record_buffer, NODE_SIZE, record_count)) { skAppPrintSyserror( "Error writing sorted buffer to temporary file"); free(record_buffer); appExit(EXIT_FAILURE); } /* Reset record buffer to 'empty' */ record_count = 0; cur_node = record_buffer; } } } /* Sort (and maybe store) last batch of records */ if (record_count > 0) { skQSort(record_buffer, record_count, NODE_SIZE, &rwrecCompare); if (temp_file_idx >= 0) { /* Write last batch to temp file */ if (skTempFileWriteBufferStream( tmpctx, &temp_file_idx, record_buffer, NODE_SIZE, record_count)) { skAppPrintSyserror( "Error writing sorted buffer to temporary file"); free(record_buffer); appExit(EXIT_FAILURE); } } } /* Generate the output */ if (record_count == 0 && temp_file_idx == -1) { /* No records were read at all; write the header to the output * file */ rv = skStreamWriteSilkHeader(out_rwios); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); } } else if (temp_file_idx == -1) { /* No temp files written, just output batch of records */ uint32_t c; TRACEMSG((("Writing %" PRIu32 " records to '%s'"), record_count, skStreamGetPathname(out_rwios))); /* get first two records from the sorted buffer */ cur_node = record_buffer; next_node = record_buffer + NODE_SIZE; for (c = 1; c < record_count; ++c, next_node += NODE_SIZE) { if (0 != rwrecCompare(cur_node, next_node)) { /* records differ. print earlier record */ rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { free(record_buffer); appExit(EXIT_FAILURE); } } cur_node = next_node; } /* else records are duplicates: ignore latter record */ } /* print remaining record */ rv = skStreamWriteRecord(out_rwios, (rwRec*)cur_node); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { free(record_buffer); appExit(EXIT_FAILURE); } } } else { /* no longer have a need for the record buffer */ free(record_buffer); record_buffer = NULL; /* now merge all the temp files */ mergeFiles(temp_file_idx); } if (record_buffer) { free(record_buffer); } }
/* * mergeFiles(temp_file_idx) * * Merge the temporary files numbered from 0 to 'temp_file_idx' * inclusive into the output file 'out_ios', maintaining sorted * order. Exits the application if an error occurs. */ static void mergeFiles( int temp_file_idx) { char errbuf[2 * PATH_MAX]; skstream_t *fps[MAX_MERGE_FILES]; uint8_t recs[MAX_MERGE_FILES][NODE_SIZE]; uint8_t lowest_rec[NODE_SIZE]; int j; uint16_t open_count; uint16_t i; uint16_t lowest; uint16_t *top_heap; int tmp_idx_a; int tmp_idx_b; skstream_t *fp_intermediate = NULL; int tmp_idx_intermediate; skheap_t *heap; uint32_t heap_count; int opened_all_temps = 0; ssize_t rv; /* the index of the first temp file to the merge */ tmp_idx_a = 0; TRACEMSG(("Merging #%d through #%d to '%s'", tmp_idx_a, temp_file_idx, skStreamGetPathname(out_rwios))); heap = skHeapCreate2(compHeapNodes, MAX_MERGE_FILES, sizeof(uint16_t), NULL, recs); if (NULL == heap) { skAppPrintOutOfMemory("heap"); appExit(EXIT_FAILURE); } /* This loop repeats as long as we haven't read all of the temp * files generated in the qsort stage. */ do { assert(SKHEAP_ERR_EMPTY==skHeapPeekTop(heap,(skheapnode_t*)&top_heap)); /* the index of the list temp file to merge */ tmp_idx_b = temp_file_idx; /* open an intermediate temp file. The merge-sort will have * to write records here if there are not enough file handles * available to open all the existing tempoary files. */ fp_intermediate = skTempFileCreateStream(tmpctx, &tmp_idx_intermediate); if (fp_intermediate == NULL) { skAppPrintSyserror("Error creating new temporary file"); appExit(EXIT_FAILURE); } /* count number of files we open */ open_count = 0; /* Attempt to open up to MAX_MERGE_FILES, though we an open * may fail due to lack of resources (EMFILE or ENOMEM) */ for (j = tmp_idx_a; j <= tmp_idx_b; ++j) { fps[open_count] = skTempFileOpenStream(tmpctx, j); if (fps[open_count] == NULL) { if ((open_count > 0) && ((errno == EMFILE) || (errno == ENOMEM))) { /* We cannot open any more files. Rewind counter * by one to catch this file on the next merge */ tmp_idx_b = j - 1; TRACEMSG((("FILE limit hit--" "merging #%d through #%d into #%d: %s"), tmp_idx_a, tmp_idx_b, tmp_idx_intermediate, strerror(errno))); break; } else { skAppPrintSyserror(("Error opening existing" " temporary file '%s'"), skTempFileGetName(tmpctx, j)); appExit(EXIT_FAILURE); } } /* read the first record */ rv = skStreamRead(fps[open_count], recs[open_count], NODE_SIZE); if (NODE_SIZE == rv) { /* insert the file index into the heap */ skHeapInsert(heap, &open_count); ++open_count; if (open_count == MAX_MERGE_FILES) { /* We've reached the limit for this pass. Set * tmp_idx_b to the file we just opened. */ tmp_idx_b = j; TRACEMSG((("MAX_MERGE_FILES limit hit--" "merging #%d through #%d to #%d"), tmp_idx_a, tmp_idx_b, tmp_idx_intermediate)); break; } } else if (0 == rv) { TRACEMSG(("Ignoring empty temporary file '%s'", skTempFileGetName(tmpctx, j))); skStreamDestroy(&fps[open_count]); } else { if (rv > 0) { snprintf(errbuf, sizeof(errbuf), "Short read %" SK_PRIdZ "/%" PRIu32 " from '%s'", rv, NODE_SIZE, skStreamGetPathname(fps[open_count])); } else { skStreamLastErrMessage( fps[open_count], rv, errbuf, sizeof(errbuf)); } skAppPrintErr( "Error reading first record from temporary file: %s", errbuf); appExit(EXIT_FAILURE); } } /* Here, we check to see if we've opened all temp files. If * so, set a flag so we write data to final destination and * break out of the loop after we're done. */ if (tmp_idx_b == temp_file_idx) { opened_all_temps = 1; /* no longer need the intermediate temp file */ skStreamDestroy(&fp_intermediate); } else { /* we could not open all temp files, so merge all opened * temp files into the intermediate file. Add the * intermediate file to the list of files to merge */ temp_file_idx = tmp_idx_intermediate; } TRACEMSG((("Merging %" PRIu16 " temporary files"), open_count)); heap_count = skHeapGetNumberEntries(heap); assert(heap_count == open_count); /* get the index of the file with the lowest record; which is * at the top of the heap */ if (skHeapPeekTop(heap, (skheapnode_t*)&top_heap) != SKHEAP_OK) { skAppPrintErr("Unable to open and read any temporary files."); appExit(EXIT_FAILURE); } lowest = *top_heap; /* exit this do...while() once all records for all opened * files have been read */ do { /* lowest_rec is the record pointed to by the index at the * top of the heap */ memcpy(lowest_rec, recs[lowest], NODE_SIZE); /* write the record */ if (fp_intermediate) { /* write the record to intermediate tmp file */ rv = skStreamWrite(fp_intermediate, lowest_rec, NODE_SIZE); if (NODE_SIZE != rv) { skAppPrintSyserror( "Error writing record to temporary file '%s'", skTempFileGetName(tmpctx, tmp_idx_intermediate)); appExit(EXIT_FAILURE); } } else { /* we successfully opened all (remaining) temp files, * write to record to the final destination */ rv = skStreamWriteRecord(out_rwios, (rwRec*)lowest_rec); if (0 != rv) { skStreamPrintLastErr(out_rwios, rv, &skAppPrintErr); if (SKSTREAM_ERROR_IS_FATAL(rv)) { appExit(EXIT_FAILURE); } } } /* replace the record we just processed and loop over all * files until we get a record that is not a duplicate */ do { if ((rv = skStreamRead(fps[lowest], recs[lowest], NODE_SIZE)) != NODE_SIZE) { /* read failed. there is no more data for this * file; remove it from the heap; if the heap is * empty, exit the loop */ skHeapExtractTop(heap, NULL); --heap_count; #if TRACEMSG_LEVEL > 0 if (rv == 0) { TRACEMSG( ("Finished reading file #%u: EOF; %u files remain", (tmp_idx_a + lowest), heap_count)); } else if (rv > 0) { TRACEMSG( ("Finished reading file #%u: Short read " "%" SK_PRIdZ "/%" PRIu32 "; %u files remain", tmp_idx_a + lowest, rv, NODE_SIZE, heap_count)); } else { skStreamLastErrMessage( fps[open_count], rv, errbuf, sizeof(errbuf)); TRACEMSG( ("Finished reading file #%u: %s; %u files remain", (tmp_idx_a + lowest), errbuf, heap_count)); } #endif /* TRACEMSG_LEVEL */ if (0 == heap_count) { break; } } else if (rwrecCompare(lowest_rec, recs[lowest])) { /* read succeeded. new record is not a * duplicate and we insert it into the heap */ /* FIXME: This comparison reduces work when the * keys are the same, but it adds another * comparison when the keys are different; is this * an overall win or lose? */ skHeapReplaceTop(heap, &lowest, NULL); } else { /* read succeeded. record is a duplicate; ignore * the record and leave the heap unchanged */ continue; } /* get the record at the top of the heap and see if it * is a duplicate; if it is, ignore it. */ skHeapPeekTop(heap, (skheapnode_t*)&top_heap); lowest = *top_heap; } while (0 == rwrecCompare(lowest_rec, recs[lowest])); } while (heap_count > 0); TRACEMSG((("Finished processing #%d through #%d"), tmp_idx_a, tmp_idx_b)); /* Close all open temp files */ for (i = 0; i < open_count; ++i) { skStreamDestroy(&fps[i]); } /* Delete all temp files we opened (or attempted to open) this * time */ for (j = tmp_idx_a; j <= tmp_idx_b; ++j) { skTempFileRemove(tmpctx, j); } /* Close the intermediate temp file. */ if (fp_intermediate) { rv = skStreamClose(fp_intermediate); if (rv) { skStreamLastErrMessage( fp_intermediate, rv, errbuf, sizeof(errbuf)); skAppPrintErr("Error closing temporary file: %s", errbuf); skStreamDestroy(&fp_intermediate); appExit(EXIT_FAILURE); } skStreamDestroy(&fp_intermediate); } /* Start the next merge with the next input temp file */ tmp_idx_a = tmp_idx_b + 1; } while (!opened_all_temps); skHeapFree(heap); }