END_TEST static void test_manifest_tell_seek(enum protocol protocol, int phase) { struct slist *slist; struct manio *manio; struct sbuf *sb=NULL; man_off_t *offset=NULL; int entries=1000; prng_init(0); base64_init(); hexmap_init(); recursive_delete(path); slist=build_manifest(path, protocol, entries, phase); fail_unless(slist!=NULL); sb=slist->head; fail_unless((manio=do_manio_open(path, "rb", protocol, phase))!=NULL); read_manifest(&sb, manio, 0, entries/2, protocol, phase); fail_unless((offset=manio_tell(manio))!=NULL); fail_unless(sb!=NULL); fail_unless(!manio_close(&manio)); fail_unless((manio=do_manio_open(path, "rb", protocol, phase))!=NULL); fail_unless(!manio_seek(manio, offset)); read_manifest(&sb, manio, entries/2, entries, protocol, phase); fail_unless(sb==NULL); fail_unless(!manio_close(&manio)); fail_unless(!manio); slist_free(&slist); man_off_t_free(&offset); tear_down(); }
/* * Try to get the object hash from a manifest file. Caller frees. Returns NULL * on failure. */ struct file_hash * manifest_get(struct conf *conf, const char *manifest_path) { int fd; gzFile f = NULL; struct manifest *mf = NULL; struct hashtable *hashed_files = NULL; /* path --> struct file_hash */ struct hashtable *stated_files = NULL; /* path --> struct file_stats */ uint32_t i; struct file_hash *fh = NULL; fd = open(manifest_path, O_RDONLY | O_BINARY); if (fd == -1) { /* Cache miss. */ cc_log("No such manifest file"); goto out; } f = gzdopen(fd, "rb"); if (!f) { close(fd); cc_log("Failed to gzdopen manifest file"); goto out; } mf = read_manifest(f); if (!mf) { cc_log("Error reading manifest file"); goto out; } hashed_files = create_hashtable(1000, hash_from_string, strings_equal); stated_files = create_hashtable(1000, hash_from_string, strings_equal); /* Check newest object first since it's a bit more likely to match. */ for (i = mf->n_objects; i > 0; i--) { if (verify_object(conf, mf, &mf->objects[i - 1], stated_files, hashed_files)) { fh = x_malloc(sizeof(*fh)); *fh = mf->objects[i - 1].hash; goto out; } } out: if (hashed_files) { hashtable_destroy(hashed_files, 1); } if (stated_files) { hashtable_destroy(stated_files, 1); } if (f) { gzclose(f); } if (mf) { free_manifest(mf); } return fh; }
bool manifest_dump(const char *manifest_path, FILE *stream) { struct manifest *mf = NULL; gzFile f = NULL; bool ret = false; int fd = open(manifest_path, O_RDONLY | O_BINARY); if (fd == -1) { fprintf(stderr, "No such manifest file: %s\n", manifest_path); goto out; } f = gzdopen(fd, "rb"); if (!f) { fprintf(stderr, "Failed to dzopen manifest file\n"); close(fd); goto out; } mf = read_manifest(f); if (!mf) { fprintf(stderr, "Error reading manifest file\n"); goto out; } fprintf(stream, "Magic: %c%c%c%c\n", (MAGIC >> 24) & 0xFF, (MAGIC >> 16) & 0xFF, (MAGIC >> 8) & 0xFF, MAGIC & 0xFF); fprintf(stream, "Version: %u\n", mf->version); fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size); fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved); fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files); for (unsigned i = 0; i < mf->n_files; ++i) { fprintf(stream, " %u: %s\n", i, mf->files[i]); } fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos); for (unsigned i = 0; i < mf->n_file_infos; ++i) { char *hash; fprintf(stream, " %u:\n", i); fprintf(stream, " Path index: %u\n", mf->file_infos[i].index); hash = format_hash_as_string(mf->file_infos[i].hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); fprintf(stream, " Size: %u\n", mf->file_infos[i].size); fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime); fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime); } fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects); for (unsigned i = 0; i < mf->n_objects; ++i) { char *hash; fprintf(stream, " %u:\n", i); fprintf(stream, " File info indexes:"); for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) { fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]); } fprintf(stream, "\n"); hash = format_hash_as_string(mf->objects[i].hash.hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size); } ret = true; out: if (mf) { free_manifest(mf); } if (f) { gzclose(f); } return ret; }
// Put the object name into a manifest file given a set of included files. // Returns true on success, otherwise false. bool manifest_put(const char *manifest_path, struct file_hash *object_hash, struct hashtable *included_files) { int ret = 0; gzFile f2 = NULL; struct manifest *mf = NULL; char *tmp_file = NULL; // We don't bother to acquire a lock when writing the manifest to disk. A // race between two processes will only result in one lost entry, which is // not a big deal, and it's also very unlikely. int fd1 = open(manifest_path, O_RDONLY | O_BINARY); if (fd1 == -1) { // New file. mf = create_empty_manifest(); } else { gzFile f1 = gzdopen(fd1, "rb"); if (!f1) { cc_log("Failed to gzdopen manifest file"); close(fd1); goto out; } mf = read_manifest(f1); gzclose(f1); if (!mf) { cc_log("Failed to read manifest file; deleting it"); x_unlink(manifest_path); mf = create_empty_manifest(); } } if (mf->n_objects > MAX_MANIFEST_ENTRIES) { // Normally, there shouldn't be many object entries in the manifest since // new entries are added only if an include file has changed but not the // source file, and you typically change source files more often than // header files. However, it's certainly possible to imagine cases where // the manifest will grow large (for instance, a generated header file that // changes for every build), and this must be taken care of since // processing an ever growing manifest eventually will take too much time. // A good way of solving this would be to maintain the object entries in // LRU order and discarding the old ones. An easy way is to throw away all // entries when there are too many. Let's do that for now. cc_log("More than %u entries in manifest file; discarding", MAX_MANIFEST_ENTRIES); free_manifest(mf); mf = create_empty_manifest(); } else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) { // Rarely, file_info entries can grow large in pathological cases where // many included files change, but the main file does not. This also puts // an upper bound on the number of file_info entries. cc_log("More than %u file_info entries in manifest file; discarding", MAX_MANIFEST_FILE_INFO_ENTRIES); free_manifest(mf); mf = create_empty_manifest(); } tmp_file = format("%s.tmp", manifest_path); int fd2 = create_tmp_fd(&tmp_file); f2 = gzdopen(fd2, "wb"); if (!f2) { cc_log("Failed to gzdopen %s", tmp_file); goto out; } add_object_entry(mf, object_hash, included_files); if (write_manifest(f2, mf)) { gzclose(f2); f2 = NULL; if (x_rename(tmp_file, manifest_path) == 0) { ret = 1; } else { cc_log("Failed to rename %s to %s", tmp_file, manifest_path); goto out; } } else { cc_log("Failed to write manifest file"); goto out; } out: if (mf) { free_manifest(mf); } if (tmp_file) { free(tmp_file); } if (f2) { gzclose(f2); } return ret; }
/* F U N C T I O N S *********************************************************/ void process_sff_to_fastq(char *sff_file, int trim_flag) { sff_read_header rh; sff_read_data rd; FILE *sff_fp, *fastq_fp; if ( (sff_fp = fopen(sff_file, "r")) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for reading.\n", sff_file); exit(1); } get_sff_file_size(sff_fp); read_sff_common_header(sff_fp, &h); verify_sff_common_header(&h); if ( keep_fastq_orig == true ) { vector<string> tmp_rep; split_str(string(sff_file), tmp_rep, "//"); if ( ( fastq_fp = fopen( (tmp_rep[tmp_rep.size() - 1].substr(0,tmp_rep[tmp_rep.size() - 1].length()-4) + ".fastq").c_str(), "w") ) == NULL ) { fprintf(stderr, "[err] Could not open file '%s' for writing.\n", ""); exit(1); } } int left_clip = 0, right_clip = 0, nbases = 0; char *name; char *bases; uint8_t *quality; //register int i; unsigned int numreads = h.nreads; for (int i = 0; i < numreads; i++) { //cout << i << " " << numreads << endl; read_sff_read_header(sff_fp, &rh); read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases); //rheaders.push_back(rh); // get clipping points get_clip_values(rh, trim_flag, &left_clip, &right_clip); nbases = right_clip - left_clip; // create bases string bases = get_read_bases(rd, left_clip, right_clip); // create quality array quality = get_read_quality_values(rd, left_clip, right_clip); //Create new read Read *read = new Read(); read->initial_length = nbases; read->read = string(bases); uint8_t quality_char; read->quality = (uint8_t*)malloc(sizeof(uint8_t)*nbases); for (int j = 0; j < nbases; j++) { quality_char = (quality[j] <= 93 ? quality[j] : 93) + 33; read->quality[j] = quality_char; } //read->rd = rd; read->flowgram = new uint16_t[h.flow_len]; for(int j=0; j<h.flow_len; j++) { read->flowgram[j] = rd.flowgram[j]; //cout << rd.flowgram[j] << " " << endl; } read->flow_index = (uint8_t*)malloc(sizeof(uint8_t)*nbases); for(int j=0; j<nbases; j++) { read->flow_index[j] = rd.flow_index[j]; } read->roche_left_clip = (int) max(1, max(rh.clip_qual_left, rh.clip_adapter_left)) - 1; read->roche_right_clip = (int) min( (rh.clip_qual_right == 0 ? rh.nbases : rh.clip_qual_right ), (rh.clip_adapter_right == 0 ? rh.nbases : rh.clip_adapter_right) ); reads.push_back(read); string tstr = string(rh.name) + " " + string(itoa(rh.clip_adapter_left,new char[5],10)) + " " + string(itoa(rh.clip_adapter_right,new char[5],10))+ " " + string(itoa(rh.clip_qual_left,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)); int t_len = tstr.length(); // create read name string int name_length = (int) t_len + 1; // account for NULL termination name = (char *) malloc( name_length * sizeof(char) ); if (!name) { fprintf(stderr, "Out of memory! For read name string!\n"); exit(1); } memset(name, '\0', (size_t) name_length); read->readID = (char *) malloc( rh.name_len * sizeof(char) ); //read->readID = rh.name; memcpy( read->readID, rh.name, (size_t) rh.name_len ); //strncpy(name, rh.name, (size_t) rh.name_len); strncpy(name, tstr.c_str(), (size_t)t_len); if ( keep_fastq_orig == true ) construct_fastq_entry(fastq_fp, name, bases, quality, nbases); //printf("%d\n",rh.name_len); free(name); free(bases); free(quality); free_sff_read_header(&rh); free_sff_read_data(&rd); } read_manifest(sff_fp); //free_sff_common_header(&h); if ( keep_fastq_orig == true ) fclose(fastq_fp); fclose(sff_fp); }
/* * Put the object name into a manifest file given a set of included files. * Returns true on success, otherwise false. */ bool manifest_put(const char *manifest_path, struct file_hash *object_hash, struct hashtable *included_files) { int ret = 0; int fd1; int fd2; gzFile f2 = NULL; struct manifest *mf = NULL; char *tmp_file = NULL; /* * We don't bother to acquire a lock when writing the manifest to disk. A * race between two processes will only result in one lost entry, which is * not a big deal, and it's also very unlikely. */ fd1 = open(manifest_path, O_RDONLY | O_BINARY); if (fd1 == -1) { /* New file. */ mf = create_empty_manifest(); } else { gzFile f1 = gzdopen(fd1, "rb"); if (!f1) { cc_log("Failed to gzdopen manifest file"); close(fd1); goto out; } mf = read_manifest(f1); gzclose(f1); if (!mf) { cc_log("Failed to read manifest file; deleting it"); x_unlink(manifest_path); mf = create_empty_manifest(); } } if (mf->n_objects > MAX_MANIFEST_ENTRIES) { /* * Normally, there shouldn't be many object entries in the manifest since * new entries are added only if an include file has changed but not the * source file, and you typically change source files more often than * header files. However, it's certainly possible to imagine cases where * the manifest will grow large (for instance, a generated header file that * changes for every build), and this must be taken care of since * processing an ever growing manifest eventually will take too much time. * A good way of solving this would be to maintain the object entries in * LRU order and discarding the old ones. An easy way is to throw away all * entries when there are too many. Let's do that for now. */ cc_log("More than %u entries in manifest file; discarding", MAX_MANIFEST_ENTRIES); free_manifest(mf); mf = create_empty_manifest(); } tmp_file = format("%s.tmp.%s", manifest_path, tmp_string()); fd2 = safe_create_wronly(tmp_file); if (fd2 == -1) { cc_log("Failed to open %s", tmp_file); goto out; } f2 = gzdopen(fd2, "wb"); if (!f2) { cc_log("Failed to gzdopen %s", tmp_file); goto out; } add_object_entry(mf, object_hash, included_files); if (write_manifest(f2, mf)) { gzclose(f2); f2 = NULL; if (x_rename(tmp_file, manifest_path) == 0) { ret = 1; } else { cc_log("Failed to rename %s to %s", tmp_file, manifest_path); goto out; } } else { cc_log("Failed to write manifest file"); goto out; } out: if (mf) { free_manifest(mf); } if (tmp_file) { free(tmp_file); } if (f2) { gzclose(f2); } return ret; }