Exemple #1
0
END_TEST

static void test_manifest_tell_seek(enum protocol protocol, int phase)
{
	struct slist *slist;
	struct manio *manio;
	struct sbuf *sb=NULL;
	man_off_t *offset=NULL;
	int entries=1000;
	prng_init(0);
	base64_init();
	hexmap_init();
	recursive_delete(path);

	slist=build_manifest(path, protocol, entries, phase);
	fail_unless(slist!=NULL);

	sb=slist->head;
	fail_unless((manio=do_manio_open(path, "rb", protocol, phase))!=NULL);
	read_manifest(&sb, manio, 0, entries/2, protocol, phase);
	fail_unless((offset=manio_tell(manio))!=NULL);
	fail_unless(sb!=NULL);
	fail_unless(!manio_close(&manio));

	fail_unless((manio=do_manio_open(path, "rb", protocol, phase))!=NULL);
	fail_unless(!manio_seek(manio, offset));
	read_manifest(&sb, manio, entries/2, entries, protocol, phase);
	fail_unless(sb==NULL);
	fail_unless(!manio_close(&manio));
	fail_unless(!manio);

	slist_free(&slist);
	man_off_t_free(&offset);
	tear_down();
}
Exemple #2
0
/*
 * Try to get the object hash from a manifest file. Caller frees. Returns NULL
 * on failure.
 */
struct file_hash *
manifest_get(struct conf *conf, const char *manifest_path)
{
	int fd;
	gzFile f = NULL;
	struct manifest *mf = NULL;
	struct hashtable *hashed_files = NULL; /* path --> struct file_hash */
	struct hashtable *stated_files = NULL; /* path --> struct file_stats */
	uint32_t i;
	struct file_hash *fh = NULL;

	fd = open(manifest_path, O_RDONLY | O_BINARY);
	if (fd == -1) {
		/* Cache miss. */
		cc_log("No such manifest file");
		goto out;
	}
	f = gzdopen(fd, "rb");
	if (!f) {
		close(fd);
		cc_log("Failed to gzdopen manifest file");
		goto out;
	}
	mf = read_manifest(f);
	if (!mf) {
		cc_log("Error reading manifest file");
		goto out;
	}

	hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
	stated_files = create_hashtable(1000, hash_from_string, strings_equal);

	/* Check newest object first since it's a bit more likely to match. */
	for (i = mf->n_objects; i > 0; i--) {
		if (verify_object(conf, mf, &mf->objects[i - 1],
		                  stated_files, hashed_files)) {
			fh = x_malloc(sizeof(*fh));
			*fh = mf->objects[i - 1].hash;
			goto out;
		}
	}

out:
	if (hashed_files) {
		hashtable_destroy(hashed_files, 1);
	}
	if (stated_files) {
		hashtable_destroy(stated_files, 1);
	}
	if (f) {
		gzclose(f);
	}
	if (mf) {
		free_manifest(mf);
	}
	return fh;
}
Exemple #3
0
bool
manifest_dump(const char *manifest_path, FILE *stream)
{
	struct manifest *mf = NULL;
	gzFile f = NULL;
	bool ret = false;

	int fd = open(manifest_path, O_RDONLY | O_BINARY);
	if (fd == -1) {
		fprintf(stderr, "No such manifest file: %s\n", manifest_path);
		goto out;
	}
	f = gzdopen(fd, "rb");
	if (!f) {
		fprintf(stderr, "Failed to dzopen manifest file\n");
		close(fd);
		goto out;
	}
	mf = read_manifest(f);
	if (!mf) {
		fprintf(stderr, "Error reading manifest file\n");
		goto out;
	}

	fprintf(stream, "Magic: %c%c%c%c\n",
	        (MAGIC >> 24) & 0xFF,
	        (MAGIC >> 16) & 0xFF,
	        (MAGIC >> 8) & 0xFF,
	        MAGIC & 0xFF);
	fprintf(stream, "Version: %u\n", mf->version);
	fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size);
	fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved);
	fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files);
	for (unsigned i = 0; i < mf->n_files; ++i) {
		fprintf(stream, "  %u: %s\n", i, mf->files[i]);
	}
	fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos);
	for (unsigned i = 0; i < mf->n_file_infos; ++i) {
		char *hash;
		fprintf(stream, "  %u:\n", i);
		fprintf(stream, "    Path index: %u\n", mf->file_infos[i].index);
		hash = format_hash_as_string(mf->file_infos[i].hash, -1);
		fprintf(stream, "    Hash: %s\n", hash);
		free(hash);
		fprintf(stream, "    Size: %u\n", mf->file_infos[i].size);
		fprintf(stream, "    Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
		fprintf(stream, "    Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
	}
	fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
	for (unsigned i = 0; i < mf->n_objects; ++i) {
		char *hash;
		fprintf(stream, "  %u:\n", i);
		fprintf(stream, "    File info indexes:");
		for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) {
			fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]);
		}
		fprintf(stream, "\n");
		hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
		fprintf(stream, "    Hash: %s\n", hash);
		free(hash);
		fprintf(stream, "    Size: %u\n", (unsigned)mf->objects[i].hash.size);
	}

	ret = true;

out:
	if (mf) {
		free_manifest(mf);
	}
	if (f) {
		gzclose(f);
	}
	return ret;
}
Exemple #4
0
// Put the object name into a manifest file given a set of included files.
// Returns true on success, otherwise false.
bool
manifest_put(const char *manifest_path, struct file_hash *object_hash,
             struct hashtable *included_files)
{
	int ret = 0;
	gzFile f2 = NULL;
	struct manifest *mf = NULL;
	char *tmp_file = NULL;

	// We don't bother to acquire a lock when writing the manifest to disk. A
	// race between two processes will only result in one lost entry, which is
	// not a big deal, and it's also very unlikely.

	int fd1 = open(manifest_path, O_RDONLY | O_BINARY);
	if (fd1 == -1) {
		// New file.
		mf = create_empty_manifest();
	} else {
		gzFile f1 = gzdopen(fd1, "rb");
		if (!f1) {
			cc_log("Failed to gzdopen manifest file");
			close(fd1);
			goto out;
		}
		mf = read_manifest(f1);
		gzclose(f1);
		if (!mf) {
			cc_log("Failed to read manifest file; deleting it");
			x_unlink(manifest_path);
			mf = create_empty_manifest();
		}
	}

	if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
		// Normally, there shouldn't be many object entries in the manifest since
		// new entries are added only if an include file has changed but not the
		// source file, and you typically change source files more often than
		// header files. However, it's certainly possible to imagine cases where
		// the manifest will grow large (for instance, a generated header file that
		// changes for every build), and this must be taken care of since
		// processing an ever growing manifest eventually will take too much time.
		// A good way of solving this would be to maintain the object entries in
		// LRU order and discarding the old ones. An easy way is to throw away all
		// entries when there are too many. Let's do that for now.
		cc_log("More than %u entries in manifest file; discarding",
		       MAX_MANIFEST_ENTRIES);
		free_manifest(mf);
		mf = create_empty_manifest();
	} else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) {
		// Rarely, file_info entries can grow large in pathological cases where
		// many included files change, but the main file does not. This also puts
		// an upper bound on the number of file_info entries.
		cc_log("More than %u file_info entries in manifest file; discarding",
		       MAX_MANIFEST_FILE_INFO_ENTRIES);
		free_manifest(mf);
		mf = create_empty_manifest();
	}

	tmp_file = format("%s.tmp", manifest_path);
	int fd2 = create_tmp_fd(&tmp_file);
	f2 = gzdopen(fd2, "wb");
	if (!f2) {
		cc_log("Failed to gzdopen %s", tmp_file);
		goto out;
	}

	add_object_entry(mf, object_hash, included_files);
	if (write_manifest(f2, mf)) {
		gzclose(f2);
		f2 = NULL;
		if (x_rename(tmp_file, manifest_path) == 0) {
			ret = 1;
		} else {
			cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
			goto out;
		}
	} else {
		cc_log("Failed to write manifest file");
		goto out;
	}

out:
	if (mf) {
		free_manifest(mf);
	}
	if (tmp_file) {
		free(tmp_file);
	}
	if (f2) {
		gzclose(f2);
	}
	return ret;
}
Exemple #5
0
/* F U N C T I O N S *********************************************************/
void process_sff_to_fastq(char *sff_file, int trim_flag) {
    sff_read_header rh;
    sff_read_data rd;
    FILE *sff_fp, *fastq_fp;

    if ( (sff_fp = fopen(sff_file, "r")) == NULL ) {
        fprintf(stderr,
                "[err] Could not open file '%s' for reading.\n", sff_file);
        exit(1);
    }
    
    get_sff_file_size(sff_fp);
    
    read_sff_common_header(sff_fp, &h);
    verify_sff_common_header(&h);


    if ( keep_fastq_orig == true ) {
        vector<string> tmp_rep;
        split_str(string(sff_file), tmp_rep, "//");
    
        if ( ( fastq_fp = fopen( (tmp_rep[tmp_rep.size() - 1].substr(0,tmp_rep[tmp_rep.size() - 1].length()-4) + ".fastq").c_str(), "w") ) == NULL ) {
            fprintf(stderr,
                    "[err] Could not open file '%s' for writing.\n",
                    "");
            exit(1);
        }
    }

    int left_clip = 0, right_clip = 0, nbases = 0;
    char *name;
    char *bases;
    uint8_t *quality;
    //register int i;
    
    unsigned int numreads = h.nreads;
    
    for (int i = 0; i < numreads; i++) { //cout << i << " " << numreads << endl;
        read_sff_read_header(sff_fp, &rh);
        read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases);
        
        //rheaders.push_back(rh);
        // get clipping points 
        get_clip_values(rh, trim_flag, &left_clip, &right_clip);
        nbases = right_clip - left_clip;

        // create bases string 
        bases = get_read_bases(rd, left_clip, right_clip);

        // create quality array 
        quality = get_read_quality_values(rd, left_clip, right_clip);

        //Create new read
        Read *read = new Read();
        
        read->initial_length = nbases;
        read->read = string(bases);
        uint8_t quality_char;
        read->quality = (uint8_t*)malloc(sizeof(uint8_t)*nbases);
        for (int j = 0; j < nbases; j++) 
        {
           quality_char = (quality[j] <= 93 ? quality[j] : 93) + 33;
           read->quality[j] = quality_char;
        }
       
        //read->rd = rd;
        read->flowgram = new uint16_t[h.flow_len];
        for(int j=0; j<h.flow_len; j++) {
                read->flowgram[j] = rd.flowgram[j];
                //cout << rd.flowgram[j] << " " << endl;
                
        }
        
        read->flow_index = (uint8_t*)malloc(sizeof(uint8_t)*nbases);
        for(int j=0; j<nbases; j++) {
                read->flow_index[j] = rd.flow_index[j];
                
        }
        
        read->roche_left_clip = (int) max(1, max(rh.clip_qual_left, rh.clip_adapter_left)) - 1;
        read->roche_right_clip = (int) min( (rh.clip_qual_right    == 0 ? rh.nbases : rh.clip_qual_right   ), (rh.clip_adapter_right == 0 ? rh.nbases : rh.clip_adapter_right) );
        
        reads.push_back(read);
        
        
        string tstr = string(rh.name) + " " + string(itoa(rh.clip_adapter_left,new char[5],10)) +  " " + string(itoa(rh.clip_adapter_right,new char[5],10))+  " " + string(itoa(rh.clip_qual_left,new char[5],10))  +   " " + string(itoa(rh.clip_qual_right,new char[5],10)) + " " + string(itoa(rh.clip_qual_right,new char[5],10)); 
        int t_len = tstr.length();
        
        
        // create read name string 
        int name_length = (int) t_len + 1; // account for NULL termination
        name = (char *) malloc( name_length * sizeof(char) );
        if (!name) {
            fprintf(stderr, "Out of memory! For read name string!\n");
            exit(1);
        }
        memset(name, '\0', (size_t) name_length);
        
        read->readID = (char *) malloc( rh.name_len * sizeof(char) );
        //read->readID = rh.name;
        memcpy( read->readID, rh.name, (size_t) rh.name_len );
        
        //strncpy(name, rh.name, (size_t) rh.name_len);
        strncpy(name, tstr.c_str(), (size_t)t_len);
        
        if ( keep_fastq_orig == true )
            construct_fastq_entry(fastq_fp, name, bases, quality, nbases);
        //printf("%d\n",rh.name_len);
        free(name);
        free(bases);
        free(quality);
        free_sff_read_header(&rh);
        free_sff_read_data(&rd);
        
        
    }
    
    read_manifest(sff_fp);

    //free_sff_common_header(&h);
    if ( keep_fastq_orig == true )
        fclose(fastq_fp);
    
    fclose(sff_fp);
}
Exemple #6
0
/*
 * Put the object name into a manifest file given a set of included files.
 * Returns true on success, otherwise false.
 */
bool
manifest_put(const char *manifest_path, struct file_hash *object_hash,
             struct hashtable *included_files)
{
	int ret = 0;
	int fd1;
	int fd2;
	gzFile f2 = NULL;
	struct manifest *mf = NULL;
	char *tmp_file = NULL;

	/*
	 * We don't bother to acquire a lock when writing the manifest to disk. A
	 * race between two processes will only result in one lost entry, which is
	 * not a big deal, and it's also very unlikely.
	 */

	fd1 = open(manifest_path, O_RDONLY | O_BINARY);
	if (fd1 == -1) {
		/* New file. */
		mf = create_empty_manifest();
	} else {
		gzFile f1 = gzdopen(fd1, "rb");
		if (!f1) {
			cc_log("Failed to gzdopen manifest file");
			close(fd1);
			goto out;
		}
		mf = read_manifest(f1);
		gzclose(f1);
		if (!mf) {
			cc_log("Failed to read manifest file; deleting it");
			x_unlink(manifest_path);
			mf = create_empty_manifest();
		}
	}

	if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
		/*
		 * Normally, there shouldn't be many object entries in the manifest since
		 * new entries are added only if an include file has changed but not the
		 * source file, and you typically change source files more often than
		 * header files. However, it's certainly possible to imagine cases where
		 * the manifest will grow large (for instance, a generated header file that
		 * changes for every build), and this must be taken care of since
		 * processing an ever growing manifest eventually will take too much time.
		 * A good way of solving this would be to maintain the object entries in
		 * LRU order and discarding the old ones. An easy way is to throw away all
		 * entries when there are too many. Let's do that for now.
		 */
		cc_log("More than %u entries in manifest file; discarding",
		       MAX_MANIFEST_ENTRIES);
		free_manifest(mf);
		mf = create_empty_manifest();
	}

	tmp_file = format("%s.tmp.%s", manifest_path, tmp_string());
	fd2 = safe_create_wronly(tmp_file);
	if (fd2 == -1) {
		cc_log("Failed to open %s", tmp_file);
		goto out;
	}
	f2 = gzdopen(fd2, "wb");
	if (!f2) {
		cc_log("Failed to gzdopen %s", tmp_file);
		goto out;
	}

	add_object_entry(mf, object_hash, included_files);
	if (write_manifest(f2, mf)) {
		gzclose(f2);
		f2 = NULL;
		if (x_rename(tmp_file, manifest_path) == 0) {
			ret = 1;
		} else {
			cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
			goto out;
		}
	} else {
		cc_log("Failed to write manifest file");
		goto out;
	}

out:
	if (mf) {
		free_manifest(mf);
	}
	if (tmp_file) {
		free(tmp_file);
	}
	if (f2) {
		gzclose(f2);
	}
	return ret;
}