Ejemplo n.º 1
0
/*
 * Print the "RESTART DATA" section: one print_fragment() record per body.
 *
 * Each record carries the fragment name, six coordinates (position scaled by
 * BOHR_RADIUS followed by the three angles produced by matrix_to_euler) and
 * six velocities (linear velocity followed by angular momentum multiplied
 * component-wise by the inverse inertia).
 */
static void print_restart(const struct md *md)
{
	msg("    RESTART DATA\n\n");

	for (size_t idx = 0; idx < md->n_bodies; idx++) {
		struct body *b = &md->bodies[idx];
		char frag_name[64];
		double coord[6] = { 0 };
		double velocity[6];

		check_fail(efp_get_frag_name(md->state->efp, idx,
					     sizeof(frag_name), frag_name));

		/* translational part */
		coord[0] = b->pos.x * BOHR_RADIUS;
		coord[1] = b->pos.y * BOHR_RADIUS;
		coord[2] = b->pos.z * BOHR_RADIUS;

		/* rotational part: the last three slots receive the angles */
		matrix_to_euler(&b->rotmat, &coord[3], &coord[4], &coord[5]);

		velocity[0] = b->vel.x;
		velocity[1] = b->vel.y;
		velocity[2] = b->vel.z;
		velocity[3] = b->angmom.x * b->inertia_inv.x;
		velocity[4] = b->angmom.y * b->inertia_inv.y;
		velocity[5] = b->angmom.z * b->inertia_inv.z;

		print_fragment(frag_name, coord, velocity);
	}

	msg("\n");
}
Ejemplo n.º 2
0
// sort the fragment list by 'mate-position or position of 2nd read' so that reads that are from the same DNA fragment are together
// also takes care of overlapping paired-end reads to avoid duplicates in fragments
void clean_fragmentlist(FRAGMENT* flist,int* fragments,VARIANT* varlist,int currchrom,int currpos,int prevchrom)
{			
	int i=0,j=0,k=0,first=0,sl=0;
	FRAGMENT fragment; fragment.variants =0; fragment.alist = (allele*)malloc(sizeof(allele)*16184);
	if (*fragments > 1) qsort(flist,*fragments,sizeof(FRAGMENT),compare_fragments);
	// sort such that mate pairs are together and reverse sorted by starting position of second read in a mate-piar
	//for (i=0;i<*fragments;i++) fprintf(stdout,"frag %s %d vars %d \n",flist[i].id,flist[i].alist[0].varid,flist[i].variants);
	if (currchrom== prevchrom) // need to ignore the top of the fragment list 
	{
		first=0; while (flist[first].matepos >= currpos && first < *fragments) first++; 
	}
	//	fprintf(stdout,"cleaning the fragment list: current chrom %d %d first %d fragments %d\n",currchrom,currpos,first,*fragments);

	if (*fragments > 1) // bug fixed jan 13 2012, when there is only one fragment, we don't need to check if it is part of mate-pair
	{
		// serious bug fixed here: mate-pairs being examined twice April 5 2012 
		// check this code for corrrectness: mate-pairs will be adjacent to each other.
		i =first; 
		while (i< (*fragments)-1)
		{
			if (strcmp(flist[i].id,flist[i+1].id) == 0)  // mate pair with both ends having at least one variant
			{
				//fprintf(stdout,"mate-pair %s %s %s\n",flist[i].id);
				if (flist[i].alist[flist[i].variants-1].varid < flist[i+1].alist[0].varid) print_matepair(&flist[i],&flist[i+1],varlist,fragment_file);
				else if (flist[i+1].alist[flist[i+1].variants-1].varid < flist[i].alist[0].varid) print_matepair(&flist[i+1],&flist[i],varlist,fragment_file);
				else if (flist[i].variants+flist[i+1].variants > 2)
				{
					j=0;k=0; fragment.variants =0;
					while (j < flist[i].variants || k < flist[i+1].variants)
					{
						if (j >= flist[i].variants)
						{
							fragment.alist[fragment.variants].varid = flist[i+1].alist[k].varid;
							fragment.alist[fragment.variants].allele = flist[i+1].alist[k].allele;
							fragment.alist[fragment.variants].qv = flist[i+1].alist[k].qv;
							fragment.variants++;
							k++;
							continue;
						}
						if (k >= flist[i+1].variants)
						{
							fragment.alist[fragment.variants].varid = flist[i].alist[j].varid;
							fragment.alist[fragment.variants].allele = flist[i].alist[j].allele;
							fragment.alist[fragment.variants].qv = flist[i].alist[j].qv;
							fragment.variants++;
							j++;
							continue;
						}

						if (flist[i].alist[j].varid < flist[i+1].alist[k].varid)
						{
							fragment.alist[fragment.variants].varid = flist[i].alist[j].varid;
							fragment.alist[fragment.variants].allele = flist[i].alist[j].allele;
							fragment.alist[fragment.variants].qv = flist[i].alist[j].qv;
							fragment.variants++;
							j++;
						}
						else if (flist[i].alist[j].varid > flist[i+1].alist[k].varid)
						{
							fragment.alist[fragment.variants].varid = flist[i+1].alist[k].varid;
							fragment.alist[fragment.variants].allele = flist[i+1].alist[k].allele;
							fragment.alist[fragment.variants].qv = flist[i+1].alist[k].qv;
							fragment.variants++;
							k++;
						}
						else if (flist[i].alist[j].allele==flist[i+1].alist[k].allele) // consistent
						{
							fragment.alist[fragment.variants].varid = flist[i].alist[j].varid;
							fragment.alist[fragment.variants].allele = flist[i].alist[j].allele;
							fragment.alist[fragment.variants].qv = flist[i].alist[j].qv;
							if (flist[i+1].alist[k].qv > flist[i].alist[j].qv) fragment.alist[fragment.variants].qv =  flist[i+1].alist[k].qv;
							fragment.variants++;
							j++; k++;
						}
						else { j++;k++;  } 
					}
					if (fragment.variants >= 2) 
					{
						sl = strlen(flist[i].id); fragment.id = (char*)malloc(sl+1);
						for (j=0;j<sl;j++) fragment.id[j] = flist[i].id[j]; fragment.id[j] = '\0';

						//for (j=0;j<flist[i].variants;j++) fprintf(stdout,"%d ",flist[i].alist[j].varid); fprintf(stdout,"| ");
						//for (j=0;j<flist[i+1].variants;j++) fprintf(stdout,"%d ",flist[i+1].alist[j].varid);
						//fprintf(stdout,"order of variants not correct %s \t",flist[i].id);
						print_fragment(&fragment,varlist,fragment_file);
						free(fragment.id);
					}
					
				}
				else if (flist[i].variants+flist[i+1].variants ==2 && SINGLEREADS ==1)print_fragment(&flist[i],varlist,fragment_file); // added 05/31/2017 for OPE
				
				//else if (flist[i].variants ==1 && flist[i+1].variants >1) print_fragment(&flist[i+1],varlist);
				//else if (flist[i].variants > 1 && flist[i+1].variants ==1) print_fragment(&flist[i],varlist);
				// april 27 2012 these PE reads were being ignored until now
				i +=2; 
				// what about overlapping paired-end reads.... reads..... ???? jan 13 2012, 
			}
			else if ( flist[i].variants >= 2 || SINGLEREADS ==1) 
			{
				print_fragment(&flist[i],varlist,fragment_file);		i++;
			}
			else i++; 

		}
		// last read examined if it is not paired
		if (i < *fragments)
		{
			if (flist[i].variants >= 2 || SINGLEREADS ==1) print_fragment(&flist[i],varlist,fragment_file);
		}
	}
	else // only one fragment in fraglist single end
	{
		if (flist[first].variants >= 2 || SINGLEREADS ==1) print_fragment(&flist[first],varlist,fragment_file);
	}

	// free the fragments starting from first....
	if (*fragments > 0)// check added jan 13 2012
	{
		for (i=first;i<*fragments;i++) { free(flist[i].id); free(flist[i].alist); } 
	}
	(*fragments) = first; 
	free(fragment.alist);
}
Ejemplo n.º 3
0
// extract haplotype informative reads from sorted bam file //
// reads flagged unmapped/secondary/QC-fail/duplicate or with mapping quality below MIN_MQ are skipped //
//
// single reads are printed to fragment_file immediately; paired-end reads with at least one variant are
// cached in a local fragment list that clean_fragmentlist() flushes periodically and at the end
//
// returns 0 on success and -1 if the BAM file cannot be opened, memory cannot be allocated or the fragment cache overflows
int parse_bamfile_sorted(char* bamfile,HASHTABLE* ht,CHROMVARS* chromvars,VARIANT* varlist,REFLIST* reflist)
{
	fprintf(stderr,"reading sorted bamfile %s \n",bamfile);

	// open the file before allocating anything so that this failure path has nothing to release
	samfile_t *fp;
	if ((fp = samopen(bamfile, "rb", 0)) == 0) { fprintf(stderr, "Fail to open BAM file %s\n", bamfile); return -1; }

	int status = 0;
	int reads=0;
	int i=0; int chrom=0;
	int absIS;
	int prevchrom=-1; int prevtid = -1;
	int fragments =0; int prevfragments =0;
	bam1_t *b = NULL;

	struct alignedread* read = (struct alignedread*)malloc(sizeof(struct alignedread));
	FRAGMENT* flist = (FRAGMENT*)malloc(sizeof(FRAGMENT)*MAXFRAG);
	FRAGMENT fragment; fragment.variants =0; fragment.alist = (allele*)malloc(sizeof(allele)*4096);
	if (read == NULL || flist == NULL || fragment.alist == NULL)
	{
		fprintf(stderr,"unable to allocate memory for parsing BAM file %s\n",bamfile);
		status = -1; goto cleanup;
	}
	b = bam_init1();

	while (samread(fp, b) >= 0)
	{
		fetch_func(b, fp,read);
		if ((read->flag & (BAM_FUNMAP|BAM_FSECONDARY|BAM_FQCFAIL|BAM_FDUP)) || read->mquality < MIN_MQ)
		{
			free_readmemory(read); continue;
		}
		// find the chromosome in reflist that matches read->chrom if the previous chromosome is different from current chromosome
		if (read->tid != prevtid)
		{
			chrom = getindex(ht,read->chrom);
			i = read->tid;
			if (reflist->ns > 0)
			{
				// first guess: the reference list is in the same order as the BAM header; otherwise search by name
				reflist->current = i;
				if (i >= reflist->ns || i < 0 || strcmp(reflist->names[i],read->chrom) !=0)
				{
					reflist->current = -1;
					for (i=0;i<reflist->ns;i++)
					{
						if (strcmp(reflist->names[i],read->chrom) ==0) { reflist->current = i; break; }
					}
				}
			}
		}
		else chrom = prevchrom;

		// read->IS is used as is; it is not recomputed from the mate position since that would need the readlength/cigar
		absIS = (read->IS < 0) ? -1*read->IS: read->IS;
		// a read whose mate is on a different chromosome is also treated as a single read (bug for contigs, july 16 2012)
		if ((read->flag & 8) || absIS > MAX_IS || absIS < MIN_IS || read->IS ==0 || !(read->flag & 1) || read->tid != read->mtid) // single read
		{
			fragment.variants =0;
			if (chrom >=0 && PEONLY ==0)
			{
				fragment.id = read->readid;
				extract_variants_read(read,ht,chromvars,varlist,0,&fragment,chrom,reflist);
				if (fragment.variants >= 2 || (SINGLEREADS ==1 && fragment.variants >=1))
				{
					// instead of printing fragment, we could change this to update genotype likelihoods
					print_fragment(&fragment,varlist,fragment_file);
				}
			}
		}
		else  // paired-end read
		{
			fragment.variants =0; fragment.id = read->readid;
			if (chrom >=0) extract_variants_read(read,ht,chromvars,varlist,1,&fragment,chrom,reflist);
			if (fragment.variants > 0)
			{
				add_fragment(flist,&fragment,read,fragments); fragments++;
				if (fragments >= MAXFRAG)
				{
					fprintf(stderr,"exceeded max #cached fragments: %d,increase MAXFRAGMENTS using --maxfragments option \n",MAXFRAG);
					free_readmemory(read); // the current read is not released by the loop tail on this path
					status = -1; goto cleanup;
				}
			}
		}
		// known limitation: when the fragment list cannot be cleaned due to long mate-pair fragments (accumulated for large IS)
		// it will be cleaned repeatedly once it holds >= 100000 fragments
		if ( (fragments-prevfragments >= 100000) || fragments >= MAXFRAG -10000 || (chrom != prevchrom && prevchrom != -1 && fragments > 0)) // chrom of current read is not the same as previous read's chromosome...
		{
			if (PFLAG ==1) fprintf(stderr,"cleaning buffer: current chrom %s %d fragments %d\n",read->chrom,read->position,fragments);
			// never clean an empty fragment list
			if (fragments > 0) clean_fragmentlist(flist,&fragments,varlist,chrom,read->position,prevchrom);
			prevfragments = fragments;
		}

		reads+=1; if (reads%2000000 ==0) fprintf(stderr,"processed %d reads, useful fragments %d\n",reads,fragments);
		prevchrom = chrom; prevtid = read->tid;
		free_readmemory(read);
	}
	if (fragments > 0)
	{
		// NOTE(review): read->chrom / read->position are used after free_readmemory(read); assumes that call leaves these two fields valid -- confirm
		fprintf(stderr,"final cleanup of fragment list: %d current chrom %s %d \n",fragments,read->chrom,read->position);
		clean_fragmentlist(flist,&fragments,varlist,-1,read->position,prevchrom);
	}

cleanup:
	// release whatever is still cached; id and alist are freed the same way clean_fragmentlist frees them
	for (i=0;i<fragments;i++) { free(flist[i].id); free(flist[i].alist); }
	if (b != NULL) bam_destroy1(b);
	free(fragment.alist);
	free(flist);
	free(read);
	// NOTE(review): fp is never closed; the legacy samtools API pairs samopen() with samclose() -- confirm and add
	return status;
}
Ejemplo n.º 4
0
/* Append every allele call of `src` to `pool`; the caller guarantees pool->alist has room. */
static void pool_read_alleles(FRAGMENT* pool, const FRAGMENT* src) {
    int j;
    for (j = 0; j < src->variants; j++) {
        pool->alist[pool->variants].varid = src->alist[j].varid;
        pool->alist[pool->variants].allele = src->alist[j].allele;
        pool->alist[pool->variants].qv = src->alist[j].qv;
        pool->variants++;
    }
}

/*
 * Build one consensus fragment from the reads readlist[s] .. readlist[e-1].
 *
 * The allele calls of every usable read (IS >= 0, flag bit 1024 not set) and
 * of its mate are pooled, sorted with compare_alleles and collapsed to one
 * call per variant. A variant gets a consensus call only when at most one
 * pooled call disagrees with the majority allele and the resulting quality
 * (difference of the summed qualities, capped at 60) reaches MINQ.
 *
 * If at least two consensus calls remain, the fragment is printed to stdout
 * and to fragment_file under an id built from the chromosome, the positions
 * of the first and last read, `length` and `read_density`. Debug output goes
 * to stdout.
 *
 * Returns 0 when the pool is rejected before consensus calling (fewer than
 * two distinct variants, or too many conflicting calls) or when memory cannot
 * be allocated; returns 1 otherwise, whether or not a fragment was printed.
 */
int generate_single_fragment(struct alignedread** readlist, int s, int e, int length, double read_density, FRAGMENT* flist, VARIANT* varlist) {
    int j = 0, i = 0, k = 0;

    /* first pass: count the calls that will be pooled so the buffer is sized exactly */
    size_t capacity = 0;
    for (k = s; k < e; k++) {
        if (readlist[k]->IS < 0 || ((readlist[k]->flag & 1024) == 1024)) continue;
        if (readlist[k]->findex >= 0 && flist[readlist[k]->findex].variants > 0)
            capacity += (size_t) flist[readlist[k]->findex].variants;
        i = readlist[k]->mateindex;
        if (i >= 0 && readlist[i]->findex >= 0 && flist[readlist[i]->findex].variants > 0)
            capacity += (size_t) flist[readlist[i]->findex].variants;
    }

    FRAGMENT fragment;
    fragment.variants = 0;
    fragment.alist = (allele*) malloc(sizeof (allele) * (capacity > 0 ? capacity : 1));
    if (fragment.alist == NULL) {
        fprintf(stderr, "unable to allocate memory for pooled fragment\n");
        return 0;
    }

    /* second pass: pool the calls of each usable read followed by those of its mate */
    for (k = s; k < e; k++) {
        if (readlist[k]->IS < 0 || ((readlist[k]->flag & 1024) == 1024)) continue;
        if (readlist[k]->findex >= 0) pool_read_alleles(&fragment, &flist[readlist[k]->findex]);
        i = readlist[k]->mateindex;
        if (i >= 0 && readlist[i]->findex >= 0) pool_read_alleles(&fragment, &flist[readlist[i]->findex]);
    }

    int unique_variants = 1;
    int hets = 0;
    int counts[4]; /* [0],[1]: number of '0'/'1' calls; [2],[3]: summed quality of '0'/'1' calls */
    int qv = 0;
    qsort(fragment.alist, fragment.variants, sizeof (allele), compare_alleles);
    for (i = 1; i < fragment.variants; i++) {
        if (fragment.alist[i].varid != fragment.alist[i - 1].varid) unique_variants++;
        else if (fragment.alist[i].allele != fragment.alist[i - 1].allele) hets++; /* adjacent conflicting calls at one variant */
    }
    if (hets >= 2 || hets * 3 >= unique_variants || unique_variants < 2) // fragment only has single variant or has 2 or more heterozygous variants
    {
        free(fragment.alist);
        return 0;
    }

    FRAGMENT fp;
    fp.variants = 0;
    fp.alist = (allele*) malloc(sizeof (allele) * unique_variants);
    if (fp.alist == NULL) {
        fprintf(stderr, "unable to allocate memory for consensus fragment\n");
        free(fragment.alist);
        return 0;
    }

    /* unique_variants >= 2 here, so the pool is not empty and alist[0] is valid */
    counts[0] = counts[1] = counts[2] = counts[3] = 0;
    counts[(int) fragment.alist[0].allele - '0']++;
    counts[(int) fragment.alist[0].allele - '0' + 2] += (int) fragment.alist[0].qv - QVoffset;

    j = 0;
    for (i = 1; i <= fragment.variants; i++) {
        if (i == fragment.variants || fragment.alist[i].varid != fragment.alist[i - 1].varid) {
            // emit the consensus call for the run of calls that just ended
            if (counts[0] > counts[1] && counts[1] <= 1) {
                fp.alist[j].varid = fragment.alist[i - 1].varid;
                fp.alist[j].allele = '0';
                qv = (QVoffset + counts[2] - counts[3]);
                if (counts[2] - counts[3] >= 60) qv = 60 + QVoffset;
                fp.alist[j].qv = (char) (qv);
                if (qv - QVoffset >= MINQ) j++; /* slot is reused by the next variant when the call is too weak */
            } else if (counts[1] > counts[0] && counts[0] <= 1) {
                fp.alist[j].varid = fragment.alist[i - 1].varid;
                fp.alist[j].allele = '1';
                qv = (QVoffset + counts[3] - counts[2]);
                if (counts[3] - counts[2] >= 60) qv = 60 + QVoffset;
                fp.alist[j].qv = (char) (qv);
                if (qv - QVoffset >= MINQ) j++;
            }
            counts[0] = counts[1] = counts[2] = counts[3] = 0;
        }
        if (i < fragment.variants) {
            counts[(int) fragment.alist[i].allele - '0']++;
            counts[(int) fragment.alist[i].allele - '0' + 2] += (int) fragment.alist[i].qv - QVoffset;
        }
    }

    fprintf(stdout, "fragment %d %d \n", unique_variants, j);

    fp.variants = j;
    if (j >= 2) {
        /* the id is built only here: with j == 0, fp.alist[0] may never have been written */
        char id_buffer[1024];
        snprintf(id_buffer, sizeof id_buffer, "%s:%d_%d_%d_%0.1f", varlist[fp.alist[0].varid].chrom, readlist[s]->position, readlist[e - 1]->position, length, read_density);
        fp.id = id_buffer;

        fprintf(stdout, "FRAGMENT ");
        print_fragment(&fp, varlist, stdout);
        print_fragment(&fp, varlist, fragment_file);
    }
    free(fp.alist);

    /* debug dump of every pooled call, grouped by variant */
    for (i = 0; i < fragment.variants; i++) {
        j = fragment.alist[i].varid;
        if (i == 0 || j != fragment.alist[i - 1].varid) fprintf(stdout, "\n %d:%d %s/%s %c:%d | ", j, varlist[j].position, varlist[j].allele1, varlist[j].allele2, fragment.alist[i].allele, fragment.alist[i].qv - 33);
        else if (fragment.alist[i].allele != fragment.alist[i - 1].allele) fprintf(stdout, "%c:%d:HET | ", fragment.alist[i].allele, fragment.alist[i].qv - 33);
        else fprintf(stdout, "%c:%d | ", fragment.alist[i].allele, fragment.alist[i].qv - 33);
    }
    fprintf(stdout, "\n");
    free(fragment.alist);
    return 1;
}