Example #1
0
/*
 * Loads the trace named 'in' (read as 'format'), optionally normalises it,
 * and writes it to 'ofp' in SCF format.
 *
 *   in        - input trace file name (also passed to add_comments)
 *   ofp       - destination stream for the SCF data
 *   out       - output name; only used in the write-failure message
 *   format    - trace type handed to read_reading()
 *   prec      - when 1, scale_trace8() is applied before writing
 *   comp      - compression method; -1 leaves the current method untouched
 *   normalise - when non-zero, background subtraction and height rescaling
 *               are applied
 *
 * Returns:
 *   0 for success
 *   1 for failure (read or write)
 */
static int convert(char *in, mFILE *ofp, char *out, int format, int prec,
                   int comp, int normalise) {
    int status = 0;
    Read *trace = read_reading(in, format);

    if (trace == NULL) {
        fprintf(stderr, "%s: failed to read\n", in);
        return 1;
    }

    if (normalise) {
        subtract_background(trace);
        reset_max_called_height(trace);
        rescale_heights(trace);
    }

    add_comments(trace, in, format);

    if (1 == prec)
        scale_trace8(trace);

    if (-1 != comp)
        set_compression_method(comp);

    if (mfwrite_reading(ofp, trace, TT_SCF) != 0) {
        fprintf(stderr, "%s: failed to write\n", out);
        status = 1;
    }

    /* The trace is released on both the success and the failure path. */
    read_deallocate(trace);
    return status;
}
Example #2
0
/*
 * Reads a trace from 'infp', applies the processing steps selected in
 * 'opts', and writes the result to 'outfp' in opts->out_format.
 *
 * The identifier stored in the trace is opts->name when given; otherwise
 * the input file name when the output name is "(stdout)", and the output
 * file name in all other cases.
 *
 * Returns:
 *   0 for success
 *   1 for failure (read or write)
 */
int convert(FILE *infp, FILE *outfp, char *infname, char *outfname,
            struct opts *opts) {
    Read *trace;
    int status = 0;

    trace = fread_reading(infp, infname, opts->in_format);
    if (NULL == trace) {
        fprintf(stderr, "failed to read file %s\n", infname);
        return 1;
    }

    if (opts->sub_background) {
        subtract_background(trace);
        reset_max_called_height(trace);
    }

    if (opts->normalise)
        rescale_heights(trace);

    if (opts->scale)
        rescale_trace(trace, opts->scale);

    /* Pick the identifier to store in the trace. */
    if (opts->name) {
        trace->ident = strdup(opts->name);
    } else if (strcmp(outfname, "(stdout)") == 0) {
        trace->ident = strdup(infname);
    } else {
        trace->ident = strdup(outfname);
    }

    if (-1 != opts->compress_mode)
        set_compression_method(opts->compress_mode);

    if (fwrite_reading(outfp, trace, opts->out_format) != 0) {
        fprintf(stderr, "failed to write file %s\n", outfname);
        status = 1;
    }

    read_deallocate(trace);
    return status;
}
Example #3
0
/*
 * Read the ALF format sequence from FILE *fp into a Read structure.
 * Base calls other than `A', `C', `G' and `T' are stored as `-'. In this
 * respect we are adhering (more or less) to the CSET_DEFAULT uncertainty
 * code set.
 *
 * Which parts are loaded is controlled by the current read_sections()
 * mask (READ_BASES and READ_SAMPLES).
 *
 * Returns:
 *   Read *	- Success, the Read structure read.
 *   NULLRead	- Failure.
 */
Read *fread_alf(FILE *fp) {
    Read *read = NULLRead;
    int i;
    int numPoints;
    int sections = read_sections(0);

    uint_4 data_size;
    uint_4 dataO;
    uint_4 header_size=396; /* size of the header of the processed data
                               section */
    uint_2 actBaseDataSize; /* actual number of bytes of data of information
                               containing the base and basePos information */

    off_t indexO;           /* File offset where the index is */
    uint_4 baseO;           /* File offset where the bases are stored */

    /*************************************************************
     * Read the various file offsets
     *************************************************************/

    /* indexO is the offset of the index.
     * Or I could look for the first label, starting 'ALF'
     * if I used 512 then none of the entries are on long
     * word boundaries
     */
    indexO = 522;

    /* offset in file of first base of sequence */
    if (! (getIndexEntryLW(fp,indexO,BaseEntryLabel,12,&baseO)) )
        goto bail_out;

    /* actual size of region containing this data */
    if (! (getIndexEntryW(fp,indexO,BaseEntryLabel,10,&actBaseDataSize)) )
        goto bail_out;

    /* Look for Processed data first. If we fail to find it, then look for
     * the Raw data (same format).
     */

    /* offset in file to start of processed data segment - there
     * is then a header of size header_size (currently 396)
     */
    if (! (getIndexEntryLW(fp,indexO,DataEntryLabel,12,&dataO)) ) {
        if (! (getIndexEntryLW(fp,indexO,RawDataEntryLabel,12,&dataO)) )
            goto bail_out;

        /* actual size of region containing this data */
        if (! (getIndexEntryLW(fp,indexO,RawDataEntryLabel,10,&data_size)) )
            goto bail_out;
    } else {
        /* actual size of region containing this data */
        if (! (getIndexEntryLW(fp,indexO,DataEntryLabel,10,&data_size)) )
            goto bail_out;
    }

    /* A data section shorter than its own header is corrupt; without this
     * check the unsigned subtraction below would wrap to a huge value.
     */
    if (data_size < header_size)
        goto bail_out;

    /* Because each trace value is stored in a 2 byte
     * integer, thus to store A C G T information
     * it takes 8 bytes.  So subtract off the header and
     * divide by 8
     */
    numPoints = (int)((data_size - header_size)/ 8);

    /* Allocate the sequence */
    if (NULLRead == (read = read_allocate(numPoints, BASELIMIT)))
        goto bail_out;

    /*************************************************************
     * Read the bases information
     *************************************************************/
    if (sections & READ_BASES) {
        int numBases;	/* number of nucleotides read in */

        if (!(fseek(fp, (off_t)baseO, 0) == 0))
            goto bail_out;

        /* One text line per base: "<base> <int> <position>". The position
         * is parsed as a float because some ALF files store it that way.
         */
        for (numBases = 0;
             (unsigned)ftell(fp) < baseO+(unsigned short)actBaseDataSize
                 && numBases<BASELIMIT;) {
            char line[200];
            char ch = '\0';
            float bp = 0;

            /* Stop at EOF / read error rather than re-parsing stale data */
            if (NULL == fgets(line, (int)sizeof(line), fp))
                break;

            /* On a partial match ch/bp keep the defaults set above */
            sscanf(line, "%c %*d %f", &ch, &bp);

            /* we convert ch to Staden format here */
            switch (ch) {
            case 'A':
            case 'C':
            case 'G':
            case 'T':
                break;
            default:
                ch = '-';
            }

            read->base[numBases]    = ch;
            read->prob_A[numBases]  = 0;
            read->prob_C[numBases]  = 0;
            read->prob_G[numBases]  = 0;
            read->prob_T[numBases]  = 0;
            read->basePos[numBases] = bp;
            ++numBases;
        }
        read->base[numBases] = 0;

        read->NBases  = numBases;
    }

    /*************************************************************
     * Read the trace information
     *************************************************************/

    if (sections & READ_SAMPLES) {

        /*
         * Traces are stored as 2 byte integers in records in the order of
         * A C G T A C G T ...
         */

        if (fseek(fp, (off_t)(dataO+header_size), 0) != 0)
            goto bail_out;

        /* NOTE(review): the loop may stop early on an all-zero record near
         * the end, but read->NPoints is left at the size-derived estimate
         * (the original counted the points read and never stored the
         * count) - confirm whether NPoints should be trimmed.
         */
        for (i=0; i < read->NPoints; i++) {
            if (!le_read_int_2(fp, &(read->traceA[i])))
                goto bail_out;
            if (read->maxTraceVal < read->traceA[i])
                read->maxTraceVal = read->traceA[i];

            if (!le_read_int_2(fp, &(read->traceC[i])))
                goto bail_out;
            if (read->maxTraceVal < read->traceC[i])
                read->maxTraceVal = read->traceC[i];

            if (!le_read_int_2(fp, &(read->traceG[i])))
                goto bail_out;
            if (read->maxTraceVal < read->traceG[i])
                read->maxTraceVal = read->traceG[i];

            if (!le_read_int_2(fp, &(read->traceT[i])))
                goto bail_out;
            if (read->maxTraceVal < read->traceT[i])
                read->maxTraceVal = read->traceT[i];

            /* An all-zero record within the last 64 points ends the data */
            if (read->traceA[i]==0 && read->traceT[i]==0 &&
                read->traceC[i]==0 && read->traceG[i]==0 &&
                i > (numPoints-64))
                break;
        }
    }

    /* SUCCESS */

    read->format = TT_ALF;
    return(read);

    /* FAILURE */
 bail_out:
    if (read)
        read_deallocate(read);

    return NULLRead;
}
Example #4
0
/*
 * Prints the comment ("info") fields of every trace file found in the
 * directory given as the single command line argument.
 *
 * Each comment line "NAME=VALUE" is printed as "<file>.NAME=VALUE".
 * RUND and DATE values of the form "<start> - <end>" / "<start> to <end>"
 * are split into "start=...,end=...". Lines without an '=' are skipped, and
 * RUND/DATE values without the expected separator are printed unsplit.
 *
 * Exit status is 0 when at least one trace was read, 1 when none could be
 * read or a trace had no comments, and 2 when the directory listing is
 * empty.
 */
int main(int argc, char **argv) {
    Read *r = NULL;
    char *directory;
    char **FileList = NULL;
    char trace_filename[FILENAME_MAX] = "";
    int num_traces, trace_iter, files_read = 0;
    char *str;

    if (argc != 2) {
        usage();
        /* presumably usage() exits; never continue with a bad argv */
        return 1;
    }

    directory = argv[1];

    /* Get a list of all chromatogram files in the directory */
    num_traces = GetFileList(&FileList, directory);
    if (num_traces == 0) {
        fprintf(stderr, "* Path %s yielded 0 files...exiting\n", directory);
        exit(2);
    }

    /* step through all the sequences */
    for (trace_iter = 0; trace_iter < num_traces; trace_iter++) {
        int len;

        if (r) {
            read_deallocate(r);
            r = NULL;
        }

        /* Build the full path; skip names that would not fit the buffer */
        len = snprintf(trace_filename, sizeof(trace_filename), "%s/%s",
                       directory, FileList[trace_iter]);
        if (len < 0 || (size_t)len >= sizeof(trace_filename)) {
            fprintf(stderr, "* Path too long, skipping %s\n",
                    FileList[trace_iter]);
            continue;
        }

        /* Read the file */
        read_sections(READ_COMMENTS);
        if (NULL == (r = read_reading(trace_filename, TT_ANY))) {
            continue;  /* don't worry about it */
        }
        files_read++;

        if (!r->info) {
            read_deallocate(r);
            return 1;
        }

        /* strtok splits r->info in place, one comment per line */
        for (str = strtok(r->info, "\n"); str != NULL;
             str = strtok(NULL, "\n")) {
            char *name = str;
            char *value = strchr(name, '=');
            char *sep;

            if (value == NULL)
                continue;       /* not a NAME=VALUE line */
            *value++ = '\0';    /* terminate the name, skip over the '=' */

            if (!strcmp(name, "RUND") &&
                (sep = strstr(value, " - ")) != NULL) {
                *sep = '\0';    /* terminate the start string */
                fprintf(stdout, "%s.RUND=start=%s,end=%s\n",
                        FileList[trace_iter], value, sep + 3);
            } else if (!strcmp(name, "DATE") &&
                       (sep = strstr(value, " to ")) != NULL) {
                *sep = '\0';    /* terminate the start string */
                fprintf(stdout, "%s.DATE=start=%s,end=%s\n",
                        FileList[trace_iter], value, sep + 4);
            } else {
                fprintf(stdout, "%s.%s=%s\n",
                        FileList[trace_iter], name, value);
            }
        }
    }

    if (r)
        read_deallocate(r);

    return (files_read == 0);  /* is zero, unless we read nothing. */
}
Example #5
0
/*
 * Builds a Read structure from the contents of an Scf structure.
 * The Scf structure itself is not modified.
 *
 * Only the parts selected by the current read_sections() mask
 * (READ_SAMPLES, READ_BASES, READ_COMMENTS) are copied.
 *
 * Returns:
 *    A pointer to an allocated Read structure upon success.
 *    NULLRead upon failure.
 */
Read *scf2read(Scf *scf) {
    const int wanted = read_sections(0);
    int nsamples = 0;
    int nbases = 0;
    int k, limit;
    TRACE peak = 0;
    Read *out;

    /* Size the allocation according to the sections being loaded */
    if (wanted & READ_SAMPLES)
        nsamples = scf->header.samples;
    if (wanted & READ_BASES)
        nbases = scf->header.bases;

    out = read_allocate(nsamples, nbases);
    if (out == NULLRead)
        return NULLRead;

    if (wanted & READ_SAMPLES) {
        /* Samples are held as either 1 or 2 byte values */
        const int one_byte = (scf->header.sample_size == 1);

        limit = scf->header.samples;
        out->NPoints = limit;

        for (k = 0; k < limit; k++) {
            if (one_byte) {
                out->traceA[k] = scf->samples.samples1[k].sample_A;
                out->traceC[k] = scf->samples.samples1[k].sample_C;
                out->traceG[k] = scf->samples.samples1[k].sample_G;
                out->traceT[k] = scf->samples.samples1[k].sample_T;
            } else {
                out->traceA[k] = scf->samples.samples2[k].sample_A;
                out->traceC[k] = scf->samples.samples2[k].sample_C;
                out->traceG[k] = scf->samples.samples2[k].sample_G;
                out->traceT[k] = scf->samples.samples2[k].sample_T;
            }

            /* Track the largest sample seen on any channel */
            if (peak < out->traceA[k]) peak = out->traceA[k];
            if (peak < out->traceC[k]) peak = out->traceC[k];
            if (peak < out->traceG[k]) peak = out->traceG[k];
            if (peak < out->traceT[k]) peak = out->traceT[k];
        }

        out->maxTraceVal = peak;
    }

    if (wanted & READ_BASES) {
        limit = scf->header.bases;
        out->NBases = limit;

        for (k = 0; k < limit; k++) {
            out->base[k]    = scf->bases[k].base;
            out->basePos[k] = scf->bases[k].peak_index;
            out->prob_A[k]  = scf->bases[k].prob_A;
            out->prob_C[k]  = scf->bases[k].prob_C;
            out->prob_G[k]  = scf->bases[k].prob_G;
            out->prob_T[k]  = scf->bases[k].prob_T;
        }
        /* Terminate the base string just past the last copied base */
        out->base[k] = 0;
    }

    if (wanted & READ_COMMENTS) {
        if (scf->header.comments_size > 0 && scf->comments) {
            /* One extra byte so the comments form a C string */
            out->info = (char *)xmalloc(scf->header.comments_size+1);
            if (out->info == NULL) {
                read_deallocate(out);
                return NULLRead;
            }

            memcpy(out->info, scf->comments, scf->header.comments_size);
            out->info[scf->header.comments_size] = '\0';
        }
    }

    /* Clip points and format */
    out->leftCutoff = scf->header.bases_left_clip;
    out->rightCutoff = out->NBases - scf->header.bases_right_clip + 1;
    out->format = TT_SCF;

    return out;
}
Example #6
0
/*
 * Reads the trace 'file', converts it to an experiment file structure and
 * prints that to 'ofp'.
 *
 *   file        - trace file name
 *   format      - trace type; TT_BIO is read via read_reading(), all other
 *                 types via open_trace_file() + fread_reading()
 *   ofp         - output stream for the experiment file
 *   name        - name passed through to read2exp()
 *   output_conf - when non-zero (and the average quality is non-zero) an
 *                 AV entry holding one confidence value per base is added
 *
 * The AQ entry (average quality) is always set. All entries are set before
 * the experiment file is printed, so the AV entry is part of the output.
 *
 * Returns:
 *   0 for success
 *   1 for failure
 */
int convert(char *file, int format, mFILE *ofp, char *name, int output_conf) {
    Read *r;
    Exp_info *e;
    char buf[50];
    double aq;

    if (format == TT_BIO) {
        if (NULL == (r = read_reading(file, format))) {
            fprintf(stderr, "%s: failed to read\n", file);
            return 1;
        }
    } else {
        FILE *infp;
        if (NULL == (infp = open_trace_file(file, NULL))) {
            perror(file);
            return 1;
        }
        r = fread_reading(infp, file, format);
        /* Close the input whether or not the read succeeded */
        fclose(infp);
        if (NULL == r) {
            fprintf(stderr, "%s: failed to read\n", file);
            return 1;
        }
    }

    e = read2exp(r, name);
    if (NULL == e) {
        fprintf(stderr, "Failed to create experiment file.\n");
        read_deallocate(r);
        return 1;
    }

    aq = avg_qual(r);
    snprintf(buf, sizeof(buf), "%f", aq);
    exp_set_entry(e, EFLT_AQ, buf);

    if (output_conf && aq != 0) {
        char *cstr;
        int1 *conf;
        int i;

        conf = xmalloc(r->NBases * sizeof(*conf));
        cstr = xmalloc(5 * r->NBases+2);
        if (NULL == conf || NULL == cstr) {
            if (conf) xfree(conf);
            if (cstr) xfree(cstr);
            read_deallocate(r);
            exp_destroy_info(e);
            return 1;
        }

        /* Confidence of the called base; for any other symbol use the
         * mean of the four per-base values.
         */
        for (i = 0; i < r->NBases; i++) {
            switch (r->base[i]) {
            case 'a':
            case 'A':
                conf[i] = r->prob_A[i];
                break;
            case 'c':
            case 'C':
                conf[i] = r->prob_C[i];
                break;
            case 'g':
            case 'G':
                conf[i] = r->prob_G[i];
                break;
            case 't':
            case 'T':
                conf[i] = r->prob_T[i];
                break;
            default:
                conf[i] = (r->prob_A[i] +
                           r->prob_C[i] +
                           r->prob_G[i] +
                           r->prob_T[i]) / 4;
                break;
            }
        }

        conf2str(conf, r->NBases, cstr);
        exp_set_entry(e, EFLT_AV, cstr);

        xfree(cstr);
        xfree(conf);
    }

    /* Print only once every entry (AQ and, optionally, AV) is in place */
    exp_print_mfile(ofp, e);

    read_deallocate(r);
    exp_destroy_info(e);

    mfflush(ofp);

    return 0;
}
Example #7
0
/*
 * Duplicates the read structure and optionally gives it a new filename.
 * When new_name is NULL the source's trace_name (if any) is copied.
 * The following fields are not duplicated:
 *
 *  int  orig_trace_format;
 *  void (*orig_trace_free)(void *ptr);
 *  void *orig_trace;
 *  char *ident;
 *
 * The sample and base arrays are copied only when both the source and the
 * newly allocated destination have them (the destination is sized by
 * read_allocate()).
 *
 * Returns:
 *   "Read *" for success
 *   "NULLRead" for failure
 */
Read* read_dup( Read* src, const char* new_name )
{
    int   n;
    Read* dst;
    assert(src);

    /* Allocate storage and initialise */
    dst = read_allocate( src->NPoints, src->NBases );
    if( dst == NULLRead )
        return NULLRead;
    dst->info       = 0;
    dst->trace_name = 0;


    /* Copy over possibly new name */
    if( new_name )
        n = strlen(new_name);
    else if( src->trace_name )
        n = strlen(src->trace_name);
    else
        n = 0;
    if( n > 0 ) {
        dst->trace_name = (char*) xmalloc(n+1);
        if( !dst->trace_name )
            goto error;

        if( new_name )
            strcpy( dst->trace_name, new_name );
        else
            strcpy( dst->trace_name, src->trace_name );
    }


    /* Copy over info */
    if( src->info ) {
        dst->info = (char*) xmalloc( strlen(src->info)+1 );
        if( !dst->info )
            goto error;
        /* Fill the new buffer; without this dst->info is uninitialised */
        strcpy( dst->info, src->info );
    }


    /* Copy single fields */
    dst->format      = src->format;
    dst->maxTraceVal = src->maxTraceVal;
    dst->leftCutoff  = src->leftCutoff;
    dst->rightCutoff = src->rightCutoff;
    dst->baseline    = src->baseline;


    /* Copy NPoints fields if they exist on both sides */
    if( src->traceA && dst->traceA )
        {
            for( n=0; n<src->NPoints; n++ )
                {
                    dst->traceA[n] = src->traceA[n];
                    dst->traceC[n] = src->traceC[n];
                    dst->traceG[n] = src->traceG[n];
                    dst->traceT[n] = src->traceT[n];
                }
        }


    /* Copy NBases fields if they exist on both sides */
    if( src->base && src->base[0] && dst->base )
        {
            for( n=0; n<src->NBases; n++ )
                {
                    dst->base[n]    = src->base[n];
                    dst->basePos[n] = src->basePos[n];
                    if( src->prob_A )
                        {
                            dst->prob_A[n] = src->prob_A[n];
                            dst->prob_C[n] = src->prob_C[n];
                            dst->prob_G[n] = src->prob_G[n];
                            dst->prob_T[n] = src->prob_T[n];
                        }
                }
        }


    /* Success */
    return dst;

 error:
    /* Failure */
    read_deallocate(dst);
    return NULLRead;
}
Example #8
0
/*
 * Allocate a new sequence, with the given sizes.
 * Returns:
 *   "Read *" for success
 *   "NULLRead" for failure
 */
Read *read_allocate(int num_points, int num_bases) {
    Read *seq = NULLRead;

    int sections = read_sections(0);

    /* Allocate the body of the sequence */
    if ((seq = (Read *)xmalloc(sizeof(Read))) == NULL)
	return(NULLRead);

    seq->NPoints = num_points;
    seq->NBases  = num_bases;

    /*   
     * Initialise the body, all pointers are set to NULL so we can
     * happily call `read_deallocate()`.
     */
    seq->leftCutoff  = 0;
    seq->rightCutoff = 0;
    seq->maxTraceVal = 0;
    seq->baseline = 0;

    seq->traceC    = NULL;
    seq->traceA    = NULL;
    seq->traceG    = NULL;
    seq->traceT    = NULL;

    seq->base      = NULL;
    seq->basePos   = NULL;

    seq->info = NULL;
    seq->format = TT_ANY;
    seq->trace_name = NULL;

    seq->prob_A = NULL;
    seq->prob_C = NULL;
    seq->prob_G = NULL;
    seq->prob_T = NULL;

    seq->orig_trace_format = TT_ANY;
    seq->orig_trace = NULL;
    seq->orig_trace_free = NULL;

    seq->ident = NULL;

    /* Allocate space for the bases - 1 extra for the ->base field so
     * that we can treat it as a NULL terminated string.
     */
    if (sections & READ_BASES &&
	(((seq->base	  = (char *)xcalloc(num_bases+1,1))   == NULL) ||
	 ((seq->basePos   = (uint_2 *)xcalloc(num_bases+1,2)) == NULL) ||
	 ((seq->prob_A    = (char *)xcalloc(num_bases+1,1))   == NULL) ||
	 ((seq->prob_C    = (char *)xcalloc(num_bases+1,1))   == NULL) ||
	 ((seq->prob_G    = (char *)xcalloc(num_bases+1,1))   == NULL) ||
	 ((seq->prob_T    = (char *)xcalloc(num_bases+1,1))   == NULL))
	)
    {
	read_deallocate(seq);
	return NULLRead;
    }

    if (sections & READ_SAMPLES &&
	(((seq->traceC   =(TRACE *)xcalloc(num_points+1, 2))  == NULL)||
	 ((seq->traceA   =(TRACE *)xcalloc(num_points+1, 2))  == NULL)||
	 ((seq->traceG   =(TRACE *)xcalloc(num_points+1, 2))  == NULL)||
	 ((seq->traceT   =(TRACE *)xcalloc(num_points+1, 2))  == NULL))
	)
    {
	read_deallocate(seq);
	return NULLRead;
    }
    
    return seq;
}
Example #9
0
/*
 * Produce a consensus trace from a specific region of this contig.
 *
 *   xx        - editor state
 *   start,end - inclusive region to build the trace for
 *   strand    - passed to strand_matches() to select readings
 *   match     - passed through to do_cons_base()
 *   exception - sequence number excluded from the initial set of readings
 *
 * The traces of the readings covering each position are held open in
 * 'rlist' (with 'seqList' holding the matching sequence numbers) and are
 * released as soon as the reading no longer covers the current position.
 *
 * Returns:
 *   The newly allocated consensus Read on success.
 *   NULL on failure; the partly built Read and all traces still open are
 *   released.
 */
Read *cons_trace(EdStruct *xx, int start, int end, int strand,
                 int match, int exception) {
    int *seqList, i, j, count = 0, next;
    Read *r = NULL;
    int max_points = 10000;
    char *con = NULL;
    diff_cons_seq *rlist = NULL;
    char fileName[256];
    char t_type[5];
    int form;
    int offset = 0, w;

    /* Get the consensus sequence */
    if (NULL == (con = (char *)xmalloc(end - start + 2)))
        goto error;
    DBcalcConsensus(xx, start, end - start + 1, con, NULL, BOTH_STRANDS);

    /* Allocate a list of read pointers and positions */
    if (NULL == (rlist = (diff_cons_seq *)xcalloc(DBI_gelCount(xx),
                                                  sizeof(*rlist))))
        goto error;

    /* Allocate a read structure */
    if (NULL == (r = read_allocate(max_points, end - start + 1)))
        goto error;

    /* Derive the initial list of sequences covering the start point */
    seqList = DBI_list(xx);
    for (i = 1;
         i <= DBI_gelCount(xx) && DB_RelPos(xx, DBI_order(xx)[i]) <= start;
         i++) {
        int seq = DBI_order(xx)[i];
        DBgetSeq(DBI(xx), seq);
        if (DB_RelPos(xx, seq) + DB_Length(xx, seq) > start &&
            strand_matches(xx, seq, strand) &&
            seq != exception) {
            if (get_trace_path(xx, seq, fileName, t_type) == 0) {
                form = trace_type_str2int(t_type);
                rlist[count].r = read_reading(fileName, form);
                if (rlist[count].r) {
                    rlist[count].seq = DBgetSeq(DBI(xx), seq);
                    rlist[count].opos =
                        get_trace_pos(rlist[count].r, xx, seq, 0,
                                      DB_Start(xx, seq),
                                      DB_Start(xx, seq) + DB_Length(xx, seq),
                                      DB_Seq(xx, seq), 0);

                    seqList[count++] = seq;
                }
            }
        }
    }
    /* 'next' is the first not yet examined entry; 0 means none are left */
    if (i <= DBI_gelCount(xx))
        next = i;
    else
        next = 0;

    /*
     * Loop along the sequence updating seqList as we go.
     * At each point we know how many sequences there are so we can
     * produce the consensus from these sequences.
     */
    for (i = start; i <= end; i++) {
        w = do_cons_base(xx, con, i, start, count, seqList, rlist, r, offset,
                         match, &max_points);
        if (w == -1)
            goto error;
        offset += w;

        /* Update seqList for the next position */
        if (i < end) {
            /* Remove sequences */
            for (j = 0; j < count; j++) {
                int seq = seqList[j];
                if (DB_RelPos(xx, seq) + DB_Length(xx, seq) - 1 <= i) {
                    read_deallocate(rlist[j].r);
                    xfree(rlist[j].opos);
                    memmove(&seqList[j], &seqList[j+1],
                            (count-1-j) * sizeof(*seqList));
                    memmove(&rlist[j], &rlist[j+1],
                            (count-1-j) * sizeof(*rlist));
                    count--;
                    j--;    /* re-examine the entry shifted into slot j */
                }
            }

            /* Add sequences */
            /* NOTE(review): the initial loop above treats its counter as an
             * index into DBI_order(), whereas 'next' is used directly as a
             * sequence number here and 'exception' is not re-checked -
             * confirm this is intended.
             */
            while (next && DB_RelPos(xx, next) <= i+1) {
                DBgetSeq(DBI(xx), next);
                if (strand_matches(xx, next, strand) &&
                    get_trace_path(xx, next, fileName, t_type) == 0) {
                    form = trace_type_str2int(t_type);
                    rlist[count].r = read_reading(fileName, form);
                    if (rlist[count].r) {
                        rlist[count].seq = DBgetSeq(DBI(xx), next);
                        rlist[count].opos =
                            get_trace_pos(rlist[count].r, xx, next, 0,
                                          DB_Start(xx, next),
                                          DB_Start(xx,next)+DB_Length(xx,next),
                                          DB_Seq(xx, next), 0);

                        seqList[count++] = next;
                    }
                }
                if (++next > DBI_gelCount(xx))
                    next = 0;
            }
        }
    }

    for (i = 0; i < count; i++) {
        read_deallocate(rlist[i].r);
        xfree(rlist[i].opos);
    }

    tidy_up(r, end-start + 1, offset);

    xfree(con);
    xfree(rlist);
    return r;

 error:
    /* Release any traces still open; count is 0 until rlist is in use */
    for (i = 0; i < count; i++) {
        read_deallocate(rlist[i].r);
        xfree(rlist[i].opos);
    }
    if (r) read_deallocate(r);
    if (con) xfree(con);
    if (rlist) xfree(rlist);
    return NULL;
}
Example #10
0
/*
 * ---------------------------------------------------------------------------
 * Loads confidence values from the trace file and averages them.
 * 'opos' is optional - if not known then set to NULL.
 *
 *   e      - experiment file info; its LT entry gives the trace type and
 *            its LN entry the trace file name. Only SCF and ZTR are handled.
 *   length - number of entries to fill in 'conf' (and held in 'opos')
 *   opos   - 1-based original position of each base in the trace, with 0
 *            marking an inserted base; may be NULL
 *   conf   - output array of 'length' confidence values
 *
 * With 'opos', inserted bases get confidence 0. Positions outside the trace
 * and base symbols that are not handled get confidence 2.
 *
 * Returns 0 for success
 *        -1 for failure
 */
int get_read_conf(Exp_info *e, int length, int2 *opos, int1 *conf) {
    int ttype, i;
    FILE *fp = NULL;
    uint_1 *prob_A = NULL, *prob_C = NULL, *prob_G = NULL, *prob_T = NULL;
    char *seq = NULL;
    float scf_version;
    int nbases = 0;
    int ret = -1;

    /* Sanity check */
    if (!(exp_Nentries(e,EFLT_LT) && exp_Nentries(e,EFLT_LN)))
        return -1;

    /* Find and load trace file */
    ttype = trace_type_str2int(exp_get_entry(e, EFLT_LT));

    if (ttype != TT_SCF &&
        ttype != TT_ZTR)
        return -1;

    /*
     * We only support direct reading accuracy values from SCF files.
     * Otherwise we have to take a slower approach.
     */
    if (ttype != TT_SCF) {
        Read *r;
        int ok;
        int sec = read_sections(0);
        read_sections(READ_BASES);

        if (NULL == (r = read_reading(exp_get_entry(e,EFLT_LN), TT_ANYTR))) {
            read_sections(sec);
            return -1;
        }

        nbases = r->NBases;
        ok = (nbases >= 0);

        if (ok) {
            /* +1 so that a zero length read never requests 0 bytes */
            prob_A = (uint_1 *)xmalloc((size_t)nbases + 1);
            prob_C = (uint_1 *)xmalloc((size_t)nbases + 1);
            prob_G = (uint_1 *)xmalloc((size_t)nbases + 1);
            prob_T = (uint_1 *)xmalloc((size_t)nbases + 1);
            seq    = (char   *)xmalloc((size_t)nbases + 1);
            ok = (prob_A && prob_C && prob_G && prob_T && seq);
        }

        if (ok) {
            memcpy(prob_A, r->prob_A, nbases);
            memcpy(prob_C, r->prob_C, nbases);
            memcpy(prob_G, r->prob_G, nbases);
            memcpy(prob_T, r->prob_T, nbases);
            memcpy(seq,    r->base,   nbases);
        }

        /* Release the read and restore the caller's section mask */
        read_deallocate(r);
        read_sections(sec);

        if (!ok)
            goto cleanup;

    } else {
        Header h;
        /* For SCF files we read directly - the above code would also do. */

        if (NULL == (fp = open_trace_file(exp_get_entry(e,EFLT_LN), NULL)))
            return -1;

        /* Read the SCF header */
        if (-1 == read_scf_header(fp, &h))
            goto cleanup;
        scf_version = scf_version_str2float(h.version);
        nbases = (int)h.bases;
        if (nbases < 0)
            goto cleanup;

        /* Alloc memory; +1 so that zero bases never requests 0 bytes */
        prob_A = (uint_1 *)xmalloc((size_t)h.bases + 1);
        prob_C = (uint_1 *)xmalloc((size_t)h.bases + 1);
        prob_G = (uint_1 *)xmalloc((size_t)h.bases + 1);
        prob_T = (uint_1 *)xmalloc((size_t)h.bases + 1);
        seq    = (char   *)xmalloc((size_t)h.bases + 1);
        if (NULL == prob_A ||
            NULL == prob_C ||
            NULL == prob_G ||
            NULL == prob_T ||
            NULL == seq)
            goto cleanup;

        /* Load base scores */
        if (scf_version >= 3.0) {
            /*
             * Version 3 base format:
             * num_bases * 4byte peak index
             * num_bases * prob_A
             * num_bases * prob_C
             * num_bases * prob_G
             * num_bases * prob_T
             * num_bases * base
             * num_bases * spare (x3)
             */
            if (0 != fseek(fp, (off_t)h.bases_offset + 4 * h.bases, SEEK_SET))
                goto cleanup;
            if (h.bases != fread(prob_A, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_C, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_G, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_T, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(seq, 1, h.bases, fp))
                goto cleanup;
        } else {
            int j;
            uint_1 buf[12];

            /*
             * Version 2 base format
             * num_bases * base_struct,  where base_struct is 12 bytes:
             *     0-3 peak_index
             *     4-7 prob_A/C/G/T
             *     8   base
             *     9-  spare
             */
            if (0 != fseek(fp, (off_t)h.bases_offset, SEEK_SET))
                goto cleanup;

            for (j = 0; (unsigned)j < h.bases; j++) {
                if (1 != fread(buf, 12, 1, fp))
                    goto cleanup;
                prob_A[j] = buf[4];
                prob_C[j] = buf[5];
                prob_G[j] = buf[6];
                prob_T[j] = buf[7];
                seq[j]    = buf[8];
            }
        }
    }

    /* Determine confidence values */
    if (opos) {
        for (i=0; i<length; i++) {
            int idx = opos[i] - 1;

            if (opos[i] == 0) {
                /* Inserted base, change to 0% */
                conf[i] = 0;
            } else if (idx < 0 || idx >= nbases) {
                /* Position lies outside the trace data */
                conf[i] = 2;
            } else {
                switch(seq[idx]) {
                case 'a':
                case 'A':
                    conf[i] = prob_A[idx];
                    break;
                case 'c':
                case 'C':
                    conf[i] = prob_C[idx];
                    break;
                case 'g':
                case 'G':
                    conf[i] = prob_G[idx];
                    break;
                case 't':
                case 'T':
                    conf[i] = prob_T[idx];
                    break;
                default:
                    conf[i] = 2;
                }
            }
        }
    } else {
        int mlength = MIN(length, nbases);

        for (i=0; i < mlength; i++) {
            switch(seq[i]) {
            case 'a':
            case 'A':
                conf[i] = prob_A[i];
                break;
            case 'c':
            case 'C':
                conf[i] = prob_C[i];
                break;
            case 'g':
            case 'G':
                conf[i] = prob_G[i];
                break;
            case 't':
            case 'T':
                conf[i] = prob_T[i];
                break;
            case 'n':
            case 'N':
            case '-':
                conf[i] = (prob_A[i] + prob_C[i] + prob_G[i] + prob_T[i]) / 4;
                break;
            default:
                conf[i] = 2;
            }
        }
        /* Bases beyond the end of the trace data */
        for (; i < length; i++)
            conf[i] = 2;
    }

    ret = 0;

 cleanup:
    /* Shared exit: releases whatever was acquired, on success and failure */
    if (fp)     fclose(fp);
    if (prob_A) xfree(prob_A);
    if (prob_C) xfree(prob_C);
    if (prob_G) xfree(prob_G);
    if (prob_T) xfree(prob_T);
    if (seq)    xfree(seq);

    return ret;
}
Example #11
0
/*
 * Read the plain format sequence from FILE *fp into a Read structure.
 * All printing, non-space characters (as defined by ANSI C `isprint' and
 * `isspace') are accepted, but `N's are translated to `-'s.
 *
 * Special lines:
 *   >...   Fasta header. The first one is skipped; a later one ends the
 *          read (only the first sequence is loaded).
 *   ;<...  left cutoff sequence
 *   ;>...  right cutoff sequence
 *   ;...   as the very first line this may hold the trace type and name;
 *          any other ';' line is a comment and is skipped.
 *
 * The stream must be seekable, as its length is used to size the buffers.
 *
 * Returns:
 *   Read *     - Success, the Read structure read.
 *   NULLRead   - Failure.
 */
Read *fread_pln(FILE *fp) {
    Read *read = NULLRead;
    off_t fileLen;
    int  ch;
    char *leftc = NULL, *rightc = NULL, *leftcp, *rightcp;
    int first = 1;

    /*
     * Find the length of the file.
     * Use this as an overestimate of the length of the sequence.
     */
    if (fseek(fp, (off_t) 0, 2) != 0)
        goto bail_out;
    fileLen = ftell(fp);
    if (fileLen < 0 || fileLen > INT_MAX /*Was MAXINT2*/)
        goto bail_out;

    if (fseek(fp, (off_t) 0, 0) != 0)
        goto bail_out;

    /* Allocate the sequence */
    if (NULLRead == (read = read_allocate(0, fileLen)))
        goto bail_out;

    /* +1 leaves room for the terminating NUL even if the file is empty */
    if (NULL == (leftc = (char *)xmalloc(fileLen + 1)))
        goto bail_out;

    if (NULL == (rightc = (char *)xmalloc(fileLen + 1)))
        goto bail_out;

    leftcp = leftc;
    rightcp = rightc;

    /* Read in the bases */

    read->NBases = 0;
    read->format = TT_PLN;

    while ((ch = fgetc(fp)) != EOF) {
        if (ch == '>') {
            /* Fasta format file - skip the header and load the first
             * fasta sequence only. We don't even attempt to worry about
             * multi-sequence file formats for now.
             */
            if (!first)
                break;

            while(ch != '\n' && ch != EOF)
                ch = fgetc(fp);

        }  else if (ch==';') {
            /*
             * ;< is left cutoff,
             * ;> is right cutoff.
             * Any other ';'s we can treat as a comments.
             */
            ch = fgetc(fp);

            if (first == 1 && ch != '<' && ch != '>') {
                int d;
                char type[5], name[17], line[1024];

                /* A bare ";" line has nothing to parse; reading on would
                 * swallow the following line.
                 */
                if (ch != '\n' && ch != EOF) {
                    line[0] = ch;
                    if (NULL != fgets(&line[1], 1022, fp) &&
                        5 == sscanf(line, "%6d%6d%6d%4c%16s",
                                    &d, &d, &d, type, name)) {
                        char * p;

                        /* %4c does not NUL terminate */
                        type[4] = 0;
                        if ((p = strchr(type, ' ')))
                            *p = 0;

                        read->format = trace_type_str2int(type);
                        read->trace_name = (char *)xmalloc(strlen(name)+1);
                        if (read->trace_name)
                            strcpy(read->trace_name, name);
                    }
                }
            }

            else if (ch == '<') {
                ch = fgetc(fp);
                while (ch != '\n' && ch != EOF) {
                    *leftcp++ = ch;
                    ch = fgetc(fp);
                }
            } else if (ch == '>') {
                ch = fgetc(fp);
                while (ch != '\n' && ch != EOF) {
                    *rightcp++ = ch;
                    ch = fgetc(fp);
                }
            } else {
                while(ch != '\n' && ch != EOF)
                    ch = fgetc(fp);
            }
        } else if (isprint(ch) && !isspace(ch)) {
            read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch);
        }

        first = 0;
    }

    *leftcp = *rightcp = 0;

    /* Assemble: left cutoff + sequence + right cutoff */
    read->leftCutoff = strlen(leftc);
    read->rightCutoff = read->leftCutoff + read->NBases + 1;
    memmove(&read->base[read->leftCutoff], read->base, read->NBases);
    memmove(read->base, leftc, read->leftCutoff);
    memmove(&read->base[read->leftCutoff + read->NBases],
            rightc, strlen(rightc));

    read->NBases += read->leftCutoff + strlen(rightc);
    read->base[read->NBases] = 0;

    xfree(leftc);
    xfree(rightc);

    /* SUCCESS */
    return(read);

    /* FAILURE */
 bail_out:
    if (leftc)
        xfree(leftc);
    if (rightc)
        xfree(rightc);
    if (read)
        read_deallocate(read);

    return NULLRead;
}
Example #12
0
/*
 * Ripped out of io_lib's trace_dump program.
 * Writes the trace held in 'z' to files[0] as printable ASCII.
 *
 * The ZTR data is uncompressed and converted to a Read first. Sample
 * values and maxTraceVal are printed relative to the read's baseline.
 * 'name' is printed in the [Trace] section and in the error message;
 * 'mode' is not used here.
 */
void dump_text(ztr_t *z, char *name, char mode, FILE **files) {
    Read *rd;
    FILE *out;
    int k;

    uncompress_ztr(z);
    rd = ztr2read(z); /* Inefficient; can do direct */

    if (NULL == rd) {
        fprintf(stderr, "Tracedump was unable to open file %s\n", name );
        return;
    }

    out = files[0];

    fprintf(out, "[Trace]\n");
    fprintf(out, "%s\n", name);

    fprintf(out, "\n[Header]\n");
    fprintf(out, "%d\t\t# format\n",          rd->format);
    fprintf(out, "%d\t\t# NPoints\n",         rd->NPoints);
    fprintf(out, "%d\t\t# NBases\n",          rd->NBases);
    fprintf(out, "%d\t\t# NFlows\n",          rd->nflows);
    fprintf(out, "%d\t\t# maxTraceVal\n",     (int)rd->maxTraceVal-rd->baseline);
    fprintf(out, "%d\t\t# baseline\n",        rd->baseline);
    fprintf(out, "%d\t\t# leftCutoff\n",      rd->leftCutoff);
    fprintf(out, "%d\t\t# rightCutoff\n",     rd->rightCutoff);

    /* One line per base: call, position, four confidences, index */
    fputs("\n[Bases]\n", out);
    for (k = 0; k < rd->NBases; k++) {
        fprintf(out, "%c %05d %+03d %+03d %+03d %+03d #%3d\n",
                rd->base[k],
                rd->basePos ? rd->basePos[k] : 0,
                (int)rd->prob_A[k],
                (int)rd->prob_C[k],
                (int)rd->prob_G[k],
                (int)rd->prob_T[k],
                k);
    }

    /* The four channels, each as one baseline-corrected value per line */
    if (rd->NPoints) {
        fputs("\n[A_Trace]\n", out);
        for (k = 0; k < rd->NPoints; k++)
            fprintf(out, "%d\t#%5d\n", (int)rd->traceA[k] - rd->baseline, k);

        fputs("\n[C_Trace]\n", out);
        for (k = 0; k < rd->NPoints; k++)
            fprintf(out, "%d\t#%5d\n", (int)rd->traceC[k] - rd->baseline, k);

        fputs("\n[G_Trace]\n", out);
        for (k = 0; k < rd->NPoints; k++)
            fprintf(out, "%d\t#%5d\n", (int)rd->traceG[k] - rd->baseline, k);

        fputs("\n[T_Trace]\n", out);
        for (k = 0; k < rd->NPoints; k++)
            fprintf(out, "%d\t#%5d\n", (int)rd->traceT[k] - rd->baseline, k);
    }

    /* Flow data is only present for some traces */
    if (rd->flow_order) {
        fputs("\n[Flows]\n", out);
        for (k = 0; k < rd->nflows; k++) {
            fprintf(out, "%c %5.2f  %u\t#%5d\n",
                    rd->flow_order[k],
                    rd->flow ? rd->flow[k] : 0,
                    rd->flow_raw ? rd->flow_raw[k] : 0,
                    k);
        }
    }

    if (rd->info) {
        fputs("\n[Info]\n", out);
        fprintf(out, "%s\n", rd->info);
    }

    read_deallocate(rd);
}
Example #13
0
/*
 * trace_dump: prints the contents of the trace file named on the command
 * line to stdout as text, one section per part of the Read structure
 * ([Trace], [Header], [Bases], the four [?_Trace] channels, [Flows] and
 * [Info]).
 *
 * Returns 0 on success and 1 on a usage error or when the file cannot be
 * read.
 */
int main(int argc, char **argv)
{
    Read *trace;
    int k;

    if (2 != argc) {
        fprintf(stderr, "Usage: trace_dump <trace file>\n");
        return 1;
    }

    trace = read_reading( argv[1], TT_ANY );
    if (NULL == trace) {
        fprintf(stderr, "Tracedump was unable to open file %s\n", argv[1] );
        return 1;
    }

    printf("[Trace]\n");
    printf("%s\n", trace->trace_name );

    printf("\n[Header]\n");
    printf("%d\t\t# format\n",          trace->format);
    printf("%d\t\t# NPoints\n",         trace->NPoints);
    printf("%d\t\t# NBases\n",          trace->NBases);
    printf("%d\t\t# NFlows\n",          trace->nflows);
    printf("%d\t\t# maxTraceVal\n",     (int)trace->maxTraceVal);
    printf("%d\t\t# baseline\n",        trace->baseline);
    printf("%d\t\t# leftCutoff\n",      trace->leftCutoff);
    printf("%d\t\t# rightCutoff\n",     trace->rightCutoff);

    /* One line per base: call, position, four confidences, index */
    puts("\n[Bases]");
    for (k = 0; k < trace->NBases; k++) {
        printf("%c %05d %03d %03d %03d %03d #%3d\n",
               trace->base[k],
               trace->basePos ? trace->basePos[k] : 0,
               (int)trace->prob_A[k],
               (int)trace->prob_C[k],
               (int)trace->prob_G[k],
               (int)trace->prob_T[k],
               k);
    }

    /* The four channels, one raw sample value per line */
    if (trace->NPoints) {
        puts("\n[A_Trace]");
        for (k = 0; k < trace->NPoints; k++)
            printf("%d\t#%5d\n", (int)trace->traceA[k], k);

        puts("\n[C_Trace]");
        for (k = 0; k < trace->NPoints; k++)
            printf("%d\t#%5d\n", (int)trace->traceC[k], k);

        puts("\n[G_Trace]");
        for (k = 0; k < trace->NPoints; k++)
            printf("%d\t#%5d\n", (int)trace->traceG[k], k);

        puts("\n[T_Trace]");
        for (k = 0; k < trace->NPoints; k++)
            printf("%d\t#%5d\n", (int)trace->traceT[k], k);
    }

    /* Flow data is only present for some traces */
    if (trace->flow_order) {
        puts("\n[Flows]");
        for (k = 0; k < trace->nflows; k++) {
            printf("%c %5.2f  %u\t#%5d\n",
                   trace->flow_order[k],
                   trace->flow ? trace->flow[k] : 0,
                   trace->flow_raw ? trace->flow_raw[k] : 0,
                   k);
        }
    }

    if (trace->info) {
        puts("\n[Info]");
        printf("%s\n", trace->info);
    }

    read_deallocate(trace);

    return 0;
}