/*
 * Reads the trace file `in`, optionally normalises it, and writes the
 * result to `ofp` in SCF format.
 *
 * in         Input trace file name; also used in error messages.
 * ofp        Destination mFILE.
 * out        Output name; only used in the write-failure message.
 * format     Input trace format handed to read_reading().
 * prec       When equal to 1, scale_trace8() is applied before writing.
 * comp       Compression method for set_compression_method(); -1 leaves
 *            the current method untouched.
 * normalise  When non-zero, the background is subtracted, the maximum
 *            called height is reset and the heights are rescaled.
 *
 * Returns 0 for success, 1 if the trace could not be read or written.
 */
static int convert(char *in, mFILE *ofp, char *out, int format, int prec,
                   int comp, int normalise)
{
    int status = 0;
    Read *trace = read_reading(in, format);

    if (trace == NULL) {
        fprintf(stderr, "%s: failed to read\n", in);
        return 1;
    }

    if (normalise) {
        subtract_background(trace);
        reset_max_called_height(trace);
        rescale_heights(trace);
    }

    add_comments(trace, in, format);

    if (prec == 1) {
        scale_trace8(trace);
    }

    if (comp != -1) {
        set_compression_method(comp);
    }

    if (mfwrite_reading(ofp, trace, TT_SCF) != 0) {
        fprintf(stderr, "%s: failed to write\n", out);
        status = 1;
    }

    /* The trace is released on both the success and the failure path. */
    read_deallocate(trace);
    return status;
}
int convert(FILE *infp, FILE *outfp, char *infname, char *outfname, struct opts *opts) { Read *r; if (NULL == (r = fread_reading(infp, infname, opts->in_format))) { fprintf(stderr, "failed to read file %s\n", infname); return 1; } if (opts->sub_background) { /* trace_freq(r->traceA, r->NPoints); trace_freq(r->traceC, r->NPoints); trace_freq(r->traceG, r->NPoints); trace_freq(r->traceT, r->NPoints); */ subtract_background(r); /* separate_dyes(r, matrix); trace_freq(r->traceA, r->NPoints); trace_freq(r->traceC, r->NPoints); trace_freq(r->traceG, r->NPoints); trace_freq(r->traceT, r->NPoints); */ reset_max_called_height(r); } if (opts->normalise) { rescale_heights(r); } if (opts->scale) { rescale_trace(r, opts->scale); } if (opts->name) r->ident = strdup(opts->name); else if (0 == strcmp(outfname, "(stdout)")) r->ident = strdup(infname); else r->ident = strdup(outfname); if (opts->compress_mode != -1) set_compression_method(opts->compress_mode); if (0 != (fwrite_reading(outfp, r, opts->out_format))) { fprintf(stderr, "failed to write file %s\n", outfname); read_deallocate(r); return 1; } read_deallocate(r); return 0; }
/* * Read the ALF format sequence from FILE *fp into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. In this respect we * are adhering (more or less) to the CSET_DEFAULT uncertainty code set. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *fread_alf(FILE *fp) { Read *read = NULLRead; int i; int numPoints; int sections = read_sections(0); uint_4 data_size; uint_4 dataO; uint_4 header_size=396; /* size of the header of the processed data section */ uint_2 actBaseDataSize; /* actual number of bytes of data of information containing the base and basePos information */ int num_points; /* keeps track of the actual number of points, rather than the early guess of numPoints */ off_t indexO; /* File offset where the index is */ uint_4 baseO; /* File offset where the bases are stored */ /* * RMD lots of changes below here until end of data reading section * Some are cosmetic. * getIndexEntry calls in front of where they were needed, and made * There is a substantive change to the inner loop of the sequence * reading section. This now uses fscanf - much less rigid than the * previous scheme. Note that it reads bp as a float. This is because * it is a float in multiple trace data files! (bizarre Pharmacia * programming!). */ /************************************************************* * Read the various file offsets *************************************************************/ /* indexO is the offset of the index. * Or I could look for the first label, starting 'ALF' * if I used 512 then none of the entries are on long * word boundaries */ indexO = 522; /* offset in file of first base of sequence */ if (! (getIndexEntryLW(fp,indexO,BaseEntryLabel,12,&baseO)) ) goto bail_out; /* actual size of region containing this data */ if (! (getIndexEntryW(fp,indexO,BaseEntryLabel,10,&actBaseDataSize)) ) goto bail_out; /* Look for Processed data first. 
If we fail to find it, then look for * the Raw data (same format). */ /* offset in file to start of processed data segment - there * is then a header of size header_size (currently 396) */ if (! (getIndexEntryLW(fp,indexO,DataEntryLabel,12,&dataO)) ) { if (! (getIndexEntryLW(fp,indexO,RawDataEntryLabel,12,&dataO)) ) goto bail_out; /* actual size of region containing this data */ if (! (getIndexEntryLW(fp,indexO,RawDataEntryLabel,10,&data_size)) ) goto bail_out; } else { /* actual size of region containing this data */ if (! (getIndexEntryLW(fp,indexO,DataEntryLabel,10,&data_size)) ) goto bail_out; } /* Because each trace value is stored in a 2 byte * integer, thus to store A C G T information * it takes 8 bytes. So subtract off the header and * divide by 8 */ numPoints = (int)((data_size - header_size)/ 8); /* Allocate the sequence */ if (NULLRead == (read = read_allocate(numPoints, BASELIMIT))) goto bail_out; /************************************************************* * Read the bases information *************************************************************/ if (sections & READ_BASES) { /* new locals introduced by LFW and/or RMD for the ALF */ int numBases; /* number of nucleotides read in */ float bp; char ch; if (!(fseek(fp, (off_t)baseO, 0) == 0)) goto bail_out; for (numBases = 0; (unsigned)ftell(fp) < baseO+(unsigned short)actBaseDataSize && numBases<BASELIMIT;) { char line[200]; fgets(line, (int)sizeof(line), fp); sscanf(line, "%c %*d %f", &ch, &bp); /* we convert ch to Staden format here */ switch (ch) { case 'A': case 'C': case 'G': case 'T': break; default: ch = '-'; /* if (isupper(ch)) ch = '-'; else ch = '\0'; */ } if (ch) { read->base[numBases] = ch; read->prob_A[numBases] = 0; read->prob_C[numBases] = 0; read->prob_G[numBases] = 0; read->prob_T[numBases] = 0; read->basePos[numBases] = bp; ++numBases; } } read->base[numBases] = 0; read->NBases = numBases; } /************************************************************* * Read the trace information 
*************************************************************/ if (sections & READ_SAMPLES) { /* * Traces are stored as 2 byte integers in records in the order of * A C G T A C G T ... */ if (fseek(fp, (off_t)(dataO+header_size), 0) != 0) goto bail_out; num_points = 0; for (i=0; i < read->NPoints; i++) { if (!le_read_int_2(fp, &(read->traceA[i]))) goto bail_out; if (read->maxTraceVal < read->traceA[i]) read->maxTraceVal = read->traceA[i]; if (!le_read_int_2(fp, &(read->traceC[i]))) goto bail_out; if (read->maxTraceVal < read->traceC[i]) read->maxTraceVal = read->traceC[i]; if (!le_read_int_2(fp, &(read->traceG[i]))) goto bail_out; if (read->maxTraceVal < read->traceG[i]) read->maxTraceVal = read->traceG[i]; if (!le_read_int_2(fp, &(read->traceT[i]))) goto bail_out; if (read->maxTraceVal < read->traceT[i]) read->maxTraceVal = read->traceT[i]; if (read->traceA[i]==0 && read->traceT[i]==0 && read->traceC[i]==0 && read->traceG[i]==0 && i > (numPoints-64)) break; num_points++; } } /* SUCCESS */ read->format = TT_ALF; return(read); /* FAILURE */ bail_out: if (read) read_deallocate(read); return NULLRead; }
int main(int argc, char **argv) { Read *r = NULL; char *directory = NULL; char *ident, *value; int ident_len, value_len; int i, j, found; int *found_args = NULL; char **FileList = NULL; char trace_filename[FILENAME_MAX]=""; int num_traces, trace_iter, files_read = 0; char *str; if(argc != 2) usage(); directory = argv[1]; // Get a list of all chromatogram files in the directory num_traces = GetFileList(&FileList, directory); if(num_traces == 0){ fprintf(stderr,"* Path %d yielded 0 files...exiting\n", num_traces); exit(2); } /* step through all the sequences */ for (trace_iter = 0; trace_iter < num_traces; trace_iter++){ if(r){ read_deallocate(r); } //Get the file name from the iterator. sprintf(trace_filename, "%s/%s",directory, FileList[trace_iter]); /* Read the file */ read_sections(READ_COMMENTS); if (NULL == (r = read_reading(trace_filename, TT_ANY))) { continue; // don't worry about it } files_read++; if (!r->info) return 1; for(str = strtok(r->info,"\n"); str != NULL; str = strtok(NULL,"\n")){ char *name; char *value; int items = 0; char *start,*end; name = str; value = strchr(name,'='); *value = '\0'; // skip over the '=' value++; if(!strcmp(name,"RUND")){ start = value; end = strstr(value," - "); *end = '\0'; // terminate the start string end += 3; // skip over the " - " fprintf(stdout,"%s.RUND=start=%s,end=%s\n", FileList[trace_iter], start,end); }else if(! strcmp(name,"DATE")){ start = value; end = strstr(value," to "); *end = '\0'; // terminate the start string end += 4; // skip over the " to " fprintf(stdout,"%s.DATE=start=%s,end=%s\n", FileList[trace_iter],start,end); }else{ fprintf(stdout,"%s.%s=%s\n", FileList[trace_iter],name, value); } } } return (files_read == 0); // is zero, unless we read nothing. }
/*
 * Builds a Read structure from an Scf structure; the Scf is not modified.
 * Only the sections enabled in the current read_sections() mask (samples,
 * bases, comments) are copied.
 *
 * Returns:
 *   A pointer to a newly allocated Read structure upon success.
 *   NULLRead upon failure.
 */
Read *scf2read(Scf *scf)
{
    Read *out;
    int k, limit;
    int wanted = read_sections(0);
    int want_samples = 0;
    int want_bases = 0;

    /* Size the new Read from the sections that will actually be copied. */
    if (wanted & READ_SAMPLES)
        want_samples = scf->header.samples;
    if (wanted & READ_BASES)
        want_bases = scf->header.bases;

    out = read_allocate(want_samples, want_bases);
    if (out == NULLRead)
        return NULLRead;

    if (wanted & READ_SAMPLES) {
        TRACE peak = 0;

        limit = scf->header.samples;
        out->NPoints = limit;

        for (k = 0; k < limit; k++) {
            TRACE a, c, g, t;

            /* Samples are stored either as 1-byte or as 2-byte values. */
            if (scf->header.sample_size == 1) {
                a = scf->samples.samples1[k].sample_A;
                c = scf->samples.samples1[k].sample_C;
                g = scf->samples.samples1[k].sample_G;
                t = scf->samples.samples1[k].sample_T;
            } else {
                a = scf->samples.samples2[k].sample_A;
                c = scf->samples.samples2[k].sample_C;
                g = scf->samples.samples2[k].sample_G;
                t = scf->samples.samples2[k].sample_T;
            }

            out->traceA[k] = a;
            out->traceC[k] = c;
            out->traceG[k] = g;
            out->traceT[k] = t;

            /* Track the largest sample seen across all four channels. */
            if (a > peak) peak = a;
            if (c > peak) peak = c;
            if (g > peak) peak = g;
            if (t > peak) peak = t;
        }

        out->maxTraceVal = peak;
    }

    if (wanted & READ_BASES) {
        limit = scf->header.bases;
        out->NBases = limit;

        for (k = 0; k < limit; k++) {
            out->basePos[k] = scf->bases[k].peak_index;
            out->prob_A[k]  = scf->bases[k].prob_A;
            out->prob_C[k]  = scf->bases[k].prob_C;
            out->prob_G[k]  = scf->bases[k].prob_G;
            out->prob_T[k]  = scf->bases[k].prob_T;
            out->base[k]    = scf->bases[k].base;
        }
        /* Terminate so the base array can be used as a C string. */
        out->base[k] = 0;
    }

    if (wanted & READ_COMMENTS) {
        if (scf->header.comments_size > 0 && scf->comments) {
            out->info = (char *)xmalloc(scf->header.comments_size + 1);
            if (out->info == NULL) {
                read_deallocate(out);
                return NULLRead;
            }
            memcpy(out->info, scf->comments, scf->header.comments_size);
            out->info[scf->header.comments_size] = '\0';
        }
    }

    /* Clip points and format tag. */
    out->leftCutoff  = scf->header.bases_left_clip;
    out->rightCutoff = out->NBases - scf->header.bases_right_clip + 1;
    out->format      = TT_SCF;

    return out;
}
/*
 * Converts a trace file into an experiment file written to `ofp`.
 *
 * file         Trace file to read. TT_BIO input is read by name through
 *              read_reading(); every other format is opened with
 *              open_trace_file() and read from the stream.
 * format       Trace format of `file`.
 * ofp          Destination mFILE for the experiment file.
 * name         Name passed to read2exp().
 * output_conf  When non-zero and the average quality is non-zero, an AV
 *              entry holding one confidence value per base is also set.
 *
 * Returns 0 for success, 1 for failure (unreadable input, experiment
 * structure creation failure, or out of memory).
 */
int convert(char *file, int format, mFILE *ofp, char *name, int output_conf)
{
    Read *r;
    Exp_info *e;
    char buf[50];
    double aq;

    if (format == TT_BIO) {
        if (NULL == (r = read_reading(file, format))) {
            fprintf(stderr, "%s: failed to read\n", file);
            return 1;
        }
    } else {
        FILE *infp;

        if (NULL == (infp = open_trace_file(file, NULL))) {
            perror(file);
            return 1;
        }
        r = fread_reading(infp, file, format);
        /* Close the stream whether or not the read succeeded. */
        fclose(infp);
        if (NULL == r) {
            fprintf(stderr, "%s: failed to read\n", file);
            return 1;
        }
    }

    e = read2exp(r, name);
    if (NULL == e) {
        fprintf(stderr, "Failed to create experiment file.\n");
        read_deallocate(r);
        return 1;
    }

    aq = avg_qual(r);
    snprintf(buf, sizeof(buf), "%f", aq);
    exp_set_entry(e, EFLT_AQ, buf);

    exp_print_mfile(ofp, e);

    /*
     * NOTE(review): the AV entry below is set after exp_print_mfile() has
     * already been called - confirm that it still reaches the output.
     */
    if (output_conf && aq != 0) {
        char *cstr;
        int1 *conf;
        int i;

        /* +1 keeps the request non-zero even for an empty read. */
        conf = (int1 *)xmalloc((r->NBases + 1) * sizeof(*conf));
        cstr = (char *)xmalloc(5 * r->NBases + 2);
        if (NULL == conf || NULL == cstr) {
            fprintf(stderr, "%s: out of memory\n", file);
            if (conf) xfree(conf);
            if (cstr) xfree(cstr);
            read_deallocate(r);
            exp_destroy_info(e);
            return 1;
        }

        /*
         * Use the probability of the called base; for any other call use
         * the mean of the four probabilities.
         */
        for (i = 0; i < r->NBases; i++) {
            switch (r->base[i]) {
            case 'a':
            case 'A':
                conf[i] = r->prob_A[i];
                break;
            case 'c':
            case 'C':
                conf[i] = r->prob_C[i];
                break;
            case 'g':
            case 'G':
                conf[i] = r->prob_G[i];
                break;
            case 't':
            case 'T':
                conf[i] = r->prob_T[i];
                break;
            default:
                conf[i] = (r->prob_A[i] + r->prob_C[i] +
                           r->prob_G[i] + r->prob_T[i]) / 4;
                break;
            }
        }

        conf2str(conf, r->NBases, cstr);
        exp_set_entry(e, EFLT_AV, cstr);

        xfree(cstr);
        xfree(conf);
    }

    read_deallocate(r);
    exp_destroy_info(e);
    mfflush(ofp);

    return 0;
}
/*
 * Duplicates the read structure and optionally gives it a new filename.
 * The following fields are not duplicated:
 *
 *  int  orig_trace_format;
 *  void (*orig_trace_free)(void *ptr);
 *  void *orig_trace;
 *  char *ident;
 *
 * src       Read to copy; must not be NULL.
 * new_name  Trace name for the copy, or NULL to reuse src->trace_name.
 *
 * Arrays are only copied when they exist in both the source and the newly
 * allocated copy (read_allocate() only creates the sections enabled in the
 * current read_sections() mask).
 *
 * Returns:
 *   "Read *" for success
 *   "NULLRead" for failure
 */
Read *read_dup(Read *src, const char *new_name)
{
    int n;
    Read *dst;

    assert(src);

    /* Allocate storage and initialise */
    dst = read_allocate(src->NPoints, src->NBases);
    if (dst == NULLRead)
        return NULLRead;
    dst->info = 0;
    dst->trace_name = 0;

    /* Copy over possibly new name */
    if (new_name)
        n = strlen(new_name);
    else if (src->trace_name)
        n = strlen(src->trace_name);
    else
        n = 0;

    if (n > 0) {
        dst->trace_name = (char *)xmalloc(n + 1);
        if (!dst->trace_name)
            goto error;

        if (new_name)
            strcpy(dst->trace_name, new_name);
        else
            strcpy(dst->trace_name, src->trace_name);
    }

    /* Copy over info: allocate AND fill the buffer. */
    if (src->info) {
        dst->info = (char *)xmalloc(strlen(src->info) + 1);
        if (!dst->info)
            goto error;
        strcpy(dst->info, src->info);
    }

    /* Copy single fields */
    dst->format      = src->format;
    dst->maxTraceVal = src->maxTraceVal;
    dst->leftCutoff  = src->leftCutoff;
    dst->rightCutoff = src->rightCutoff;
    dst->baseline    = src->baseline;

    /* Copy NPoints fields if they exist on both sides */
    if (src->traceA && dst->traceA) {
        for (n = 0; n < src->NPoints; n++) {
            dst->traceA[n] = src->traceA[n];
            dst->traceC[n] = src->traceC[n];
            dst->traceG[n] = src->traceG[n];
            dst->traceT[n] = src->traceT[n];
        }
    }

    /* Copy NBases fields if they exist on both sides */
    if (src->base && src->base[0] && dst->base) {
        for (n = 0; n < src->NBases; n++) {
            dst->base[n] = src->base[n];
            if (src->basePos && dst->basePos)
                dst->basePos[n] = src->basePos[n];
            if (src->prob_A && dst->prob_A) {
                dst->prob_A[n] = src->prob_A[n];
                dst->prob_C[n] = src->prob_C[n];
                dst->prob_G[n] = src->prob_G[n];
                dst->prob_T[n] = src->prob_T[n];
            }
        }
    }

    /* Success */
    return dst;

 error:
    /* Failure */
    read_deallocate(dst);
    return NULLRead;
}
/* * Allocate a new sequence, with the given sizes. * Returns: * "Read *" for success * "NULLRead" for failure */ Read *read_allocate(int num_points, int num_bases) { Read *seq = NULLRead; int sections = read_sections(0); /* Allocate the body of the sequence */ if ((seq = (Read *)xmalloc(sizeof(Read))) == NULL) return(NULLRead); seq->NPoints = num_points; seq->NBases = num_bases; /* * Initialise the body, all pointers are set to NULL so we can * happily call `read_deallocate()`. */ seq->leftCutoff = 0; seq->rightCutoff = 0; seq->maxTraceVal = 0; seq->baseline = 0; seq->traceC = NULL; seq->traceA = NULL; seq->traceG = NULL; seq->traceT = NULL; seq->base = NULL; seq->basePos = NULL; seq->info = NULL; seq->format = TT_ANY; seq->trace_name = NULL; seq->prob_A = NULL; seq->prob_C = NULL; seq->prob_G = NULL; seq->prob_T = NULL; seq->orig_trace_format = TT_ANY; seq->orig_trace = NULL; seq->orig_trace_free = NULL; seq->ident = NULL; /* Allocate space for the bases - 1 extra for the ->base field so * that we can treat it as a NULL terminated string. */ if (sections & READ_BASES && (((seq->base = (char *)xcalloc(num_bases+1,1)) == NULL) || ((seq->basePos = (uint_2 *)xcalloc(num_bases+1,2)) == NULL) || ((seq->prob_A = (char *)xcalloc(num_bases+1,1)) == NULL) || ((seq->prob_C = (char *)xcalloc(num_bases+1,1)) == NULL) || ((seq->prob_G = (char *)xcalloc(num_bases+1,1)) == NULL) || ((seq->prob_T = (char *)xcalloc(num_bases+1,1)) == NULL)) ) { read_deallocate(seq); return NULLRead; } if (sections & READ_SAMPLES && (((seq->traceC =(TRACE *)xcalloc(num_points+1, 2)) == NULL)|| ((seq->traceA =(TRACE *)xcalloc(num_points+1, 2)) == NULL)|| ((seq->traceG =(TRACE *)xcalloc(num_points+1, 2)) == NULL)|| ((seq->traceT =(TRACE *)xcalloc(num_points+1, 2)) == NULL)) ) { read_deallocate(seq); return NULLRead; } return seq; }
/*
 * Produce a consensus trace from a specific region of this contig.
 *
 * xx         Editor state for the contig.
 * start,end  Inclusive consensus range to build the trace for.
 * strand     Strand filter handed to strand_matches().
 * match      Passed through to do_cons_base().
 * exception  Sequence number excluded from the initial sequence set.
 *
 * Returns the newly allocated consensus Read on success, or NULL on
 * failure. On failure every trace loaded so far, its position table and
 * the partially built result are released.
 */
Read *cons_trace(EdStruct *xx, int start, int end, int strand,
                 int match, int exception)
{
    int *seqList, i, j, count = 0, next;
    Read *r = NULL;
    int max_points = 10000;
    char *con = NULL;
    diff_cons_seq *rlist = NULL;
    char fileName[256];
    char t_type[5];
    int form;
    int offset = 0, w;

    /* Get the consensus sequence */
    if (NULL == (con = (char *)xmalloc(end - start + 2)))
        goto error;
    DBcalcConsensus(xx, start, end - start + 1, con, NULL, BOTH_STRANDS);

    /* Allocate a list of read pointers and positions */
    if (NULL == (rlist = (diff_cons_seq *)xcalloc(DBI_gelCount(xx),
                                                  sizeof(*rlist))))
        goto error;

    /* Allocate a read structure */
    if (NULL == (r = read_allocate(max_points, end - start + 1)))
        goto error;

    /* Derive the initial list of sequences covering the start point */
    count = 0;
    seqList = DBI_list(xx);
    for (i = 1;
         i <= DBI_gelCount(xx) && DB_RelPos(xx, DBI_order(xx)[i]) <= start;
         i++) {
        int seq = DBI_order(xx)[i];

        DBgetSeq(DBI(xx), seq);
        if (DB_RelPos(xx, seq) + DB_Length(xx, seq) > start &&
            strand_matches(xx, seq, strand) &&
            seq != exception) {
            if (get_trace_path(xx, seq, fileName, t_type) == 0) {
                form = trace_type_str2int(t_type);
                rlist[count].r = read_reading(fileName, form);
                /* Only successfully loaded traces join the active set. */
                if (rlist[count].r) {
                    rlist[count].seq = DBgetSeq(DBI(xx), seq);
                    rlist[count].opos =
                        get_trace_pos(rlist[count].r, xx, seq, 0,
                                      DB_Start(xx, seq),
                                      DB_Start(xx, seq) + DB_Length(xx, seq),
                                      DB_Seq(xx, seq), 0);
                    seqList[count++] = seq;
                }
            }
        }
    }
    if (i <= DBI_gelCount(xx))
        next = i;
    else
        next = 0;

    /*
     * Loop along the sequence updating seqList as we go.
     * At each point we know how many sequences there are so we can
     * produce the consensus from these sequences.
     */
    for (i = start; i <= end; i++) {
        w = do_cons_base(xx, con, i, start, count, seqList, rlist, r,
                         offset, match, &max_points);
        if (w == -1)
            goto error;
        offset += w;

        /* Update seqList for the next position */
        if (i < end) {
            /* Remove sequences that end at or before this position */
            for (j = 0; j < count; j++) {
                int seq = seqList[j];

                if (DB_RelPos(xx, seq) + DB_Length(xx, seq) - 1 <= i) {
                    read_deallocate(rlist[j].r);
                    xfree(rlist[j].opos);
                    memmove(&seqList[j], &seqList[j+1],
                            (count-1-j) * sizeof(*seqList));
                    memmove(&rlist[j], &rlist[j+1],
                            (count-1-j) * sizeof(*rlist));
                    count--;
                    j--;  /* re-examine the entry shifted into slot j */
                }
            }

            /*
             * Add sequences.
             * NOTE(review): unlike the initial scan, this loop does not
             * test `exception`, and `next` (an index into DBI_order() when
             * set above) is used directly as a sequence number - confirm
             * both are intended.
             */
            while (next && DB_RelPos(xx, next) <= i+1) {
                DBgetSeq(DBI(xx), next);
                if (strand_matches(xx, next, strand) &&
                    get_trace_path(xx, next, fileName, t_type) == 0) {
                    form = trace_type_str2int(t_type);
                    rlist[count].r = read_reading(fileName, form);
                    if (rlist[count].r) {
                        rlist[count].seq = DBgetSeq(DBI(xx), next);
                        rlist[count].opos =
                            get_trace_pos(rlist[count].r, xx, next, 0,
                                          DB_Start(xx, next),
                                          DB_Start(xx, next) +
                                              DB_Length(xx, next),
                                          DB_Seq(xx, next), 0);
                        seqList[count++] = next;
                    }
                }

                if (++next > DBI_gelCount(xx))
                    next = 0;
            }
        }
    }

    for (i = 0; i < count; i++) {
        read_deallocate(rlist[i].r);
        xfree(rlist[i].opos);
    }

    tidy_up(r, end - start + 1, offset);

    xfree(con);
    xfree(rlist);

    return r;

 error:
    /* Release whatever is still held in the active set and the result. */
    if (rlist) {
        for (i = 0; i < count; i++) {
            read_deallocate(rlist[i].r);
            xfree(rlist[i].opos);
        }
    }
    if (r)
        read_deallocate(r);
    if (con)
        xfree(con);
    if (rlist)
        xfree(rlist);

    return NULL;
}
/*
 * ---------------------------------------------------------------------------
 * Loads confidence values from the trace file and averages them.
 *
 * e       Experiment file info; its LT (trace type) and LN (trace name)
 *         entries identify the trace. Only SCF and ZTR are accepted.
 * length  Number of entries to fill in `conf`.
 * opos    Optional (may be NULL) 1-based original position of each base
 *         in the trace; 0 marks an inserted base.
 * conf    Output array of `length` confidence values.
 *
 * With `opos`, an inserted base gets confidence 0 and a position outside
 * the trace gets 2. A called base that is not A/C/G/T gets 2, except that
 * without `opos` the calls N and '-' get the mean of the four
 * probabilities. Without `opos`, entries beyond the number of bases in the
 * trace also get 2.
 *
 * Returns 0 for success
 *        -1 for failure
 */
int get_read_conf(Exp_info *e, int length, int2 *opos, int1 *conf)
{
    int ttype, i;
    int ret = -1;
    FILE *fp = NULL;
    uint_1 *prob_A = NULL, *prob_C = NULL, *prob_G = NULL, *prob_T = NULL;
    char *seq = NULL;
    int nbases = 0;

    /* Sanity check */
    if (!(exp_Nentries(e, EFLT_LT) && exp_Nentries(e, EFLT_LN)))
        return -1;

    /* Find and load trace file */
    ttype = trace_type_str2int(exp_get_entry(e, EFLT_LT));

    if (ttype != TT_SCF && ttype != TT_ZTR)
        return -1;

    /*
     * We only support direct reading accuracy values from SCF files.
     * Otherwise we have to take a slower approach.
     */
    if (ttype != TT_SCF) {
        Read *r;
        int sec = read_sections(0);
        int loaded = 0;

        /* Only the bases are needed; restore the section mask afterwards. */
        read_sections(READ_BASES);
        if (NULL == (r = read_reading(exp_get_entry(e, EFLT_LN),
                                      TT_ANYTR))) {
            read_sections(sec);
            return -1;
        }

        nbases = r->NBases;
        if (nbases >= 0 && r->prob_A && r->prob_C && r->prob_G &&
            r->prob_T && r->base) {
            /* +1 keeps the request non-zero for an empty read. */
            size_t alloc = (size_t)nbases + 1;

            prob_A = (uint_1 *)xmalloc(alloc);
            prob_C = (uint_1 *)xmalloc(alloc);
            prob_G = (uint_1 *)xmalloc(alloc);
            prob_T = (uint_1 *)xmalloc(alloc);
            seq    = (char *)xmalloc(alloc);

            if (prob_A && prob_C && prob_G && prob_T && seq) {
                memcpy(prob_A, r->prob_A, nbases);
                memcpy(prob_C, r->prob_C, nbases);
                memcpy(prob_G, r->prob_G, nbases);
                memcpy(prob_T, r->prob_T, nbases);
                memcpy(seq,    r->base,   nbases);
                loaded = 1;
            }
        }

        read_deallocate(r);
        read_sections(sec);

        if (!loaded)
            goto cleanup;
    } else {
        Header h;
        float scf_version;
        size_t alloc;

        /* For SCF files we read directly - the above code would also do. */
        if (NULL == (fp = open_trace_file(exp_get_entry(e, EFLT_LN), NULL)))
            return -1;

        /* Read the SCF header */
        if (-1 == read_scf_header(fp, &h))
            goto cleanup;
        scf_version = scf_version_str2float(h.version);

        nbases = (int)h.bases;
        if (nbases < 0)  /* base count does not fit in an int */
            goto cleanup;

        /* Alloc memory (+1 keeps the request non-zero) */
        alloc = (size_t)h.bases + 1;
        prob_A = (uint_1 *)xmalloc(alloc * sizeof(*prob_A));
        prob_C = (uint_1 *)xmalloc(alloc * sizeof(*prob_C));
        prob_G = (uint_1 *)xmalloc(alloc * sizeof(*prob_G));
        prob_T = (uint_1 *)xmalloc(alloc * sizeof(*prob_T));
        seq    = (char *)xmalloc(alloc * sizeof(*seq));
        if (NULL == prob_A || NULL == prob_C || NULL == prob_G ||
            NULL == prob_T || NULL == seq)
            goto cleanup;

        /* Load base scores */
        if (scf_version >= 3.0) {
            /*
             * Version 3 base format:
             * num_bases * 4byte peak index
             * num_bases * prob_A
             * num_bases * prob_C
             * num_bases * prob_G
             * num_bases * prob_T
             * num_bases * base
             * num_bases * spare (x3)
             */
            if (0 != fseek(fp, (off_t)h.bases_offset + 4 * (off_t)h.bases,
                           SEEK_SET))
                goto cleanup;
            if (h.bases != fread(prob_A, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_C, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_G, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_T, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(seq, 1, h.bases, fp))
                goto cleanup;
        } else {
            uint_1 buf[12];

            /*
             * Version 2 base format
             * num_bases * base_struct,  where base_struct is 12 bytes:
             *     0-3 peak_index
             *     4-7 prob_A/C/G/T
             *     8   base
             *     9-  spare
             */
            if (0 != fseek(fp, (off_t)h.bases_offset, SEEK_SET))
                goto cleanup;

            for (i = 0; (unsigned)i < h.bases; i++) {
                if (1 != fread(buf, 12, 1, fp))
                    goto cleanup;
                prob_A[i] = buf[4];
                prob_C[i] = buf[5];
                prob_G[i] = buf[6];
                prob_T[i] = buf[7];
                seq[i]    = buf[8];
            }
        }

        fclose(fp);
        fp = NULL;
    }

    /* Determine confidence values */
    if (opos) {
        for (i = 0; i < length; i++) {
            int pos = opos[i];

            if (pos == 0) {
                /* Inserted base, change to 0% */
                conf[i] = 0;
            } else if (pos < 0 || pos > nbases) {
                /* Position outside the trace: treat as an unknown base. */
                conf[i] = 2;
            } else {
                switch (seq[pos-1]) {
                case 'a':
                case 'A':
                    conf[i] = prob_A[pos-1];
                    break;
                case 'c':
                case 'C':
                    conf[i] = prob_C[pos-1];
                    break;
                case 'g':
                case 'G':
                    conf[i] = prob_G[pos-1];
                    break;
                case 't':
                case 'T':
                    conf[i] = prob_T[pos-1];
                    break;
                default:
                    conf[i] = 2;
                }
            }
        }
    } else {
        int mlength = MIN(length, nbases);

        for (i = 0; i < mlength; i++) {
            switch (seq[i]) {
            case 'a':
            case 'A':
                conf[i] = prob_A[i];
                break;
            case 'c':
            case 'C':
                conf[i] = prob_C[i];
                break;
            case 'g':
            case 'G':
                conf[i] = prob_G[i];
                break;
            case 't':
            case 'T':
                conf[i] = prob_T[i];
                break;
            case 'n':
            case 'N':
            case '-':
                conf[i] = (prob_A[i] + prob_C[i] +
                           prob_G[i] + prob_T[i]) / 4;
                break;
            default:
                conf[i] = 2;
            }
        }
        /* Bases beyond the end of the trace get the default value. */
        for (; i < length; i++)
            conf[i] = 2;
    }

    ret = 0;

 cleanup:
    /* Single exit: release the stream and every buffer still held. */
    if (fp)
        fclose(fp);
    if (prob_A) xfree(prob_A);
    if (prob_C) xfree(prob_C);
    if (prob_G) xfree(prob_G);
    if (prob_T) xfree(prob_T);
    if (seq)    xfree(seq);

    return ret;
}
/* * Read the plain format sequence from FILE *fp into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *fread_pln(FILE *fp) { Read *read = NULLRead; off_t fileLen; int ch; char *leftc, *rightc, *leftcp, *rightcp; int first = 1; /* * Find the length of the file. * Use this as an overestimate of the length of the sequence. */ fseek(fp, (off_t) 0, 2); if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/) goto bail_out; fseek(fp, (off_t) 0, 0); /* Allocate the sequence */ if (NULLRead == (read = read_allocate(0, fileLen))) goto bail_out; if (NULL == (leftc = (char *)xmalloc(fileLen))) goto bail_out; if (NULL == (rightc = (char *)xmalloc(fileLen))) goto bail_out; leftcp = leftc; rightcp = rightc; /* Read in the bases */ read->NBases = 0; read->format = TT_PLN; while ((ch = fgetc(fp)) != EOF) { if (ch == '>') { /* Fasta format file - skip the header and load the first * fasta sequence only. We don't even attempt to worry about * multi-sequence file formats for now. */ if (!first) break; while(ch != '\n' && ch != EOF) ch = fgetc(fp); } else if (ch==';') { /* * ;< is left cutoff, * ;> is right cutoff. * Any other ';'s we can treat as a comments. 
*/ ch = fgetc(fp); if (first == 1 && ch != '<' && ch != '>') { int d; char type[5], name[17], line[1024]; line[0] = ch; fgets(&line[1], 1022, fp); if (5 == sscanf(line, "%6d%6d%6d%4c%s", &d, &d, &d, type, name)) { char * p; if ((p = strchr(type, ' '))) *p = 0; read->format = trace_type_str2int(type); read->trace_name = (char *)xmalloc(strlen(name)+1); if (read->trace_name) strcpy(read->trace_name, name); } } else if (ch == '<') { ch = fgetc(fp); while (ch != '\n') { *leftcp++ = ch; ch = fgetc(fp); } } else if (ch == '>') { ch = fgetc(fp); while (ch != '\n') { *rightcp++ = ch; ch = fgetc(fp); } } else { while(ch != '\n' && ch != EOF) ch = fgetc(fp); } } else if (isprint(ch) && !isspace(ch)) { read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch); } first = 0; } *leftcp = *rightcp = 0; read->leftCutoff = strlen(leftc); read->rightCutoff = read->leftCutoff + read->NBases + 1; memmove(&read->base[read->leftCutoff], read->base, read->NBases); memmove(read->base, leftc, read->leftCutoff); memmove(&read->base[read->leftCutoff + read->NBases], rightc, strlen(rightc)); read->NBases += read->leftCutoff + strlen(rightc); read->base[read->NBases] = 0; xfree(leftc); xfree(rightc); /* SUCCESS */ return(read); /* FAILURE */ bail_out: if (read) read_deallocate(read); return NULLRead; }
/*
 * Ripped out of io_lib's trace_dump program.
 * Uncompresses the ZTR structure `z`, converts it to a Read and writes a
 * printable ASCII dump of it to files[0].
 *
 * z      ZTR structure to dump; it is uncompressed in place.
 * name   Name printed in the [Trace] section and in the error message.
 * mode   Not used by this function.
 * files  Output streams; only files[0] is written to.
 */
void dump_text(ztr_t *z, char *name, char mode, FILE **files)
{
    static const char *const heading[4] = {
        "\n[A_Trace]\n", "\n[C_Trace]\n", "\n[G_Trace]\n", "\n[T_Trace]\n"
    };
    FILE *out = files[0];
    Read *read;
    int i, chan;

    uncompress_ztr(z);
    read = ztr2read(z); /* Inefficient; can do direct */

    if (read == NULL) {
        fprintf(stderr, "Tracedump was unable to open file %s\n", name );
        return;
    }

    /* Header section: scalar fields of the Read. */
    fprintf(out, "[Trace]\n");
    fprintf(out, "%s\n", name);

    fprintf(out, "\n[Header]\n");
    fprintf(out, "%d\t\t# format\n", read->format);
    fprintf(out, "%d\t\t# NPoints\n", read->NPoints);
    fprintf(out, "%d\t\t# NBases\n", read->NBases);
    fprintf(out, "%d\t\t# NFlows\n", read->nflows);
    fprintf(out, "%d\t\t# maxTraceVal\n", (int)read->maxTraceVal-read->baseline);
    fprintf(out, "%d\t\t# baseline\n", read->baseline);
    fprintf(out, "%d\t\t# leftCutoff\n", read->leftCutoff);
    fprintf(out, "%d\t\t# rightCutoff\n", read->rightCutoff);

    /* One line per base: call, position, the four probabilities, index. */
    fputs("\n[Bases]\n", out);
    for (i = 0; i < read->NBases; i++) {
        fprintf(out, "%c %05d %+03d %+03d %+03d %+03d #%3d\n",
                read->base[i],
                read->basePos ? read->basePos[i] : 0,
                (int)read->prob_A[i],
                (int)read->prob_C[i],
                (int)read->prob_G[i],
                (int)read->prob_T[i],
                i);
    }

    /* Trace channels in A, C, G, T order, relative to the baseline. */
    if (read->NPoints) {
        TRACE *channel[4];

        channel[0] = read->traceA;
        channel[1] = read->traceC;
        channel[2] = read->traceG;
        channel[3] = read->traceT;

        for (chan = 0; chan < 4; chan++) {
            fputs(heading[chan], out);
            for (i = 0; i < read->NPoints; i++) {
                fprintf(out, "%d\t#%5d\n",
                        (int)channel[chan][i] - read->baseline, i);
            }
        }
    }

    if (read->flow_order) {
        fputs("\n[Flows]\n", out);
        for (i = 0; i < read->nflows; i++) {
            fprintf(out, "%c %5.2f %u\t#%5d\n",
                    read->flow_order[i],
                    read->flow ? read->flow[i] : 0,
                    read->flow_raw ? read->flow_raw[i] : 0,
                    i);
        }
    }

    if (read->info) {
        fputs("\n[Info]\n", out);
        fprintf(out, "%s\n", read->info);
    }

    read_deallocate(read);
}
int main(int argc, char **argv) { Read* read; int i; if (argc != 2) { fprintf(stderr, "Usage: trace_dump <trace file>\n"); return 1; } read = read_reading( argv[1], TT_ANY ); if (read == NULL) { fprintf(stderr, "Tracedump was unable to open file %s\n", argv[1] ); return 1; } printf("[Trace]\n"); printf("%s\n", read->trace_name ); printf("\n[Header]\n"); printf("%d\t\t# format\n", read->format); printf("%d\t\t# NPoints\n", read->NPoints); printf("%d\t\t# NBases\n", read->NBases); printf("%d\t\t# NFlows\n", read->nflows); printf("%d\t\t# maxTraceVal\n", (int)read->maxTraceVal); printf("%d\t\t# baseline\n", read->baseline); printf("%d\t\t# leftCutoff\n", read->leftCutoff); printf("%d\t\t# rightCutoff\n", read->rightCutoff); puts("\n[Bases]"); for (i = 0; i < read->NBases; i++) { printf("%c %05d %03d %03d %03d %03d #%3d\n", read->base[i], read->basePos ? read->basePos[i] : 0, (int)read->prob_A[i], (int)read->prob_C[i], (int)read->prob_G[i], (int)read->prob_T[i], i); } if (read->NPoints) { puts("\n[A_Trace]"); for(i = 0; i < read->NPoints; i++) printf("%d\t#%5d\n", (int)read->traceA[i], i); puts("\n[C_Trace]"); for(i = 0; i < read->NPoints; i++) printf("%d\t#%5d\n", (int)read->traceC[i], i); puts("\n[G_Trace]"); for(i = 0; i < read->NPoints; i++) printf("%d\t#%5d\n", (int)read->traceG[i], i); puts("\n[T_Trace]"); for(i = 0; i < read->NPoints; i++) printf("%d\t#%5d\n", (int)read->traceT[i], i); } if (read->flow_order) { puts("\n[Flows]"); for (i = 0; i < read->nflows; i++) { printf("%c %5.2f %u\t#%5d\n", read->flow_order[i], read->flow ? read->flow[i] : 0, read->flow_raw ? read->flow_raw[i] : 0, i); } } if (read->info) { puts("\n[Info]"); printf("%s\n", read->info); } read_deallocate(read); return 0; }