/*
 * Command line entry point.
 *
 * Parses the options into a 'struct opts' and then either converts a single
 * trace from stdin to stdout, or (with -fofn) converts every
 * "input [output]" pair listed in a file of filenames.
 *
 * Options taking a value: -scale, -fofn, -passed, -failed, -error,
 * -in_format, -name, -out_format, -compress, -abi_data.
 * Flags: -subtract_background, -normalise, -dots.
 * "--" ends option processing. For backwards compatibility exactly two
 * remaining positional arguments are read as "<in_format> <out_format>".
 *
 * Returns the result of convert() in single file mode; in -fofn mode the
 * bitwise OR of all convert() results, or -1 if one of the list / passed /
 * failed files cannot be opened.
 */
int main(int argc, char **argv) {
    struct opts opts;

    opts.in_format = TT_ANY;
    opts.out_format = TT_ZTR;
    opts.scale = 0;
    opts.sub_background = 0;
    opts.normalise = 0;
    opts.name = NULL;
    opts.compress_mode = -1;
    opts.dots = 0;
    opts.fofn = NULL;
    opts.passed = NULL;
    opts.failed = NULL;
    opts.error = NULL;

    for (argc--, argv++; argc > 0; argc--, argv++) {
        /*
         * Value belonging to the current option, or NULL when the option
         * is the last word on the command line. Options that need a value
         * must check this before consuming it.
         */
        char *val = argc > 1 ? argv[1] : NULL;

        if (**argv != '-')
            break;

        if (strcmp(*argv, "-scale") == 0) {
            if (!val)
                goto missing_value;
            opts.scale = atoi(val);
            argv++; argc--;

        } else if (strcmp(*argv, "-fofn") == 0) {
            if (!val)
                goto missing_value;
            opts.fofn = val;
            argv++; argc--;

        } else if (strcmp(*argv, "-passed") == 0) {
            if (!val)
                goto missing_value;
            opts.passed = val;
            argv++; argc--;

        } else if (strcmp(*argv, "-failed") == 0) {
            if (!val)
                goto missing_value;
            opts.failed = val;
            argv++; argc--;

        } else if (strcmp(*argv, "-error") == 0) {
            if (!val)
                goto missing_value;
            opts.error = val;
            fprintf(stderr,"* Detected error argument %s\n", opts.error);
            argv++; argc--;

        } else if (strcmp(*argv, "-subtract_background") == 0) {
            opts.sub_background = 1;

        } else if (strcmp(*argv, "-normalise") == 0) {
            opts.normalise = 1;

        } else if (strcmp(*argv, "-dots") == 0) {
            opts.dots = 1;

        } else if (strcmp(*argv, "-in_format") == 0) {
            if (!val)
                goto missing_value;
            argv++; argc--;
            /* Accept either a format name or its numeric code */
            if (TT_UNK == (opts.in_format = trace_type_str2int(*argv)))
                opts.in_format = atoi(*argv);

        } else if (strcmp(*argv, "-name") == 0) {
            if (!val)
                goto missing_value;
            opts.name = val;
            argv++; argc--;

        } else if (strcmp(*argv, "-out_format") == 0) {
            if (!val)
                goto missing_value;
            argv++; argc--;
            /* Accept either a format name or its numeric code */
            if (TT_UNK == (opts.out_format = trace_type_str2int(*argv)))
                opts.out_format = atoi(*argv);

        } else if (strcasecmp(*argv, "-compress") == 0) {
            if (!val)
                goto missing_value;
            opts.compress_mode = compress_str2int(val);
            argv++; argc--;

        } else if (strcmp(*argv, "-abi_data") == 0) {
            int c1, c2, c3, c4;

            if (!val)
                goto missing_value;
            argv++; argc--;
            if (4 == sscanf(*argv, "%d,%d,%d,%d", &c1, &c2, &c3, &c4)) {
                abi_set_data_counts(c1, c2, c3, c4);
            } else {
                usage();
            }

        } else if (strcmp(*argv, "--") == 0) {
            /*
             * Consume the terminator itself; otherwise it would be counted
             * as a positional argument below and always end in usage().
             */
            argv++; argc--;
            break;

        } else {
            usage();
        }
    }

    if (argc == 2) {
        /* Old syntax, for backwards compatibility */
        if (TT_UNK == (opts.in_format = trace_type_str2int(argv[0])))
            opts.in_format = atoi(argv[0]);
        if (TT_UNK == (opts.out_format = trace_type_str2int(argv[1])))
            opts.out_format = atoi(argv[1]);
    } else if (argc != 0) {
        usage();
    }

    /*
     * Added by SAK: Allow redirection of error output to file, due to
     * problems with Java exec.
     */
    if (NULL != opts.error) {
        int fd;

        fprintf(stderr,"* Redirecting stderr to %s\n", opts.error);
        close(2); /* close fd with stderr */

        /*
         * With descriptor 2 just closed, the lowest free descriptor is
         * handed back by creat(), which is what performs the redirection.
         * Plain rw permissions (subject to umask): the previous mode of -1
         * also requested setuid/setgid/sticky and execute bits.
         */
        fd = creat(opts.error, 0666);
        if (fd == -1) {
            exit(-1);
        }
    }

    if (!opts.fofn) {
        return convert(stdin, stdout, "(stdin)", "(stdout)", &opts);
    }

    {
        FILE *fpin, *fpout;
        FILE *fppassed = NULL, *fpfailed = NULL;
        char *infname, *outfname;
        int ret, ret_all = 0;
        char line[8192], line2[8192];
        FILE *fofn_fp;

        if (NULL == (fofn_fp = fopen(opts.fofn, "r"))) {
            perror(opts.fofn);
            return -1;
        }

        if (opts.passed &&
            NULL == (fppassed = fopen(opts.passed, "w"))) {
            perror(opts.passed);
            fclose(fofn_fp);
            return -1;
        }

        if (opts.failed &&
            NULL == (fpfailed = fopen(opts.failed, "w"))) {
            perror(opts.failed);
            if (fppassed)
                fclose(fppassed);
            fclose(fofn_fp);
            return -1;
        }

        while (fgets(line, 8192, fofn_fp) != NULL) {
            int i, j, len;
            time_t ret_time; /* SAK */

            /*
             * Find input and output name, escaping spaces as needed.
             * A backslash copies the following character verbatim; an
             * unescaped space ends the input name and starts the output
             * name. line2 can never hold more characters than line.
             */
            len = strlen(line);
            outfname = NULL;
            for (i = j = 0; i < len; i++) {
                if (line[i] == '\\' && i != len-1) {
                    line2[j++] = line[++i];
                } else if (line[i] == ' ') {
                    line2[j++] = 0;
                    outfname = &line2[j];
                } else if (line[i] != '\n') {
                    line2[j++] = line[i];
                }
            }
            line2[j] = 0;
            infname = line2;

            /* Open input and output files */
            if (NULL == (fpin = fopen(infname, "rb"))) {
                char buf[2048];

                /* Names may be longer than buf; truncate, never overflow */
                snprintf(buf, sizeof(buf), "ERROR %s", infname);
                perror(buf);
                if (opts.dots) {
                    fputc('!', stdout);
                    fflush(stdout);
                }
                if (fpfailed)
                    fprintf(fpfailed, "%s\n", infname);
                continue;
            }

            if (outfname) {
                if (NULL == (fpout = fopen(outfname, "wb+"))) {
                    char buf[2048];

                    snprintf(buf, sizeof(buf), "ERROR %s", outfname);
                    perror(buf);
                    fclose(fpin);
                    if (opts.dots) {
                        fputc('!', stdout);
                        fflush(stdout);
                    }
                    if (fpfailed)
                        fprintf(fpfailed, "%s\n", infname);
                    continue;
                }
            } else {
                outfname = "(stdout)";
                fpout = stdout;
            }

            /* Convert */
            ret = convert(fpin, fpout, infname, outfname, &opts);
            ret_time = time(NULL); /* SAK */
            ret_all |= ret;
            if (opts.dots) {
                fputc(ret ? '!' : '.', stdout);
                fflush(stdout);
            }

            /* ctime() supplies the trailing newline of each log entry */
            if (ret) {
                if (fpfailed)
                    fprintf(fpfailed, "%s %s", infname, ctime(&ret_time)); /* SAK */
            } else {
                if (fppassed)
                    fprintf(fppassed, "%s %s", infname, ctime(&ret_time)); /* SAK */
            }

            /* Tidy up */
            fclose(fpin);
            if (fpout != stdout)
                fclose(fpout);
        }

        if (ret_all)
            fprintf(stderr,"* ret_all = %d\n", ret_all);

        if (fppassed)
            fclose(fppassed);
        if (fpfailed)
            fclose(fpfailed);
        fclose(fofn_fp);

        return ret_all;
    }

 missing_value:
    /*
     * An option that needs a value was the last word on the command line.
     * usage() is expected not to return (TODO confirm); the explicit return
     * guarantees we never go on to dereference the missing argument.
     */
    usage();
    return 1;
}
/*
 * ---------------------------------------------------------------------------
 * Loads confidence values from the trace file and averages them.
 *
 * The trace type and name are taken from the LT and LN entries of the
 * experiment file 'e'. Only SCF and ZTR traces are accepted. SCF files are
 * read directly (header plus the base records); anything else is loaded
 * through read_reading() with only the bases section enabled.
 *
 * 'length' is the number of entries to write to 'conf'.
 *
 * 'opos' is optional - if not known then set to NULL. When supplied,
 * opos[i] is the 1-based position in the trace of base i, with 0 marking
 * an inserted base (given confidence 0). Positions outside the trace are
 * given the same default confidence (2) as unrecognised base types.
 *
 * Without 'opos', base i of the trace maps to conf[i]; N and '-' calls get
 * the mean of the four probabilities and entries beyond the end of the
 * trace are set to 2.
 *
 * Returns 0 for success
 *        -1 for failure
 */
int get_read_conf(Exp_info *e, int length, int2 *opos, int1 *conf) {
    int ttype, i;
    int ret = -1;
    FILE *fp = NULL;
    uint_1 *prob_A = NULL, *prob_C = NULL, *prob_G = NULL, *prob_T = NULL;
    char *seq = NULL;
    int nbases = 0;

    /* Sanity check */
    if (!(exp_Nentries(e,EFLT_LT) && exp_Nentries(e,EFLT_LN)))
        return -1;

    /* Find and load trace file */
    ttype = trace_type_str2int(exp_get_entry(e, EFLT_LT));

    if (ttype != TT_SCF && ttype != TT_ZTR)
        return -1;

    /*
     * We only support direct reading accuracy values from SCF files.
     * Otherwise we have to take a slower approach.
     */
    if (ttype != TT_SCF) {
        Read *r;
        size_t nbytes;
        int copied;
        int sec = read_sections(0);

        read_sections(READ_BASES);
        if (NULL == (r = read_reading(exp_get_entry(e,EFLT_LN), TT_ANYTR))) {
            read_sections(sec);
            return -1;
        }

        nbases = r->NBases;

        /*
         * Always request at least one byte so that an empty read does not
         * look like an allocation failure.
         */
        nbytes = nbases > 0 ? (size_t)nbases : 1;
        prob_A = (uint_1 *)xmalloc(nbytes);
        prob_C = (uint_1 *)xmalloc(nbytes);
        prob_G = (uint_1 *)xmalloc(nbytes);
        prob_T = (uint_1 *)xmalloc(nbytes);
        seq    = (char *)xmalloc(nbytes);

        copied = nbases >= 0 &&
                 prob_A && prob_C && prob_G && prob_T && seq;

        if (copied && nbases > 0) {
            if (r->prob_A && r->prob_C && r->prob_G && r->prob_T && r->base) {
                memcpy(prob_A, r->prob_A, nbases);
                memcpy(prob_C, r->prob_C, nbases);
                memcpy(prob_G, r->prob_G, nbases);
                memcpy(prob_T, r->prob_T, nbases);
                memcpy(seq,    r->base,   nbases);
            } else {
                copied = 0;
            }
        }

        read_deallocate(r);
        read_sections(sec);

        if (!copied)
            goto cleanup;

    } else {
        Header h;
        float scf_version;

        /* For SCF files we read directly - the above code would also do. */
        if (NULL == (fp = open_trace_file(exp_get_entry(e,EFLT_LN), NULL)))
            return -1;

        /* Read the SCF header */
        if (-1 == read_scf_header(fp, &h))
            goto cleanup;
        scf_version = scf_version_str2float(h.version);
        nbases = h.bases;

        /* Alloc memory */
        prob_A = (uint_1 *)xmalloc(h.bases * sizeof(*prob_A));
        prob_C = (uint_1 *)xmalloc(h.bases * sizeof(*prob_C));
        prob_G = (uint_1 *)xmalloc(h.bases * sizeof(*prob_G));
        prob_T = (uint_1 *)xmalloc(h.bases * sizeof(*prob_T));
        seq    = (char   *)xmalloc(h.bases * sizeof(*seq));
        if (NULL == prob_A ||
            NULL == prob_C ||
            NULL == prob_G ||
            NULL == prob_T ||
            NULL == seq)
            goto cleanup;

        /* Load base scores */
        if (scf_version >= 3.0) {
            /*
             * Version 3 base format:
             * num_bases * 4byte peak index
             * num_bases * prob_A
             * num_bases * prob_C
             * num_bases * prob_G
             * num_bases * prob_T
             * num_bases * base
             * num_bases * spare (x3)
             */
            if (0 != fseek(fp, (off_t)h.bases_offset + (off_t)4 * h.bases,
                           SEEK_SET))
                goto cleanup;
            if (h.bases != fread(prob_A, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_C, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_G, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(prob_T, 1, h.bases, fp))
                goto cleanup;
            if (h.bases != fread(seq, 1, h.bases, fp))
                goto cleanup;
        } else {
            unsigned int b;
            uint_1 buf[12];

            /*
             * Version 2 base format
             * num_bases * base_struct, where base_struct is 12 bytes:
             * 0-3 peak_index
             * 4-7 prob_A/C/G/T
             * 8   base
             * 9-  spare
             */
            if (0 != fseek(fp, (off_t)h.bases_offset, SEEK_SET))
                goto cleanup;

            for (b = 0; b < h.bases; b++) {
                if (1 != fread(buf, 12, 1, fp))
                    goto cleanup;
                prob_A[b] = buf[4];
                prob_C[b] = buf[5];
                prob_G[b] = buf[6];
                prob_T[b] = buf[7];
                seq[b]    = buf[8];
            }
        }

        fclose(fp);
        fp = NULL;
    }

    /* Determine confidence values */
    if (opos) {
        for (i = 0; i < length; i++) {
            int pos = opos[i];

            if (pos == 0) {
                /* Inserted base, change to 0% */
                conf[i] = 0;
            } else if (pos < 0 || pos > nbases) {
                /* Position lies outside the trace; never index past it */
                conf[i] = 2;
            } else {
                switch (seq[pos-1]) {
                case 'a':
                case 'A':
                    conf[i] = prob_A[pos-1];
                    break;
                case 'c':
                case 'C':
                    conf[i] = prob_C[pos-1];
                    break;
                case 'g':
                case 'G':
                    conf[i] = prob_G[pos-1];
                    break;
                case 't':
                case 'T':
                    conf[i] = prob_T[pos-1];
                    break;
                default:
                    conf[i] = 2;
                }
            }
        }
    } else {
        int mlength = MIN(length, nbases);

        for (i = 0; i < mlength; i++) {
            switch (seq[i]) {
            case 'a':
            case 'A':
                conf[i] = prob_A[i];
                break;
            case 'c':
            case 'C':
                conf[i] = prob_C[i];
                break;
            case 'g':
            case 'G':
                conf[i] = prob_G[i];
                break;
            case 't':
            case 'T':
                conf[i] = prob_T[i];
                break;
            case 'n':
            case 'N':
            case '-':
                conf[i] = (prob_A[i] + prob_C[i] + prob_G[i] + prob_T[i]) / 4;
                break;
            default:
                conf[i] = 2;
            }
        }

        /* Anything requested beyond the end of the trace */
        for (; i < length; i++)
            conf[i] = 2;
    }

    ret = 0;

 cleanup:
    /* Single exit point so that no error path leaks the file or buffers */
    if (fp)
        fclose(fp);
    if (prob_A)
        xfree(prob_A);
    if (prob_C)
        xfree(prob_C);
    if (prob_G)
        xfree(prob_G);
    if (prob_T)
        xfree(prob_T);
    if (seq)
        xfree(seq);

    return ret;
}
/* * Produce a consensus trace from a specific region of this contig. */ Read *cons_trace(EdStruct *xx, int start, int end, int strand, int match, int exception) { int *seqList, i, j, count, next; Read *r; int max_points = 10000; char *con = NULL; diff_cons_seq *rlist = NULL; char fileName[256]; char t_type[5]; int form; int offset = 0, w; /* Get the consensus sequence */ if (NULL == (con = (char *)xmalloc(end - start + 2))) goto error; DBcalcConsensus(xx, start, end - start + 1, con, NULL, BOTH_STRANDS); /* Allocate a list of read pointers and positions */ if (NULL == (rlist = (diff_cons_seq *)xcalloc(DBI_gelCount(xx), sizeof(*rlist)))) goto error; /* Allocate a read structure */ if (NULL == (r = read_allocate(max_points, end - start + 1))) goto error; /* Derive the initial list of sequences covering the start point */ count = 0; seqList = DBI_list(xx); for (i = 1; i <= DBI_gelCount(xx) && DB_RelPos(xx, DBI_order(xx)[i]) <= start; i++) { int seq = DBI_order(xx)[i]; DBgetSeq(DBI(xx), seq); if (DB_RelPos(xx, seq) + DB_Length(xx, seq) > start && strand_matches(xx, seq, strand) && seq != exception) { if (get_trace_path(xx, seq, fileName, t_type) == 0) { form = trace_type_str2int(t_type); rlist[count].r = read_reading(fileName, form); if (rlist[count].r) { rlist[count].seq = DBgetSeq(DBI(xx), seq); rlist[count].opos = get_trace_pos(rlist[count].r, xx, seq, 0, DB_Start(xx, seq), DB_Start(xx, seq) + DB_Length(xx, seq), DB_Seq(xx, seq), 0); seqList[count++] = seq; } } } } if (i <= DBI_gelCount(xx)) next = i; else next = 0; /* * Loop along the sequence updating seqList as we go. * At each point we know how many sequences there are so we can * produce the consensus from these sequences. 
*/ for (i = start; i <= end; i++) { w = do_cons_base(xx, con, i, start, count, seqList, rlist, r, offset, match, &max_points); if (w == -1) goto error; offset += w; /* Update seqList for the next position */ if (i < end) { /* Remove sequences */ for (j = 0; j < count; j++) { int seq = seqList[j]; if (DB_RelPos(xx, seq) + DB_Length(xx, seq) - 1 <= i) { read_deallocate(rlist[j].r); xfree(rlist[j].opos); memmove(&seqList[j], &seqList[j+1], (count-1-j) * sizeof(*seqList)); memmove(&rlist[j], &rlist[j+1], (count-1-j) * sizeof(*rlist)); count--; j--; } } /* Add sequences */ while (next && DB_RelPos(xx, next) <= i+1) { /* printf("next=%d %d %d\n", next, DB_RelPos(xx, next), i+1); */ DBgetSeq(DBI(xx), next); if (strand_matches(xx, next, strand) && get_trace_path(xx, next, fileName, t_type) == 0) { form = trace_type_str2int(t_type); rlist[count].r = read_reading(fileName, form); if (rlist[count].r) { rlist[count].seq = DBgetSeq(DBI(xx), next); rlist[count].opos = get_trace_pos(rlist[count].r, xx, next, 0, DB_Start(xx, next), DB_Start(xx,next)+DB_Length(xx,next), DB_Seq(xx, next), 0); seqList[count++] = next; } } if (++next > DBI_gelCount(xx)) next = 0; } } } for (i = 0; i < count; i++) { read_deallocate(rlist[i].r); xfree(rlist[i].opos); } tidy_up(r, end-start + 1, offset); xfree(con); xfree(rlist); return r; error: if (con) xfree(con); if (rlist) xfree(rlist); return NULL; }
/* * Read the plain format sequence from FILE *fp into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *fread_pln(FILE *fp) { Read *read = NULLRead; off_t fileLen; int ch; char *leftc, *rightc, *leftcp, *rightcp; int first = 1; /* * Find the length of the file. * Use this as an overestimate of the length of the sequence. */ fseek(fp, (off_t) 0, 2); if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/) goto bail_out; fseek(fp, (off_t) 0, 0); /* Allocate the sequence */ if (NULLRead == (read = read_allocate(0, fileLen))) goto bail_out; if (NULL == (leftc = (char *)xmalloc(fileLen))) goto bail_out; if (NULL == (rightc = (char *)xmalloc(fileLen))) goto bail_out; leftcp = leftc; rightcp = rightc; /* Read in the bases */ read->NBases = 0; read->format = TT_PLN; while ((ch = fgetc(fp)) != EOF) { if (ch == '>') { /* Fasta format file - skip the header and load the first * fasta sequence only. We don't even attempt to worry about * multi-sequence file formats for now. */ if (!first) break; while(ch != '\n' && ch != EOF) ch = fgetc(fp); } else if (ch==';') { /* * ;< is left cutoff, * ;> is right cutoff. * Any other ';'s we can treat as a comments. 
*/ ch = fgetc(fp); if (first == 1 && ch != '<' && ch != '>') { int d; char type[5], name[17], line[1024]; line[0] = ch; fgets(&line[1], 1022, fp); if (5 == sscanf(line, "%6d%6d%6d%4c%s", &d, &d, &d, type, name)) { char * p; if ((p = strchr(type, ' '))) *p = 0; read->format = trace_type_str2int(type); read->trace_name = (char *)xmalloc(strlen(name)+1); if (read->trace_name) strcpy(read->trace_name, name); } } else if (ch == '<') { ch = fgetc(fp); while (ch != '\n') { *leftcp++ = ch; ch = fgetc(fp); } } else if (ch == '>') { ch = fgetc(fp); while (ch != '\n') { *rightcp++ = ch; ch = fgetc(fp); } } else { while(ch != '\n' && ch != EOF) ch = fgetc(fp); } } else if (isprint(ch) && !isspace(ch)) { read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch); } first = 0; } *leftcp = *rightcp = 0; read->leftCutoff = strlen(leftc); read->rightCutoff = read->leftCutoff + read->NBases + 1; memmove(&read->base[read->leftCutoff], read->base, read->NBases); memmove(read->base, leftc, read->leftCutoff); memmove(&read->base[read->leftCutoff + read->NBases], rightc, strlen(rightc)); read->NBases += read->leftCutoff + strlen(rightc); read->base[read->NBases] = 0; xfree(leftc); xfree(rightc); /* SUCCESS */ return(read); /* FAILURE */ bail_out: if (read) read_deallocate(read); return NULLRead; }