Example #1
0
/*
 * Returns the value that follows the option currently at **argv and steps
 * *argc / *argv onto it. Returns NULL, leaving both untouched, when the
 * option is the last word on the command line.
 */
static char *take_option_value(int *argc, char ***argv) {
    if (*argc < 2)
	return NULL;

    (*argc)--;
    (*argv)++;
    return **argv;
}

/*
 * Command line entry point.
 *
 * Parses the leading '-' options into a 'struct opts'. Two remaining
 * positional arguments are accepted as the old "in_format out_format"
 * syntax; any other number of leftover arguments calls usage().
 *
 * With -error, stderr is redirected to the named file.
 *
 * Without -fofn a single conversion from stdin to stdout is performed and
 * its result returned. With -fofn each line of the named file is read as
 * "input [output]" (a backslash escapes the following character, so "\ "
 * embeds a space in a name); every entry is converted in turn and,
 * when -passed / -failed were given, the input name is logged to the
 * corresponding file.
 *
 * Returns the result of convert() (OR-ed together over all files in -fofn
 * mode), or -1 if the fofn / passed / failed file cannot be opened or an
 * option is missing its value.
 */
int main(int argc, char **argv) {
    struct opts opts;
    char *val;

    opts.in_format = TT_ANY;
    opts.out_format = TT_ZTR;
    opts.scale = 0;
    opts.sub_background = 0;
    opts.normalise = 0;
    opts.name = NULL;
    opts.compress_mode = -1;
    opts.dots = 0;
    opts.fofn = NULL;
    opts.passed = NULL;
    opts.failed = NULL;
    opts.error = NULL;

    /*
     * argc counts the words still to be looked at, including *argv itself,
     * so an option may only consume a value when argc >= 2; that check is
     * done by take_option_value().
     */
    for (argc--, argv++; argc > 0; argc--, argv++) {
	if (**argv != '-')
	    break;

	if (strcmp(*argv, "-scale") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.scale = atoi(val);

	} else if (strcmp(*argv, "-fofn") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.fofn = val;

	} else if (strcmp(*argv, "-passed") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.passed = val;

	} else if (strcmp(*argv, "-failed") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.failed = val;

	} else if (strcmp(*argv, "-error") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.error = val;
	    fprintf(stderr,"* Detected error argument %s\n", opts.error);

	} else if (strcmp(*argv, "-subtract_background") == 0) {
	    opts.sub_background = 1;

	} else if (strcmp(*argv, "-normalise") == 0) {
	    opts.normalise = 1;

	} else if (strcmp(*argv, "-dots") == 0) {
	    opts.dots = 1;

	} else if (strcmp(*argv, "-in_format") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    /* Accept a format name first, falling back to a numeric code */
	    if (TT_UNK == (opts.in_format = trace_type_str2int(val)))
		opts.in_format = atoi(val);

	} else if (strcmp(*argv, "-name") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.name = val;

	} else if (strcmp(*argv, "-out_format") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    if (TT_UNK == (opts.out_format = trace_type_str2int(val)))
		opts.out_format = atoi(val);

	} else if (strcasecmp(*argv, "-compress") == 0) {
	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    opts.compress_mode = compress_str2int(val);

	} else if (strcmp(*argv, "-abi_data") == 0) {
	    int c1, c2, c3, c4;

	    if (NULL == (val = take_option_value(&argc, &argv)))
		goto missing_value;
	    if (4 == sscanf(val, "%d,%d,%d,%d", &c1, &c2, &c3, &c4)) {
		abi_set_data_counts(c1, c2, c3, c4);
	    } else {
		usage();
	    }

	} else if (strcmp(*argv, "--") == 0) {
	    break;

	} else {
	    usage();
	}
    }

    if (argc == 2) {
	/* Old syntax, for backwards compatibility */

	if (TT_UNK == (opts.in_format = trace_type_str2int(argv[0])))
	    opts.in_format = atoi(argv[0]);
	if (TT_UNK == (opts.out_format = trace_type_str2int(argv[1])))
	    opts.out_format = atoi(argv[1]);
    } else if (argc != 0) {
	usage();
    }


    // Added by SAK: Allow redirection of error output to file, due to problems with Java exec
    if (NULL != opts.error) {
	int fd;

	fprintf(stderr,"* Redirecting stderr to %s\n", opts.error);
	fflush(stderr);

	/*
	 * Open the replacement first so that a failure can still be
	 * reported on the original stderr. 0666 is filtered by the umask.
	 */
	fd = creat(opts.error, 0666);
	if (fd == -1) {
	    perror(opts.error);
	    exit(-1);
	}

	/* Put the new file on descriptor 2 regardless of which fd we got */
	if (fd != 2) {
	    if (dup2(fd, 2) == -1) {
		perror(opts.error);
		exit(-1);
	    }
	    close(fd);
	}
    }

    if (!opts.fofn) {
	return convert(stdin, stdout, "(stdin)", "(stdout)", &opts);
    } else {
	FILE *fpin, *fpout;
	FILE *fppassed = NULL, *fpfailed = NULL;
	char *infname, *outfname;
	int ret, ret_all = 0;
	char line[8192], line2[8192];
	/* Large enough for "ERROR " plus the longest possible file name */
	char errbuf[sizeof(line) + 16];

	FILE *fofn_fp;

	if (NULL == (fofn_fp = fopen(opts.fofn, "r"))) {
	    perror(opts.fofn);
	    return -1;
	}

	if (opts.passed && NULL == (fppassed = fopen(opts.passed, "w"))) {
	    perror(opts.passed);
	    fclose(fofn_fp);
	    return -1;
	}

	if (opts.failed && NULL == (fpfailed = fopen(opts.failed, "w"))) {
	    perror(opts.failed);
	    if (fppassed)
		fclose(fppassed);
	    fclose(fofn_fp);
	    return -1;
	}

	while (fgets(line, sizeof(line), fofn_fp) != NULL) {
	    int i, j, len;
	    time_t ret_time; // SAK

	    /*
	     * Find input and output name, escaping spaces as needed.
	     * line2 receives at most one byte per input byte, so it cannot
	     * outgrow line. An unescaped space ends the input name and
	     * starts the output name.
	     */
	    len = strlen(line);
	    outfname = NULL;
	    for (i = j = 0; i < len; i++) {
		if (line[i] == '\\' && i != len-1) {
		    line2[j++] = line[++i];
		} else if (line[i] == ' ') {
		    line2[j++] = 0;
		    outfname = &line2[j];
		} else if (line[i] != '\n') {
		    line2[j++] = line[i];
		}
	    }
	    line2[j] = 0;
	    infname = line2;

	    /* Open input and output files */
	    if (NULL == (fpin = fopen(infname, "rb"))) {
		snprintf(errbuf, sizeof(errbuf), "ERROR %s", infname);
		perror(errbuf);
		if (opts.dots) {
		    fputc('!', stdout);
		    fflush(stdout);
		}
		if (fpfailed)
		    fprintf(fpfailed, "%s\n", infname);
		continue;
	    }

	    if (outfname) {
		if (NULL == (fpout = fopen(outfname, "wb+"))) {
		    snprintf(errbuf, sizeof(errbuf), "ERROR %s", outfname);
		    perror(errbuf);
		    fclose(fpin);
		    if (opts.dots) {
			fputc('!', stdout);
			fflush(stdout);
		    }
		    if (fpfailed)
			fprintf(fpfailed, "%s\n", infname);
		    continue;
		}
	    } else {
		outfname = "(stdout)";
		fpout = stdout;
	    }

	    /* Convert */
	    ret = convert(fpin, fpout, infname, outfname, &opts);
	    ret_time = time(NULL); // SAK
	    ret_all |= ret;
	    if (opts.dots) {
		fputc(ret ? '!' : '.', stdout);
		fflush(stdout);
	    }
	    /* ctime() supplies the trailing newline for these log lines */
	    if (ret) {
		if (fpfailed)
		    fprintf(fpfailed, "%s %s", infname, ctime(&ret_time)); // SAK
	    } else {
		if (fppassed)
		    fprintf(fppassed, "%s %s", infname, ctime(&ret_time)); // SAK
	    }

	    /* Tidy up */
	    fclose(fpin);
	    if (fpout != stdout)
		fclose(fpout);
	}

	if (ret_all)
	    fprintf(stderr,"* ret_all = %d\n", ret_all);

	fclose(fofn_fp);
	if (fppassed)
	    fclose(fppassed);
	if (fpfailed)
	    fclose(fpfailed);

	return ret_all;
    }

 missing_value:
    /*
     * An option that needs a value was the last word on the command line.
     * usage() is called as for any other malformed command line; the
     * return covers the case where it comes back to us.
     */
    usage();
    return -1;
}
Example #2
0
/*
 * ---------------------------------------------------------------------------
 * Loads confidence values from the trace file named by the experiment
 * file's LN entry and stores one value per base in 'conf'.
 *
 * Only trace types SCF and ZTR (taken from the LT entry) are handled.
 *
 * 'length' is the number of entries to fill in 'conf'.
 * 'opos' is optional - if not known then set to NULL. When given, opos[i]
 * is the 1-based position in the trace of base i, with 0 marking an
 * inserted base (confidence 0). Positions lying outside the trace get the
 * same value, 2, that is used for unrecognised base characters.
 * When 'opos' is NULL, base i is taken from trace position i and any
 * entries beyond the end of the trace are set to 2.
 *
 * Returns 0 for success
 *        -1 for failure
 */
int get_read_conf(Exp_info *e, int length, int2 *opos, int1 *conf) {
    int ttype, i;
    int ret = -1;
    FILE *fp = NULL;
    uint_1 *prob_A = NULL, *prob_C = NULL, *prob_G = NULL, *prob_T = NULL;
    char *seq = NULL;
    float scf_version;
    int nbases = 0;

    /* Sanity check */
    if (!(exp_Nentries(e,EFLT_LT) && exp_Nentries(e,EFLT_LN)))
	return -1;

    /* Find and load trace file */
    ttype = trace_type_str2int(exp_get_entry(e, EFLT_LT));

    if (ttype != TT_SCF &&
	ttype != TT_ZTR)
	return -1;

    /*
     * We only support direct reading accuracy values from SCF files.
     * Otherwise we have to take a slower approach.
     */
    if (ttype != TT_SCF) {
	Read *r;
	size_t nalloc;
	/* Remember the current section mask so it can be restored below */
	int sec = read_sections(0);
	read_sections(READ_BASES);

	if (NULL == (r = read_reading(exp_get_entry(e,EFLT_LN), TT_ANYTR))) {
	    read_sections(sec);
	    return -1;
	}

	nbases = r->NBases;
	/* Never request zero bytes; keeps an empty read a non-error */
	nalloc = nbases > 0 ? (size_t)nbases : 1;

	prob_A = (uint_1 *)xmalloc(nalloc);
	prob_C = (uint_1 *)xmalloc(nalloc);
	prob_G = (uint_1 *)xmalloc(nalloc);
	prob_T = (uint_1 *)xmalloc(nalloc);
	seq    = (char   *)xmalloc(nalloc);
	if (NULL == prob_A ||
	    NULL == prob_C ||
	    NULL == prob_G ||
	    NULL == prob_T ||
	    NULL == seq) {
	    read_deallocate(r);
	    read_sections(sec);
	    goto cleanup;
	}

	if (nbases > 0) {
	    memcpy(prob_A, r->prob_A, nbases);
	    memcpy(prob_C, r->prob_C, nbases);
	    memcpy(prob_G, r->prob_G, nbases);
	    memcpy(prob_T, r->prob_T, nbases);
	    memcpy(seq,    r->base,   nbases);
	}

	read_deallocate(r);
	read_sections(sec);

    } else {
	Header h;
	/* For SCF files we read directly - the above code would also do. */

	if (NULL == (fp = open_trace_file(exp_get_entry(e,EFLT_LN), NULL)))
	    return -1;

	/* Read the SCF header */
	if (-1 == read_scf_header(fp, &h))
	    goto cleanup;
	scf_version = scf_version_str2float(h.version);
	nbases = h.bases;
	/* A count that does not fit in an int cannot be indexed below */
	if (nbases < 0)
	    goto cleanup;

	/* Alloc memory */
	prob_A = (uint_1 *)xmalloc(h.bases * sizeof(*prob_A));
	prob_C = (uint_1 *)xmalloc(h.bases * sizeof(*prob_C));
	prob_G = (uint_1 *)xmalloc(h.bases * sizeof(*prob_G));
	prob_T = (uint_1 *)xmalloc(h.bases * sizeof(*prob_T));
	seq    = (char   *)xmalloc(h.bases * sizeof(*seq));
	if (NULL == prob_A ||
	    NULL == prob_C ||
	    NULL == prob_G ||
	    NULL == prob_T ||
	    NULL == seq)
	    goto cleanup;

	/* Load base scores */
	if (scf_version >= 3.0) {
	    /*
	     * Version 3 base format:
	     * num_bases * 4byte peak index
	     * num_bases * prob_A
	     * num_bases * prob_C
	     * num_bases * prob_G
	     * num_bases * prob_T
	     * num_bases * base
	     * num_bases * spare (x3)
	     */
	    if (0 != fseek(fp, (off_t)h.bases_offset + 4 * h.bases, SEEK_SET))
		goto cleanup;
	    if (h.bases != fread(prob_A, 1, h.bases, fp))
		goto cleanup;
	    if (h.bases != fread(prob_C, 1, h.bases, fp))
		goto cleanup;
	    if (h.bases != fread(prob_G, 1, h.bases, fp))
		goto cleanup;
	    if (h.bases != fread(prob_T, 1, h.bases, fp))
		goto cleanup;
	    if (h.bases != fread(seq, 1, h.bases, fp))
		goto cleanup;
	} else {
	    uint_1 buf[12];

	    /*
	     * Version 2 base format
	     * num_bases * base_struct,  where base_struct is 12 bytes:
	     *     0-3 peak_index
	     *     4-7 prob_A/C/G/T
	     *     8   base
	     *     9-  spare
	     */
	    if (0 != fseek(fp, (off_t)h.bases_offset, SEEK_SET))
		goto cleanup;

	    for (i = 0; (unsigned)i < h.bases; i++) {
		if (1 != fread(buf, 12, 1, fp))
		    goto cleanup;
		prob_A[i] = buf[4];
		prob_C[i] = buf[5];
		prob_G[i] = buf[6];
		prob_T[i] = buf[7];
		seq[i]    = buf[8];
	    }
	}
    }

    /* Determine confidence values */
    if (opos) {
	for (i=0; i<length; i++) {
	    int pos = opos[i];

	    if (pos == 0) {
		/* Inserted base, change to 0% */
		conf[i] = 0;
	    } else if (pos < 0 || pos > nbases) {
		/* Position lies outside the loaded trace */
		conf[i] = 2;
	    } else {
		pos--; /* opos is 1-based */
		switch(seq[pos]) {
		case 'a':
		case 'A':
		    conf[i] = prob_A[pos];
		    break;
		case 'c':
		case 'C':
		    conf[i] = prob_C[pos];
		    break;
		case 'g':
		case 'G':
		    conf[i] = prob_G[pos];
		    break;
		case 't':
		case 'T':
		    conf[i] = prob_T[pos];
		    break;
		default:
		    conf[i] = 2;
		}
	    }
	}
    } else {
	int mlength = MIN(length, nbases);

	for (i=0; i < mlength; i++) {
	    switch(seq[i]) {
	    case 'a':
	    case 'A':
		conf[i] = prob_A[i];
		break;
	    case 'c':
	    case 'C':
		conf[i] = prob_C[i];
		break;
	    case 'g':
	    case 'G':
		conf[i] = prob_G[i];
		break;
	    case 't':
	    case 'T':
		conf[i] = prob_T[i];
		break;
	    case 'n':
	    case 'N':
	    case '-':
		/* Undetermined base: use the mean of the four values */
		conf[i] = (prob_A[i] + prob_C[i] + prob_G[i] + prob_T[i]) / 4;
		break;
	    default:
		conf[i] = 2;
	    }
	}
	/* Anything past the end of the trace gets the default value */
	for (; i < length; i++)
	    conf[i] = 2;
    }

    ret = 0;

    /* Shared exit: releases the trace file and buffers on every path */
 cleanup:
    if (fp)
	fclose(fp);
    if (prob_A) xfree(prob_A);
    if (prob_C) xfree(prob_C);
    if (prob_G) xfree(prob_G);
    if (prob_T) xfree(prob_T);
    if (seq)    xfree(seq);

    return ret;
}
Example #3
0
/*
 * Produce a consensus trace from a specific region of this contig.
 *
 * The region runs from 'start' to 'end' inclusive. 'strand' is handed to
 * strand_matches() to select which sequences contribute and 'match' is
 * passed through to do_cons_base(). Sequence number 'exception' is left
 * out of the initial set of sequences covering 'start'.
 * NOTE(review): sequences added later, while walking along the region, are
 * not compared against 'exception' - confirm whether that is intended.
 *
 * Returns a newly allocated Read on success, which the caller owns.
 *         NULL on failure; everything allocated here is released first.
 */
Read *cons_trace(EdStruct *xx, int start, int end, int strand,
		 int match, int exception) {
    int *seqList, i, j, next;
    /* count and r start out empty so the error path may run at any time */
    int count = 0;
    Read *r = NULL;
    int max_points = 10000;
    char *con = NULL;
    diff_cons_seq *rlist = NULL;
    char fileName[256];
    char t_type[5];
    int form;
    int offset = 0, w;

    /* Get the consensus sequence */
    if (NULL == (con = (char *)xmalloc(end - start + 2)))
	goto error;
    DBcalcConsensus(xx, start, end - start + 1, con, NULL, BOTH_STRANDS);

    /* Allocate a list of read pointers and positions */
    if (NULL == (rlist = (diff_cons_seq *)xcalloc(DBI_gelCount(xx),
						  sizeof(*rlist))))
	goto error;

    /* Allocate a read structure */
    if (NULL == (r = read_allocate(max_points, end - start + 1)))
	goto error;

    /* Derive the initial list of sequences covering the start point */
    seqList = DBI_list(xx);
    for (i = 1;
	 i <= DBI_gelCount(xx) && DB_RelPos(xx, DBI_order(xx)[i]) <= start;
	 i++) {
	int seq = DBI_order(xx)[i];
	DBgetSeq(DBI(xx), seq);
	if (DB_RelPos(xx, seq) + DB_Length(xx, seq) > start &&
	    strand_matches(xx, seq, strand) &&
	    seq != exception) {
	    if (get_trace_path(xx, seq, fileName, t_type) == 0) {
		form = trace_type_str2int(t_type);
		rlist[count].r = read_reading(fileName, form);
		/* Sequences whose trace cannot be loaded are skipped */
		if (rlist[count].r) {
		    rlist[count].seq = DBgetSeq(DBI(xx), seq);
		    rlist[count].opos =
			get_trace_pos(rlist[count].r, xx, seq, 0,
				      DB_Start(xx, seq),
				      DB_Start(xx, seq) + DB_Length(xx, seq),
				      DB_Seq(xx, seq), 0);

		    seqList[count++] = seq;
		}
	    }
	}
    }
    /* 'next' is the first candidate not yet examined, or 0 for none left */
    if (i <= DBI_gelCount(xx))
	next = i;
    else
	next = 0;

    /*
     * Loop along the sequence updating seqList as we go.
     * At each point we know how many sequences there are so we can
     * produce the consensus from these sequences.
     */
    for (i = start; i <= end; i++) {
	w = do_cons_base(xx, con, i, start, count, seqList, rlist, r, offset,
			 match, &max_points);
	if (w == -1)
	    goto error;
	offset += w;

	/* Update seqList for the next position */
	if (i < end) {
	    /* Remove sequences */
	    for (j = 0; j < count; j++) {
		int seq = seqList[j];
		if (DB_RelPos(xx, seq) + DB_Length(xx, seq) - 1 <= i) {
		    read_deallocate(rlist[j].r);
		    xfree(rlist[j].opos);
		    /* Close the gap in both parallel arrays */
		    memmove(&seqList[j], &seqList[j+1],
			    (count-1-j) * sizeof(*seqList));
		    memmove(&rlist[j], &rlist[j+1],
			    (count-1-j) * sizeof(*rlist));
		    count--;
		    j--; /* re-examine the entry that moved into slot j */
		}
	    }

	    /* Add sequences */
	    while (next && DB_RelPos(xx, next) <= i+1) {
		/* printf("next=%d %d %d\n",
		       next, DB_RelPos(xx, next), i+1); */
		DBgetSeq(DBI(xx), next);
		if (strand_matches(xx, next, strand) &&
		    get_trace_path(xx, next, fileName, t_type) == 0) {
		    form = trace_type_str2int(t_type);
		    rlist[count].r = read_reading(fileName, form);
		    if (rlist[count].r) {
			rlist[count].seq = DBgetSeq(DBI(xx), next);
			rlist[count].opos =
			    get_trace_pos(rlist[count].r, xx, next, 0,
					  DB_Start(xx, next),
					  DB_Start(xx,next)+DB_Length(xx,next),
					  DB_Seq(xx, next), 0);

			seqList[count++] = next;
		    }
		}
		if (++next > DBI_gelCount(xx))
		    next = 0;
	    }
	}
    }

    /* Release the traces still open at the end of the region */
    for (i = 0; i < count; i++) {
	read_deallocate(rlist[i].r);
	xfree(rlist[i].opos);
    }

    tidy_up(r, end-start + 1, offset);

    xfree(con);
    xfree(rlist);
    return r;

 error:
    /* Release any traces still held in rlist; count is 0 if none were */
    for (i = 0; i < count; i++) {
	read_deallocate(rlist[i].r);
	xfree(rlist[i].opos);
    }
    if (r) read_deallocate(r);
    if (con) xfree(con);
    if (rlist) xfree(rlist);
    return NULL;
}
Example #4
0
/*
 * Read the plain format sequence from FILE *fp into a Read structure.
 * All printing, non-space characters (as defined by ANSI C `isprint' and
 * `isspace') are accepted, but `N's are translated to `-'s.
 *
 * A leading '>' line is skipped as a FASTA header; a second '>' ends the
 * sequence, so only the first FASTA entry is loaded.
 * Lines starting ";<" and ";>" hold left and right cutoff data, which are
 * placed before and after the bases with leftCutoff / rightCutoff set to
 * match. If the file starts with any other ';' line it is parsed as a
 * header supplying the trace format and trace name; all remaining ';'
 * lines are comments.
 *
 * fp must be seekable, as the file length is used to size the buffers.
 *
 * Returns:
 *   Read *     - Success, the Read structure read.
 *   NULLRead   - Failure.
 */
Read *fread_pln(FILE *fp) {
    Read *read = NULLRead;
    off_t fileLen;
    int  ch;
    char *leftc = NULL, *rightc = NULL, *leftcp, *rightcp;
    size_t rightLen;
    int first = 1;

    /*
     * Find the length of the file.
     * Use this as an overestimate of the length of the sequence.
     */
    if (0 != fseek(fp, 0L, SEEK_END))
	goto bail_out;
    fileLen = ftell(fp);
    if (fileLen < 0 || fileLen > INT_MAX /*Was MAXINT2*/)
	goto bail_out;

    if (0 != fseek(fp, 0L, SEEK_SET))
	goto bail_out;
    
    /* Allocate the sequence */
    if (NULLRead == (read = read_allocate(0, fileLen)))
	goto bail_out;

    /* +1 leaves room for the terminator however small the file is */
    if (NULL == (leftc = (char *)xmalloc(fileLen + 1)))
	goto bail_out;

    if (NULL == (rightc = (char *)xmalloc(fileLen + 1)))
	goto bail_out;

    leftcp = leftc;
    rightcp = rightc;

    /* Read in the bases */
    
    read->NBases = 0;
    read->format = TT_PLN;

    while ((ch = fgetc(fp)) != EOF) {
	if (ch == '>') {
	    /* Fasta format file - skip the header and load the first
	     * fasta sequence only. We don't even attempt to worry about
	     * multi-sequence file formats for now.
	     */
	    if (!first)
		break;

	    while(ch != '\n' && ch != EOF)
		ch = fgetc(fp);

	}  else if (ch==';') {
	    /*
	     * ;< is left cutoff,
	     * ;> is right cutoff.
	     * Any other ';'s we can treat as a comments.
	     */
	    ch = fgetc(fp);
	    if (ch == EOF)
		break;

	    if (first == 1 && ch != '<' && ch != '>') {
		int d;
		char type[5], name[17], line[1024];

		line[0] = ch;
		line[1] = 0;

		/*
		 * Fetch the rest of the header line. A bare ";" line has
		 * already ended, so reading on would eat the next line.
		 */
		if (ch != '\n') {
		    if (NULL == fgets(&line[1], sizeof(line) - 1, fp)) {
			line[1] = 0;
		    } else if (NULL == strchr(line, '\n')) {
			/* Over-long header: discard what did not fit */
			do {
			    ch = fgetc(fp);
			} while (ch != '\n' && ch != EOF);
		    }
		}

		/* %16s keeps the name within name[17] */
		if (5 == sscanf(line, "%6d%6d%6d%4c%16s",
				&d, &d, &d, type, name)) {
		    char * p;

		    /* %4c does not terminate the string for us */
		    type[4] = 0;
		    if ((p = strchr(type, ' ')))
			*p = 0;

		    read->format = trace_type_str2int(type);
		    read->trace_name = (char *)xmalloc(strlen(name)+1);
		    if (read->trace_name)
			strcpy(read->trace_name, name);
		}
	    }

	    else if (ch == '<') {
		/* Stop at EOF too, or a final unterminated line never ends */
		ch = fgetc(fp);
		while (ch != '\n' && ch != EOF) {
		    *leftcp++ = ch;
		    ch = fgetc(fp);
		}
	    } else if (ch == '>') {
		ch = fgetc(fp);
		while (ch != '\n' && ch != EOF) {
		    *rightcp++ = ch;
		    ch = fgetc(fp);
		}
	    } else {
		while(ch != '\n' && ch != EOF)
		    ch = fgetc(fp);
	    }
        } else if (isprint(ch) && !isspace(ch)) {
	    read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch);
	}
	
	first = 0;
    }

    *leftcp = *rightcp = 0;
    rightLen = strlen(rightc);

    /*
     * Lay the final sequence out as left cutoff, bases, right cutoff.
     * The bases are shifted up first to make room for the left cutoff.
     */
    read->leftCutoff = strlen(leftc);
    read->rightCutoff = read->leftCutoff + read->NBases + 1;
    memmove(&read->base[read->leftCutoff], read->base, read->NBases);
    memmove(read->base, leftc, read->leftCutoff);
    memmove(&read->base[read->leftCutoff + read->NBases],
	    rightc, rightLen);

    read->NBases += read->leftCutoff + rightLen;
    read->base[read->NBases] = 0;

    xfree(leftc);
    xfree(rightc);
    
    /* SUCCESS */
    return(read);

    /* FAILURE */
 bail_out:
    if (leftc)
	xfree(leftc);
    if (rightc)
	xfree(rightc);
    if (read)
	read_deallocate(read);

    return NULLRead;
}