예제 #1
0
/*
 * Unpacks the 31-byte fixed size part of the SFF common header.
 * It allocates memory for this and for the flow order and key, but does
 * not read the flow & key information (as this may not be in buf).
 * It also checks that the MAGIC and VERSION match as expected.
 *
 * buf must point to at least 31 readable bytes; it does not need to be
 * aligned. Each multi-byte field is copied out with memcpy() before being
 * byte-swapped, because dereferencing buf through a wider pointer type
 * would be a misaligned / incompatible-type access.
 *
 * Returns sff_common_header* on success
 *         NULL on failure (allocation failure or MAGIC/VERSION mismatch)
 */
sff_common_header *decode_sff_common_header(unsigned char *buf) {
    sff_common_header *h;
    uint64_t v8;
    uint32_t v4;
    uint16_t v2;

    if (NULL == (h = (sff_common_header *)xcalloc(1, sizeof(*h))))
	return NULL;

    /* Fixed layout: byte offsets 0..30 of the on-disk header. */
    memcpy(&v4, buf+0, sizeof(v4));
    h->magic           = be_int4(v4);
    memcpy(h->version, buf+4, 4);
    memcpy(&v8, buf+8, sizeof(v8));
    h->index_offset    = be_int8(v8);
    memcpy(&v4, buf+16, sizeof(v4));
    h->index_len       = be_int4(v4);
    memcpy(&v4, buf+20, sizeof(v4));
    h->nreads          = be_int4(v4);
    memcpy(&v2, buf+24, sizeof(v2));
    h->header_len      = be_int2(v2);
    memcpy(&v2, buf+26, sizeof(v2));
    h->key_len         = be_int2(v2);
    memcpy(&v2, buf+28, sizeof(v2));
    h->flow_len        = be_int2(v2);
    h->flowgram_format = be_int1(buf[30]);

    if (h->magic != SFF_MAGIC || memcmp(h->version, SFF_VERSION, 4)) {
	xfree(h);
	return NULL;
    }

    /*
     * h came from xcalloc, so h->flow and h->key start out NULL; the
     * header destructor is handed a consistent struct on either failure.
     */
    if (NULL == (h->flow = (char *)xmalloc(h->flow_len)))
	return free_sff_common_header(h), NULL;
    if (NULL == (h->key  = (char *)xmalloc(h->key_len)))
	return free_sff_common_header(h), NULL;

    return h;
}
예제 #2
0
/*
 * Reads a common header (including the variable length flow order and key)
 * from a stdio FILE, then advances the stream to the next multiple of 8
 * bytes.
 *
 * Returns a pointer to the header on success
 *         NULL on failure (short read, or the fixed part failed to decode)
 */
static sff_common_header *fread_sff_common_header(FILE *fp) {
    sff_common_header *h;
    unsigned char chdr[31];

    if (31 != fread(chdr, 1, 31, fp))
	return NULL;

    /* decode_sff_common_header() returns NULL on failure; do not use h then */
    if (NULL == (h = decode_sff_common_header(chdr)))
	return NULL;

    if (h->flow_len != fread(h->flow, 1, h->flow_len, fp))
	return free_sff_common_header(h), NULL;
    if (h->key_len != fread(h->key , 1, h->key_len,  fp))
	return free_sff_common_header(h), NULL;

    /* Pad to 8 chars */
    fseek(fp, (ftell(fp) + 7)& ~7, SEEK_SET);

    return h;
}
예제 #3
0
/*
 * Reads a common header (including variable length components) from an mFILE.
 * After the flow order and key have been read, the stream is advanced to
 * the next multiple of 8 bytes.
 *
 * Returns a pointer to the header on success
 *         NULL on failure (short read, or the fixed part failed to decode)
 */
sff_common_header *read_sff_common_header(mFILE *mf) {
    sff_common_header *h;
    unsigned char chdr[31];

    if (31 != mfread(chdr, 1, 31, mf))
	return NULL;

    /* decode_sff_common_header() returns NULL on failure; do not use h then */
    if (NULL == (h = decode_sff_common_header(chdr)))
	return NULL;

    if (h->flow_len != mfread(h->flow, 1, h->flow_len, mf))
	return free_sff_common_header(h), NULL;
    if (h->key_len != mfread(h->key , 1, h->key_len,  mf))
	return free_sff_common_header(h), NULL;

    /* Pad to 8 chars */
    mfseek(mf, (mftell(mf) + 7)& ~7, SEEK_SET);

    return h;
}
예제 #4
0
파일: sff.c 프로젝트: pawelsm/sff_demult
/*
 * Checks that an already-read SFF common header carries the magic number
 * and format version this program supports.
 *
 * prg_name and prg_version are only used to label the error message.
 *
 * Returns normally when both fields match. Otherwise it writes a
 * diagnostic to stderr, releases the header with free_sff_common_header()
 * and terminates the process with exit status 2.
 */
void
verify_sff_common_header(char *prg_name,
                         char *prg_version,
                         sff_common_header *h) {
    int i;

    /* ensure that the magic file type is valid */
    if (h->magic != SFF_MAGIC) {
        fprintf(stderr, "The SFF header has magic value '%d' \n", h->magic);
        fprintf(stderr,
                "[err] %s (version %s) %s : '%d' \n",
                prg_name,
                prg_version,
                "only knows how to deal an SFF magic value of type",
                SFF_MAGIC);
        free_sff_common_header(h);
        exit(2);
    }

    /* ensure that the version header is valid */
    if ( memcmp(h->version, SFF_VERSION, SFF_VERSION_LENGTH) ) {
        char *sff_header_version = h->version;
        char valid_header_version[SFF_VERSION_LENGTH] = SFF_VERSION;

        /*
         * The whole diagnostic goes to stderr so it cannot be interleaved
         * with (or lost among) regular output on stdout. Bytes are cast to
         * unsigned char so values >= 0x80 are not sign-extended by %x.
         */
        fprintf(stderr, "The SFF file has header version: ");
        for (i=0; i < SFF_VERSION_LENGTH; i++) {
            fprintf(stderr, "0x%02x ", (unsigned char) sff_header_version[i]);
        }
        fprintf(stderr, "\n");
        fprintf(stderr,
                "[err] %s (version %s) %s : ",
                prg_name,
                prg_version,
                "only knows how to deal an SFF header version: ");
        for (i=0; i < SFF_VERSION_LENGTH; i++) {
            fprintf(stderr, "0x%02x ", (unsigned char) valid_header_version[i]);
        }
        fprintf(stderr, "\n");
        free_sff_common_header(h);
        exit(2);
    }
}
예제 #5
0
/*
 * Reads an SFF file from an mFILE and decodes it to a Read struct.
 *
 * The common header, one read header and that read's data are loaded from
 * mf. The flow order string and the base string are moved (not copied)
 * into the returned Read; all other temporary SFF structures are freed
 * before returning.
 *
 * Every buffer the Read needs is allocated before the Read itself, so
 * that an allocation failure can be unwound without a partially filled
 * Read.
 *
 * Returns Read* on success
 *         NULL on failure (read error or out of memory)
 */
Read *mfread_sff(mFILE *mf) {
    int i, bpos;
    Read *r = NULL;
    sff_common_header *ch;
    sff_read_header *rh = NULL;
    sff_read_data *rd = NULL;
    float *flow = NULL;
    uint_2 *base_pos = NULL;
    char *prob_A = NULL, *prob_C = NULL, *prob_G = NULL, *prob_T = NULL;
    size_t nflows, nbases;

    /* Load the SFF contents */
    if (NULL == (ch = read_sff_common_header(mf)))
	return NULL;
    if (NULL == (rh = read_sff_read_header(mf)))
	goto cleanup;
    if (NULL == (rd = read_sff_read_data(mf, ch->flow_len, rh->nbases)))
	goto cleanup;

    nflows = ch->flow_len;
    nbases = rh->nbases;

    /*
     * Allocate the per-flow and per-base arrays up front. A NULL result
     * is only an error for a non-zero request: malloc(0)/calloc(0, n) may
     * legitimately return NULL.
     */
    flow     = (float  *)malloc(nflows * sizeof(float));
    base_pos = (uint_2 *)calloc(nbases, 2);
    prob_A   = (char   *)calloc(nbases, 1);
    prob_C   = (char   *)calloc(nbases, 1);
    prob_G   = (char   *)calloc(nbases, 1);
    prob_T   = (char   *)calloc(nbases, 1);
    if ((nflows && !flow) ||
	(nbases && (!base_pos || !prob_A || !prob_C || !prob_G || !prob_T)))
	goto cleanup;

    /* Convert to Read struct */
    /* NOTE(review): assumes read_allocate() signals failure with NULL */
    if (NULL == (r = read_allocate(0,0)))
	goto cleanup;

    /* Drop whatever read_allocate() set up; free(NULL) is a no-op */
    free(r->basePos);
    free(r->base);
    free(r->prob_A);
    free(r->prob_C);
    free(r->prob_G);
    free(r->prob_T);

    r->nflows = ch->flow_len;
    r->flow_order = ch->flow; ch->flow = NULL;	/* ownership moves to r */
    r->flow_raw = NULL;
    r->flow = flow; flow = NULL;
    for (i = 0; i < r->nflows; i++) {
	r->flow[i] = rd->flowgram[i] / 100.0;
    }

    r->NBases  = rh->nbases;
    r->basePos = base_pos;  base_pos = NULL;
    r->base    = rd->bases; rd->bases = NULL;	/* ownership moves to r */
    r->prob_A  = prob_A;    prob_A = NULL;
    r->prob_C  = prob_C;    prob_C = NULL;
    r->prob_G  = prob_G;    prob_G = NULL;
    r->prob_T  = prob_T;    prob_T = NULL;

    /*
     * The prob_* arrays come from calloc and are already zero, so only
     * the entry matching the called base is filled in. basePos is the
     * running sum of the per-base flow_index increments.
     */
    bpos = 0;
    for (i=0; i < r->NBases; i++) {
	switch (r->base[i]) {
	case 'A':
	case 'a':
	    r->prob_A[i] = rd->quality[i];
	    break;
	case 'C':
	case 'c':
	    r->prob_C[i] = rd->quality[i];
	    break;
	case 'G':
	case 'g':
	    r->prob_G[i] = rd->quality[i];
	    break;
	case 'T':
	case 't':
	    r->prob_T[i] = rd->quality[i];
	    break;
	}

	bpos += rd->flow_index[i];
	r->basePos[i] = bpos;
    }

    /* A right clip value of 0 is replaced by NBases+1 before taking the min */
    r->leftCutoff = MAX(rh->clip_qual_left, rh->clip_adapter_left);
    r->rightCutoff = MIN(rh->clip_qual_right
			 ? rh->clip_qual_right
			 : r->NBases+1,
			 rh->clip_adapter_right
			 ? rh->clip_adapter_right
			 : r->NBases+1);

 cleanup:
    /*
     * Shared exit path. On success the local buffer pointers were set to
     * NULL when handed to r, so these frees do nothing; on failure r is
     * still NULL and everything allocated so far is released.
     */
    free(flow);
    free(base_pos);
    free(prob_A);
    free(prob_C);
    free(prob_G);
    free(prob_T);

    free_sff_common_header(ch);
    if (rh) free_sff_read_header(rh);
    if (rd) free_sff_read_data(rd);

    return r;
}
예제 #6
0
파일: main.c 프로젝트: b4winckler/sff2fastq
/*
 * Converts every read of an SFF file into a FASTQ entry.
 *
 * sff_file   - path of the input SFF file; an empty string selects stdin.
 * fastq_file - path of the output FASTQ file; an empty string selects stdout.
 * trim_flag  - forwarded to get_clip_values() when computing the clip points.
 *
 * The process exits with status 1 if a file cannot be opened or the read
 * name buffer cannot be allocated. Header validation is delegated to
 * verify_sff_common_header().
 */
void
process_sff_to_fastq(char *sff_file, char *fastq_file, int trim_flag) {
    sff_common_header h;
    sff_read_header rh;
    sff_read_data rd;
    FILE *sff_fp, *fastq_fp;

    if ( !strlen(sff_file) ) {
        sff_fp = stdin;
    }
    /* SFF is a binary format: open in binary mode so that no newline or
     * end-of-file translation is applied on platforms that distinguish
     * text and binary streams. */
    else if ( (sff_fp = fopen(sff_file, "rb")) == NULL ) {
        fprintf(stderr,
                "[err] Could not open file '%s' for reading.\n", sff_file);
        exit(1);
    }

    read_sff_common_header(sff_fp, &h);
    verify_sff_common_header(PRG_NAME, VERSION, &h);

    if ( !strlen(fastq_file) ) {
        fastq_fp = stdout;
    }
    else {
        if ( (fastq_fp = fopen(fastq_file, "w")) == NULL ) {
            fprintf(stderr,
                    "[err] Could not open file '%s' for writing.\n",
                    fastq_file);
            exit(1);
        }
    }

    int left_clip = 0, right_clip = 0, nbases = 0;
    char *name;
    char *bases;
    uint8_t *quality;
    int i;
    int numreads = (int) h.nreads;
    for (i = 0; i < numreads; i++) {
        read_sff_read_header(sff_fp, &rh);
        read_sff_read_data(sff_fp, &rd, h.flow_len, rh.nbases);

        /* get clipping points */
        get_clip_values(rh, trim_flag, &left_clip, &right_clip);
        nbases = right_clip - left_clip;

        /* create bases string */
        bases = get_read_bases(rd, left_clip, right_clip);

        /* create quality array */
        quality = get_read_quality_values(rd, left_clip, right_clip);

        /* create read name string; calloc zero-fills, so the extra byte
         * beyond name_len is the string terminator */
        int name_length = (int) rh.name_len + 1;
        name = (char *) calloc( (size_t) name_length, sizeof(char) );
        if (!name) {
            fprintf(stderr, "Out of memory! For read name string!\n");
            exit(1);
        }
        strncpy(name, rh.name, (size_t) rh.name_len);

        construct_fastq_entry(fastq_fp, name, bases, quality, nbases);

        free(name);
        free(bases);
        free(quality);
        free_sff_read_header(&rh);
        free_sff_read_data(&rd);
    }

    free_sff_common_header(&h);
    fclose(fastq_fp);
    fclose(sff_fp);
}