Exemplo n.º 1
0
/*
 * Unpacks the 31-byte fixed size part of the SFF common header.
 *
 * The multi-byte fields are copied out of 'buf' with memcpy() before being
 * passed through the be_int*() conversions, so 'buf' does not need any
 * particular alignment.
 *
 * The magic number and version are checked against SFF_MAGIC and
 * SFF_VERSION. Memory is allocated for the flow order (flow_len bytes) and
 * the key (key_len bytes), but neither is filled in here as that data may
 * not be present in 'buf'.
 *
 * Arguments:
 *	buf		At least 31 bytes of encoded common header
 *
 * Returns sff_common_header* on success
 *         NULL on failure (allocation failure or magic/version mismatch)
 */
sff_common_header *decode_sff_common_header(unsigned char *buf) {
    sff_common_header *h;
    uint64_t v64;
    uint32_t v32;
    uint16_t v16;

    if (NULL == (h = (sff_common_header *)xcalloc(1, sizeof(*h))))
	return NULL;

    memcpy(&v32, buf+0, sizeof(v32));
    h->magic           = be_int4(v32);
    memcpy(h->version, buf+4, 4);
    memcpy(&v64, buf+8, sizeof(v64));
    h->index_offset    = be_int8(v64);
    memcpy(&v32, buf+16, sizeof(v32));
    h->index_len       = be_int4(v32);
    memcpy(&v32, buf+20, sizeof(v32));
    h->nreads          = be_int4(v32);
    memcpy(&v16, buf+24, sizeof(v16));
    h->header_len      = be_int2(v16);
    memcpy(&v16, buf+26, sizeof(v16));
    h->key_len         = be_int2(v16);
    memcpy(&v16, buf+28, sizeof(v16));
    h->flow_len        = be_int2(v16);
    h->flowgram_format = be_int1(buf[30]);

    /* Nothing but 'h' itself has been allocated yet, so a plain free is enough */
    if (h->magic != SFF_MAGIC || memcmp(h->version, SFF_VERSION, 4)) {
	xfree(h);
	return NULL;
    }

    if (NULL == (h->flow = (char *)xmalloc(h->flow_len)))
	return free_sff_common_header(h), NULL;
    if (NULL == (h->key  = (char *)xmalloc(h->key_len)))
	return free_sff_common_header(h), NULL;

    return h;
}
Exemplo n.º 2
0
/*
 * ztr_read_chunk_hdr
 *
 * Reads a ZTR chunk header and metadata, but not the main data segment.
 *
 * Arguments:
 * 	fp		A FILE pointer
 *
 * Returns:
 *	Success: a chunk pointer (malloced)
 *	Failure: NULL
 */
ztr_chunk_t *ztr_read_chunk_hdr(FILE *fp) {
    int4 bei4;
    ztr_chunk_t *chunk;

    if (NULL == (chunk = (ztr_chunk_t *)xmalloc(sizeof(*chunk))))
	return NULL;

    /* type */
    if (1 != fread(&bei4, 4, 1, fp)) {
	xfree(chunk);
	return NULL;
    }
    chunk->type = be_int4(bei4);

    /* metadata length */
    if (1 != fread(&bei4, 4, 1, fp)) {
	xfree(chunk);
	return NULL;
    }
    chunk->mdlength = be_int4(bei4);

    /* metadata */
    chunk->ztr_owns = 1;
    if (chunk->mdlength) {
	if (NULL == (chunk->mdata = (char *)xmalloc(chunk->mdlength))) {
	    xfree(chunk);
	    return NULL;
	}
	if (chunk->mdlength != fread(chunk->mdata, 1, chunk->mdlength, fp)) {
	    xfree(chunk->mdata);
	    xfree(chunk);
	    return NULL;
	}
    } else {
	chunk->mdata = NULL;
    }

    /* data length */
    if (1 != fread(&bei4, 4, 1, fp)) {
	if (chunk->mdata)
	    xfree(chunk->mdata);
	xfree(chunk);
	return NULL;
    }
    chunk->dlength = be_int4(bei4);

    return chunk;
}
Exemplo n.º 3
0
/*
 * Encodes the data in 'h' to the file SFF representation. Buf should be
 * allocated to be 31 + h->flow_len + h->key_len + 8.
 *
 * Layout written: the 31-byte fixed header, then the flow order
 * (flow_len bytes), then the key (key_len bytes), then zero padding up to
 * the next multiple of 8 bytes.
 *
 * Multi-byte fields are converted with be_int*() into a local variable and
 * then copied into place with memcpy(), so 'buf' does not need any
 * particular alignment.
 *
 * Returns: the written length of buf (always a multiple of 8)
 */
int encode_sff_common_header(sff_common_header *h, unsigned char *buf) {
    uint64_t v64;
    uint32_t v32;
    uint16_t v16;
    int end, padded;

    v32 = be_int4(h->magic);
    memcpy(buf+0, &v32, sizeof(v32));
    memcpy(buf+4, h->version, 4);
    v64 = be_int8(h->index_offset);
    memcpy(buf+8, &v64, sizeof(v64));
    v32 = be_int4(h->index_len);
    memcpy(buf+16, &v32, sizeof(v32));
    v32 = be_int4(h->nreads);
    memcpy(buf+20, &v32, sizeof(v32));
    v16 = be_int2(h->header_len);
    memcpy(buf+24, &v16, sizeof(v16));
    v16 = be_int2(h->key_len);
    memcpy(buf+26, &v16, sizeof(v16));
    v16 = be_int2(h->flow_len);
    memcpy(buf+28, &v16, sizeof(v16));
    buf[30] = be_int1(h->flowgram_format);

    memcpy(buf+31, h->flow, h->flow_len);
    memcpy(buf+31+h->flow_len, h->key, h->key_len);

    /* Zero-fill from the end of the key up to the next 8-byte boundary */
    end = 31+h->flow_len+h->key_len;
    padded = (end+7)&~7;
    memset(buf+end, 0, padded-end);

    return padded;
}
Exemplo n.º 4
0
/*
 * ztr_write_chunk
 *
 * Writes one complete ZTR chunk to 'fp'. The pieces are emitted in this
 * order: type, metadata length, metadata (skipped when the length is
 * zero), data length and finally the data itself. The three 4-byte
 * integers are passed through be_int4() before being written.
 *
 * Writing stops at the first piece that cannot be written in full.
 *
 * Arguments:
 * 	fp		A FILE pointer
 *	chunk		A pointer to the chunk to write
 *
 * Returns:
 *	Success:  0
 *	Failure: -1
 */
static int ztr_write_chunk(FILE *fp, ztr_chunk_t *chunk) {
    int4 be_type  = be_int4(chunk->type);
    int4 be_mdlen = be_int4(chunk->mdlength);
    int4 be_dlen  = be_int4(chunk->dlength);
    int ok;

    /* && short-circuits, so nothing more is written after a failure */
    ok = fwrite(&be_type, 4, 1, fp) == 1
	&& fwrite(&be_mdlen, 4, 1, fp) == 1
	&& (0 == chunk->mdlength ||
	    fwrite(chunk->mdata, 1, chunk->mdlength, fp) == chunk->mdlength)
	&& fwrite(&be_dlen, 4, 1, fp) == 1
	&& fwrite(chunk->data, 1, chunk->dlength, fp) == chunk->dlength;

    return ok ? 0 : -1;
}
Exemplo n.º 5
0
/*
 * Encodes the data in 'h' to the file SFF representation. Buf should be
 * allocated to be 16 + h->name_len + 8.
 *
 * Layout written: the 16-byte fixed read header, then the read name
 * (name_len bytes), then zero padding up to the next multiple of 8 bytes.
 *
 * Multi-byte fields are converted with be_int*() into a local variable and
 * then copied into place with memcpy(), so 'buf' does not need any
 * particular alignment.
 *
 * Returns: the written length of buf (always a multiple of 8)
 */
int encode_sff_read_header(sff_read_header *h, unsigned char *buf) {
    uint32_t v32;
    uint16_t v16;
    int end, padded;

    v16 = be_int2(h->header_len);
    memcpy(buf+0, &v16, sizeof(v16));
    v16 = be_int2(h->name_len);
    memcpy(buf+2, &v16, sizeof(v16));
    v32 = be_int4(h->nbases);
    memcpy(buf+4, &v32, sizeof(v32));
    v16 = be_int2(h->clip_qual_left);
    memcpy(buf+8, &v16, sizeof(v16));
    v16 = be_int2(h->clip_qual_right);
    memcpy(buf+10, &v16, sizeof(v16));
    v16 = be_int2(h->clip_adapter_left);
    memcpy(buf+12, &v16, sizeof(v16));
    v16 = be_int2(h->clip_adapter_right);
    memcpy(buf+14, &v16, sizeof(v16));

    memcpy(buf+16, h->name, h->name_len);

    /* Zero-fill from the end of the name up to the next 8-byte boundary */
    end = 16+h->name_len;
    padded = (end+7)&~7;
    memset(buf+end, 0, padded-end);

    return padded;
}
Exemplo n.º 6
0
/*
 * Reads one 12-byte base record from 'fp' into 'b'.
 *
 * Record layout: a 4-byte peak index (converted with be_int4), then one
 * byte each for prob_A, prob_C, prob_G, prob_T and base, then three spare
 * bytes.
 *
 * The record is read into a union so that the peak index is fetched
 * through a properly aligned uint_4 rather than through a cast of a byte
 * array.
 *
 * Returns  0 on success
 *         -1 if the record could not be read in full
 */
int read_scf_base(FILE *fp, Bases *b)
{
    union {
	uint_1 u1[12];
	uint_4 u4[3];
    } buf;

    if (1 != fread(buf.u1, 12, 1, fp)) return -1;
    b->peak_index = be_int4(buf.u4[0]);
    b->prob_A   = buf.u1[4];
    b->prob_C   = buf.u1[5];
    b->prob_G   = buf.u1[6];
    b->prob_T   = buf.u1[7];
    b->base     = buf.u1[8];
    b->spare[0] = buf.u1[9];
    b->spare[1] = buf.u1[10];
    b->spare[2] = buf.u1[11];

    return 0;
}
Exemplo n.º 7
0
/*
 * Writes 'b' to 'fp' as one 12-byte base record.
 *
 * Record layout: a 4-byte peak index (converted with be_int4), then one
 * byte each for prob_A, prob_C, prob_G, prob_T and base, then three spare
 * bytes.
 *
 * The record is assembled in a union so that the peak index is stored
 * through a properly aligned uint_4 rather than through a cast of a byte
 * array.
 *
 * Returns  0 on success
 *         -1 if fewer than 12 bytes were written
 */
int write_scf_base(FILE *fp, Bases *b)
{
    union {
	uint_1 u1[12];
	uint_4 u4[3];
    } buf;

    buf.u4[0]  = be_int4(b->peak_index);
    buf.u1[4]  = b->prob_A;
    buf.u1[5]  = b->prob_C;
    buf.u1[6]  = b->prob_G;
    buf.u1[7]  = b->prob_T;
    buf.u1[8]  = b->base;
    buf.u1[9]  = b->spare[0];
    buf.u1[10] = b->spare[1];
    buf.u1[11] = b->spare[2];

    if (12 != fwrite(buf.u1, 1, 12, fp)) return -1;

    return 0;
}
Exemplo n.º 8
0
/*
 * Unpacks the 16-byte fixed size part of the SFF read header.
 *
 * The multi-byte fields are copied out of 'buf' with memcpy() before being
 * passed through the be_int*() conversions, so 'buf' does not need any
 * particular alignment.
 *
 * Memory is allocated for the header and for the read name (name_len
 * bytes); the name itself is not filled in here.
 *
 * Arguments:
 *	buf		At least 16 bytes of encoded read header
 *
 * Returns sff_read_header* on success
 *         NULL on failure
 */
sff_read_header *decode_sff_read_header(unsigned char *buf) {
    sff_read_header *h;
    uint32_t v32;
    uint16_t v16;

    if (NULL == (h = (sff_read_header *)xcalloc(1, sizeof(*h))))
	return NULL;

    memcpy(&v16, buf+0, sizeof(v16));
    h->header_len         = be_int2(v16);
    memcpy(&v16, buf+2, sizeof(v16));
    h->name_len           = be_int2(v16);
    memcpy(&v32, buf+4, sizeof(v32));
    h->nbases             = be_int4(v32);
    memcpy(&v16, buf+8, sizeof(v16));
    h->clip_qual_left     = be_int2(v16);
    memcpy(&v16, buf+10, sizeof(v16));
    h->clip_qual_right    = be_int2(v16);
    memcpy(&v16, buf+12, sizeof(v16));
    h->clip_adapter_left  = be_int2(v16);
    memcpy(&v16, buf+14, sizeof(v16));
    h->clip_adapter_right = be_int2(v16);

    if (NULL == (h->name  = (char *)xmalloc(h->name_len)))
	return free_sff_read_header(h), NULL;

    return h;
}
Exemplo n.º 9
0
/*
 * Reads a single 12-byte base record from 'fp' and unpacks it into 'b'.
 *
 * The first four bytes hold the peak index (converted with be_int4); the
 * remaining eight bytes are, in order, prob_A, prob_C, prob_G, prob_T,
 * base and the three spare bytes.
 *
 * Returns  0 on success
 *         -1 if the record could not be read in full
 */
int read_scf_base(FILE *fp, Bases *b)
{
    union {
	uint_4 words[3];
	uint_1 bytes[12];
    } rec;
    const uint_1 *p;

    if (fread(rec.bytes, 12, 1, fp) != 1)
	return -1;

    b->peak_index = be_int4(rec.words[0]);

    /* Walk the eight single-byte fields that follow the peak index */
    p = &rec.bytes[4];
    b->prob_A   = *p++;
    b->prob_C   = *p++;
    b->prob_G   = *p++;
    b->prob_T   = *p++;
    b->base     = *p++;
    b->spare[0] = *p++;
    b->spare[1] = *p++;
    b->spare[2] = *p;

    return 0;
}
Exemplo n.º 10
0
/*
 * Writes 'num_bases' base records from 'b' to 'fp', one field at a time
 * rather than one record at a time.
 *
 * Output layout: all peak indices (4 bytes each, converted with be_int4),
 * followed by eight runs of 'num_bases' bytes holding, in order, prob_A,
 * prob_C, prob_G, prob_T, base, spare[0], spare[1] and spare[2].
 *
 * Both writes are checked; the temporary buffers are released on every
 * path.
 *
 * Returns  0 on success
 *         -1 on allocation failure or a short write
 */
int write_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
{
    size_t i;
    uint_4 *buf4;
    uint_1 *buf1;
    int ret = -1;

    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
	return -1;

    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
	xfree(buf4);
	return -1;
    }

    for (i = 0; i < num_bases; i++) {
	buf4[i] = be_int4(b[i].peak_index);
    }
    if (num_bases != fwrite(buf4, 4, num_bases, fp))
	goto done;

    for (i = 0; i < num_bases; i++) {
	buf1[i            ] = b[i].prob_A;
	buf1[i+  num_bases] = b[i].prob_C;
	buf1[i+2*num_bases] = b[i].prob_G;
	buf1[i+3*num_bases] = b[i].prob_T;
	buf1[i+4*num_bases] = b[i].base;
	buf1[i+5*num_bases] = b[i].spare[0];
	buf1[i+6*num_bases] = b[i].spare[1];
	buf1[i+7*num_bases] = b[i].spare[2];
    }
    if (8 * num_bases != fwrite(buf1, 1, 8 * num_bases, fp))
	goto done;

    ret = 0;

 done:
    xfree(buf1);
    xfree(buf4);
    return ret;
}
Exemplo n.º 11
0
/*
 * Reads 'num_bases' base records from 'fp' into 'b', where the file holds
 * the records one field at a time rather than one record at a time.
 *
 * Input layout: all peak indices (4 bytes each, converted with be_int4),
 * followed by eight runs of 'num_bases' bytes holding, in order, prob_A,
 * prob_C, prob_G, prob_T, base, spare[0], spare[1] and spare[2].
 *
 * The temporary buffers are released on every path, including short reads.
 * If the second read fails the peak indices in 'b' have already been
 * filled in.
 *
 * Returns  0 on success
 *         -1 on allocation failure or a short read
 */
int read_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
{
    size_t i;
    uint_4 *buf4;
    uint_1 *buf1;
    int ret = -1;

    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
	return -1;

    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
	xfree(buf4);
	return -1;
    }

    if (num_bases != fread(buf4, 4, num_bases, fp))
	goto done;
    for (i = 0; i < num_bases; i++)
	b[i].peak_index = be_int4(buf4[i]);

    if (8 * num_bases != fread(buf1, 1, 8 * num_bases, fp))
	goto done;

    for (i = 0; i < num_bases; i++) {
	b[i].prob_A   = buf1[i];
	b[i].prob_C   = buf1[i+num_bases];
	b[i].prob_G   = buf1[i+2*num_bases];
	b[i].prob_T   = buf1[i+3*num_bases];
	b[i].base     = buf1[i+4*num_bases];
	b[i].spare[0] = buf1[i+5*num_bases];
	b[i].spare[1] = buf1[i+6*num_bases];
	b[i].spare[2] = buf1[i+7*num_bases];
    }

    ret = 0;

 done:
    xfree(buf4);
    xfree(buf1);
    return ret;
}
Exemplo n.º 12
0
/*
 * Parse the REGN chunk, add to regn HASH
 *
 * The hash key is built from the chunk's NAME metadata value followed by
 * the boundary values stored in the chunk data (4 bytes each, starting at
 * data+1, converted with be_int4). The key is held in a fixed 1024-byte
 * buffer; text that does not fit is dropped rather than written past the
 * end of the buffer.
 *
 * If the key is already in 'regn_hash' the existing entry's count is
 * incremented. Otherwise a new regn_t is built: NAME is split on ';' into
 * "name:code" pairs, and each region is given a start and length derived
 * from the boundaries. Regions with code 'E' get length 0 and do not
 * consume a boundary; a region with no boundary left gets length -1.
 *
 * Returns corresponding HashItem * from regn Hash
 *         NULL on failure (missing NAME, malformed name/code pair, too many
 *         regions or boundaries, out of memory, or hash insertion failure)
 */
HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
    char key[1024];
    char *name;
    char *coord;
    char *cp1;
    HashItem *hi;
    HashData hd;
    regn_t *regn;
    uint4 bndy[MAX_REGIONS];
    size_t l;
    int n;
    int ibndy, nbndy = 0;
    int iregion, nregions = 0;

    uncompress_chunk(z, chunk);

    /* the hash key is a combination of the region names and boundaries */
    name = ztr_lookup_mdata_value(z, chunk, "NAME");
    if (NULL == name) {
	fprintf(stderr, "REGN chunk has no NAME metadata\n");
	return NULL;
    }

    /*
     * snprintf returns the length it wanted to write, which can exceed the
     * space available. Clamp 'l' to the real string length so that
     * sizeof(key) - l can never wrap around.
     */
    n = snprintf(key, sizeof(key), "names=%s", name);
    if (n < 0) {
	key[0] = '\0';
	l = 0;
    } else {
	l = (size_t)n;
	if (l >= sizeof(key))
	    l = sizeof(key) - 1;
    }

    if (chunk->dlength) {
	nbndy = (chunk->dlength-1)/4;
	for (ibndy = 0; ibndy < nbndy && l < sizeof(key) - 1; ibndy++) {
	    uint4 raw, val;

	    /* data+1 is not suitably aligned for a uint4 access; copy out */
	    memcpy(&raw, chunk->data + 1 + ibndy * sizeof(raw), sizeof(raw));
	    val = be_int4(raw);

	    if (ibndy)
		n = snprintf(key + l, sizeof(key) - l, ";%d", (int)val);
	    else
		n = snprintf(key + l, sizeof(key) - l,
			     " boundaries=%d", (int)val);
	    if (n < 0)
		break;
	    l += (size_t)n;
	    if (l >= sizeof(key))
		l = sizeof(key) - 1;
	}
    }

    /* Known region layout: just bump its usage count */
    if (NULL != (hi = HashTableSearch(regn_hash, key, strlen(key)))) {
	regn = (regn_t *)(hi->data.p);
	regn->count++;
	return hi;
    }

    /* The boundaries are copied into the fixed-size local array below */
    if (nbndy < 0 || nbndy > MAX_REGIONS) {
	fprintf(stderr, "Too many region boundaries in REGN chunk\n");
	return NULL;
    }

    if (NULL == (regn = (regn_t *)malloc(sizeof(regn_t))))
	return NULL;
    regn->region_names = NULL;

    coord = ztr_lookup_mdata_value(z, chunk, "COORD");
    regn->coord = (NULL == coord ? 'B' : *coord );

    /* strtok modifies its input, so work on a private copy of NAME */
    if (NULL == (regn->region_names = strdup(name)))
	goto fail;

    for (cp1 = strtok(regn->region_names, ";");
	 cp1;
	 cp1 = strtok(NULL, ";")) {
	char *cp2;

	/*
	 * NOTE(review): assumes the per-region arrays in regn_t hold
	 * MAX_REGIONS entries, matching the local bndy[] array - confirm
	 * against the regn_t declaration.
	 */
	if (nregions >= MAX_REGIONS) {
	    fprintf(stderr, "Too many regions in REGN chunk\n");
	    goto fail;
	}

	if (NULL == (cp2 = strchr(cp1,':'))) {
	    fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
	    goto fail;
	}
	*cp2++ = '\0';
	regn->name[nregions] = cp1;
	regn->code[nregions] = *cp2;
	nregions++;
    }

    regn->nregions = nregions;

    /* Copy whole boundary values only; trailing odd bytes are ignored */
    if (nbndy > 0)
	memcpy(bndy, chunk->data+1, nbndy * sizeof(bndy[0]));

    for (iregion = 0, ibndy = 0; iregion < nregions; iregion++) {
	/* start = (start + length of previous region) or 0 if no previous region */
	/* length = (next boundary - start of region) or -1 if no next boundary */
	if (regn->code[iregion] == 'E') {
	    /* no sequence, length = 0 */
	    regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
	    regn->length[iregion] = 0;
	} else {
	    if (ibndy > nbndy) {
		fprintf(stderr, "More name/code pairs than boundaries\n");
		goto fail;
	    }
	    regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
	    regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
	    ibndy++;
	}
    }

    regn->count = 1;

    hd.p = regn;
    if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL)))
	goto fail;

    return hi;

 fail:
    free(regn->region_names);
    free(regn);
    return NULL;
}
Exemplo n.º 13
0
int main(int argc, char **argv) {
    HashFile *hf;
    sff_common_header *ch;
    sff_read_header *rh;
    int i, dot, arg;
    char *sff;
    char hdr[31];
    uint64_t index_offset = 0;
    uint32_t index_size, index_skipped;
    FILE *fp, *fpout = NULL;
    int copy_archive = 1;
    

    /* process command line arguments of the form -arg */
    for (argc--, argv++; argc > 0; argc--, argv++) {
	if (**argv != '-' || strcmp(*argv, "--") == 0)
	    break;

	if (strcmp(*argv, "-o") == 0 && argc > 1) {
	    if (NULL == (fpout = fopen(argv[1], "wb+"))) {
		perror(argv[1]);
		return 1;
	    }
	    argv++;
	    argc--;

	} else if (strcmp(*argv, "-t") == 0) {
	    copy_archive = 0;

	} else if (**argv == '-') {
	    usage();
	}

    }

    if (argc < 1)
	usage();

    if (copy_archive == 0 && argc != 1) {
	fprintf(stderr, "-t option only supported with a single sff argument\n");
	return 1;
    }

    /* Create the hash table */
    hf = HashFileCreate(0, HASH_DYNAMIC_SIZE);
    hf->nheaders = 0;
    hf->headers = NULL;

    for (arg = 0; arg < argc; arg++) {
	/* open (and read) the entire sff file */
	sff = argv[arg];

	printf("Indexing %s:\n", sff);
	if (fpout) {
	    if (NULL == (fp = fopen(sff, "rb"))) {
		perror(sff);
		return 1;
	    }
	} else { 
	    if (NULL == (fp = fopen(sff, "rb+"))) {
		perror(sff);
		return 1;
	    }
	}

	/* Read the common header */
	ch = fread_sff_common_header(fp);

	if (ch->index_len && !fpout) {
	    fprintf(stderr, "Archive already contains index.\nReplacing the"
		    " index requires the \"-o outfile\" option.\n");
	    return 1;
	}

	/* Add the SFF common header as a hash file-header */
	hf->nheaders++;
	hf->headers = (HashFileSection *)realloc(hf->headers, hf->nheaders *
						 sizeof(*hf->headers));
	hf->headers[hf->nheaders-1].pos = 0;
	hf->headers[hf->nheaders-1].size = ch->header_len;
	hf->headers[hf->nheaders-1].cached_data = NULL;

	/* Read the index items, adding to the hash */
	index_skipped = 0;
	dot = 0;
	printf("                                                                       |\r|");
	for (i = 0; i < ch->nreads; i++) {
	    int dlen;
	    uint32_t offset;
	    HashData hd;
	    HashFileItem *hfi;
	    
	    if (i >= dot * (ch->nreads/69)) {
		putchar('.');
		fflush(stdout);
		dot++;
	    }

	    /* Skip old index if present */
	    offset = ftell(fp);
	    if (offset == ch->index_offset) {
		fseek(fp, ch->index_len, SEEK_CUR);
		index_skipped = ch->index_len;
		continue;
	    }

	    hfi = (HashFileItem *)calloc(1, sizeof(*hfi));
	    rh = fread_sff_read_header(fp);
	    dlen = (2*ch->flow_len + 3*rh->nbases + 7) & ~7;
	    fseek(fp, dlen, SEEK_CUR);
	
	    hfi->header = hf->nheaders;
	    hfi->footer = 0;
	    hfi->pos = offset - index_skipped;
	    hfi->size = (ftell(fp) - index_skipped) - hfi->pos;
	    hd.p = hfi;

	    HashTableAdd(hf->h, rh->name, rh->name_len, hd, NULL);
	}
	printf("\n");
	HashTableStats(hf->h, stdout);

	index_offset = ftell(fp) - index_skipped;

	/* Copy the archive if needed, minus the old index */
	if (fpout && copy_archive) {
	    char block[8192];
	    size_t len;
	    uint64_t pos = 0;

	    printf("\nCopying archive\n");

	    fseek(fp, 0, SEEK_SET);
	    while (len = fread(block, 1, 8192, fp)) {
		/* Skip previous index */
		if (pos < ch->index_offset && pos+len > ch->index_offset) {
		    len = ch->index_offset - pos;
		    fseek(fp, ch->index_offset + ch->index_len, SEEK_SET);
		}
		if (len && len != fwrite(block, 1, len, fpout)) {
		    fprintf(stderr, "Failed to output new archive\n");
		    return 1;
		}
		pos += len;
	    }
	}
	
	if (!fpout) {
	    /* Save the hash */
	    printf("Saving index\n");
	    fseek(fp, 0, SEEK_END);
	    index_size = HashFileSave(hf, fp, 0);
	    HashFileDestroy(hf);

	    /* Update the common header */
	    fseek(fp, 0, SEEK_SET);
	    fread(hdr, 1, 31, fp);
	    *(uint64_t *)(hdr+8)  = be_int8(index_offset);
	    *(uint32_t *)(hdr+16) = be_int4(index_size);
	    fseek(fp, 0, SEEK_SET);
	    fwrite(hdr, 1, 31, fp);
	}

	fclose(fp);
    }

    if (fpout) {
	/* Save the hash */
	printf("Saving index\n");

	if (!copy_archive) {
	    hf->archive = strdup(argv[0]);
	    index_offset = 0;
	}

	fseek(fpout, 0, SEEK_END);
	index_size = HashFileSave(hf, fpout, 0);
	HashFileDestroy(hf);

	/* Update the common header to indicate index location */
	if (copy_archive) {
	    fseek(fpout, 0, SEEK_SET);
	    fread(hdr, 1, 31, fpout);
	    *(uint64_t *)(hdr+8)  = be_int8(index_offset);
	    *(uint32_t *)(hdr+16) = be_int4(index_size);
	    fseek(fpout, 0, SEEK_SET);
	    fwrite(hdr, 1, 31, fpout);
	}
	fclose(fpout);
    }
    
    return 0;
}