Example #1
0
/*
 * Read the next batch of reads from the input stream(s) held in `ifp` and
 * store them in mseqs->seqList.
 *
 * ifp      : supplies the streams ifpA (and ifpB, used only when pe is
 *            non-zero) and the running read id; ifp->id is written back
 *            before returning.
 * mseqs    : destination batch; mseqs->n is set to the number of entries
 *            stored by this call.
 * pe       : non-zero for paired-end input. Each iteration then reads one
 *            record from ifpA followed by one record from ifpB.
 * get_read : parser callback. A return value > 0 is treated as "a read was
 *            parsed into the seq_t"; any other value stores nothing.
 *
 * Returns the number of entries stored (the same value as mseqs->n).
 *
 * NOTE(review): SEQDUP is a macro defined elsewhere. It presumably copies
 * `tmp` into *alnSeq and advances alnSeq -- confirm. Because it is a macro it
 * may refer to locals of this function by name, so the local names
 * (alnSeq, tmp, num, len, id) are deliberately left unchanged.
 */
int GetMultiSeq (InFileList *ifp, MULTISEQ *mseqs, const int pe, int(*get_read)(FILE * , seq_t * , const int)){
	ALNSEQ *alnSeq;
	alnSeq = mseqs->seqList;
	int num, len, id;
	FILE * ifpA, * ifpB;
	num = 0;
	ifpA = ifp->ifpA;
	ifpB = ifp->ifpB;
	id = ifp->id;

	/* Scratch record reused for every read; its buffers are released below. */
	seq_t tmp;
	tmp.max = tmp.l = 0;
	tmp.seq = tmp.rc = tmp.qual = NULL;

	/*
	 * One iteration can store two entries in paired-end mode, so demand room
	 * for a whole iteration; otherwise num could run past MAX_MULTI_READS.
	 */
	const int per_iter = pe ? 2 : 1;

	while (num + per_iter <= MAX_MULTI_READS) {
		tmp.ns = 0;

		/*
		 * Stop on end-of-file and also on a stream error: after an I/O error
		 * feof() stays false, and a parser that keeps returning <= 0 would
		 * otherwise make this loop spin forever.
		 */
		if (feof(ifpA) || ferror(ifpA)) break;
		if (pe && (feof(ifpB) || ferror(ifpB))) break;

		len = get_read(ifpA, &tmp, TRUE);
		if (len <= 0) continue;

		SEQDUP(alnSeq, tmp, pe);
		++num;

		if (pe) {
			/*
			 * NOTE(review): if the mate cannot be read, the first read is
			 * still kept and the batch holds an unpaired entry -- confirm
			 * that downstream code tolerates this.
			 */
			len = get_read(ifpB, &tmp, TRUE);
			if (len > 0) {
				SEQDUP(alnSeq, tmp, pe);
				++num;
			}
		}
		++id;
	}

	mseqs->n = num;
	ifp->id = id;

	free(tmp.seq);
	free(tmp.qual);
	free(tmp.rc);
	return num;
}
Example #2
0
// Walk an SFF image that starts at `addr` and pass each read's data section
// to get_read().
//
// addr: start of the SFF data (common header first). No length is supplied,
//       so nothing here can bounds-check the header fields; the caller must
//       guarantee the whole image is readable.
//
// A bad magic number only produces a warning on stderr; parsing continues.
//
// NOTE(review): pad() is defined elsewhere; it presumably rounds a byte count
// up to the SFF section alignment -- confirm.
void read_sff_file(char* addr) {
    sff_common_header* head = (sff_common_header*) addr;

    if(ntohl(head->magic) != SFF_MAGIC) {
        fprintf(stderr, "Warning: Bad magic number\n");
    }

    // All multi-byte header fields are converted from network byte order.
    uint32_t nreads     = ntohl(head->nreads);
    uint16_t header_len = ntohs(head->header_len);
    uint16_t flow_len   = ntohs(head->flow_len);
    char* current_addr  = addr + header_len;

    // Unsigned index: a signed int compared against int(nreads) would
    // mis-handle read counts above INT_MAX.
    for(uint32_t i = 0; i < nreads; ++i) {
        sff_read_header* read = (sff_read_header*) current_addr;
        uint16_t rheader_len = ntohs(read->header_len);
        uint32_t nbases      = ntohl(read->nbases);
        uint16_t clip_left   = ntohs(read->clip_qual_left);
        uint16_t clip_right  = ntohs(read->clip_qual_right);

        // Clip positions are 1-based and 0 means "not set". Convert to
        // 0-based, mapping an unset left clip to the first base and an unset
        // right clip to the last base, instead of letting 0 - 1 wrap to 65535.
        uint16_t trim_left  = clip_left ? (uint16_t)(clip_left - 1) : 0;
        uint16_t trim_right = clip_right ? (uint16_t)(clip_right - 1)
                                         : (nbases ? (uint16_t)(nbases - 1) : 0);

        // The read's data section begins right after its header.
        get_read(current_addr + rheader_len, flow_len, nbases, trim_left, trim_right);

        // Advance past the header and the padded data section: two bytes per
        // flow plus three bytes per base.
        current_addr = current_addr + rheader_len + pad((flow_len * 2) + (nbases * 3));
    }
}
Example #3
0
/*
 * process one BAM record, and store accumulated results in 'results'
 *
 * rec     : the BAM record to checksum.
 * hash    : hash type, passed through to init_digest_line() and
 *           update_digest_line().
 * results : accumulator. results->rgHash maps a read-group id to its own
 *           digest line (created on first sight); results->all receives
 *           every record.
 *
 * Four CRCs are computed per record and added to digest-line slots 0..3:
 *   0: flags + sequence
 *   1: name + flags + sequence
 *   2: flags + sequence + quality
 *   3: flags + sequence + selected aux tags (BC, FI, QT, RT, TC)
 *
 * Returns 0 on success, -1 if the sequence/quality buffers, the hash entry
 * or the per-read-group digest line could not be obtained.
 */
int seqchksum_processRecord(bam1_t *rec, HASH_TYPE hash, chksum_results_t *results)
{
    /* Aux tags folded into checksum 3, in this fixed order. */
    static const char *const chk_tags[] = { "BC", "FI", "QT", "RT", "TC" };

    int ret = -1;
    uint32_t crc = 0;

    uint16_t aflags = rec->core.flag;
    uint8_t *seq = get_read(rec);
    uint8_t *qual = get_quality(rec);
    uint16_t flag_mask = BAM_FPAIRED | BAM_FREAD1 | BAM_FREAD2;
    /* Only the paired/read1/read2 bits take part in the checksums. */
    uint8_t flags = (aflags & flag_mask) & 0xFF;
    bool pass = !(aflags & BAM_FQCFAIL);
    char *qname = bam_get_qname(rec);
    uint8_t *tag;
    char *rgid = NULL;
    HashItem *hi;
    HashData hd;
    int newitem;
    digest_line_t *dline_all;
    digest_line_t *dline;
    size_t seq_len;
    size_t i;

    /* Both buffers are passed to strlen() below, so they must be non-NULL. */
    if (!seq || !qual) goto cleanup;
    seq_len = strlen((char*)seq);

    // look up the RG tag; a missing or non-string tag maps to the "" group
    tag = bam_aux_get(rec, "RG");
    if (tag) rgid = bam_aux2Z(tag);
    if (!rgid) rgid = "";

    hd.p = NULL;
    hi = HashTableAdd(results->rgHash, rgid, 0, hd, &newitem);
    if (!hi) goto cleanup;

    /*
     * Create the digest line for a read group seen for the first time. An
     * existing entry whose data pointer is still NULL is handled the same
     * way, so a NULL digest line is never handed to update_digest_line().
     */
    if (newitem || hi->data.p == NULL) {
        dline = malloc(sizeof *dline);
        if (!dline) goto cleanup;
        init_digest_line(hash, dline);
        hi->data.p = dline;
    } else {
        dline = hi->data.p;
    }

    dline_all = &(results->all);

    // flags + sequence chksum
    update_crc(&crc, &flags, 1);
    update_crc(&crc, seq, seq_len);
    update_digest_line(hash, pass, dline, crc, 0);
    update_digest_line(hash, pass, dline_all, crc, 0);

    // flags + sequence + quality chksum (don't reset crc, just add quality)
    // NOTE(review): strlen() assumes get_quality() returns NUL-terminated
    // text with no embedded zero bytes -- confirm.
    update_crc(&crc, qual, strlen((char*)qual));
    update_digest_line(hash, pass, dline, crc, 2);
    update_digest_line(hash, pass, dline_all, crc, 2);

    // name + flags + sequence chksum (the name's terminating NUL is included)
    crc = 0;
    update_crc(&crc, (uint8_t *)qname, strlen(qname)+1);
    update_crc(&crc, &flags, 1);
    update_crc(&crc, seq, seq_len);
    update_digest_line(hash, pass, dline, crc, 1);
    update_digest_line(hash, pass, dline_all, crc, 1);

    // flags + sequence + tags chksum
    crc = 0;
    update_crc(&crc, &flags, 1);
    update_crc(&crc, seq, seq_len);
    for (i = 0; i < sizeof chk_tags / sizeof chk_tags[0]; i++) {
        tag = bam_aux_get(rec, chk_tags[i]);
        /*
         * The checksummed span starts two bytes before the returned pointer
         * and is three bytes longer than aux_type2size() reports.
         * NOTE(review): presumably this covers the two-character tag name,
         * the type byte and the value -- confirm against aux_type2size().
         */
        if (tag) update_crc(&crc, tag-2, aux_type2size(tag)+3);
    }
    update_digest_line(hash, pass, dline, crc, 3);
    update_digest_line(hash, pass, dline_all, crc, 3);

    ret = 0;

cleanup:
    /* seq and qual are owned by this function; free(NULL) is a no-op. */
    free(seq);
    free(qual);
    return ret;
}