コード例 #1
0
/*
 * Append `len` bytes of `text` to header->text and keep the result
 * NUL-terminated.
 *
 * `text` does not have to be NUL-terminated; a NULL `text` is a no-op.
 * The buffer is grown only when the kroundup32()-rounded size changes, so
 * this presumably relies on header->text always being sized to the rounded
 * value of l_text + 1 -- TODO confirm against the other writers of
 * header->text.
 * If the buffer cannot be grown the header is left unchanged.
 */
static void append_header_text(bam_header_t *header, char* text, int len)
{
	int x = header->l_text + 1;
	int y = header->l_text + len + 1; // 1 byte null
	if (text == 0) return;
	kroundup32(x);
	kroundup32(y);
	// Also allocate when nothing exists yet: for a short text on an empty
	// header x == y, and the terminator below would be written through NULL.
	if (x < y || header->text == 0) {
		char *grown = (char*)realloc(header->text, y);
		if (grown == 0) return; // out of memory: the old text is still valid
		header->text = grown;
	}
	strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
	header->l_text += len;
	header->text[header->l_text] = 0;
}
コード例 #2
0
ファイル: ssw-s.c プロジェクト: 22noon/batalign
/*
 * Align the read profile `p` against the reference in `pacseq` with
 * ssw_align() and pass the result through ssw_write().
 *
 * pacseq  reference sequence, handed to ssw_align() as an int8_t array
 * len     length of the read in `seq`
 * seq     read sequence
 * reflen  length of the reference in `pacseq`
 * filter, Skip_DP  forwarded unchanged to ssw_align()
 * p       query profile; it is built by the caller, so one read can be
 *         aligned to several references without rebuilding it
 *
 * Returns whatever ssw_write() returns, or NULL if the temporary sequence
 * wrappers cannot be allocated.
 */
s_align* mengyao_ssw_core (
		char *pacseq,//Reference seq (this is the pacseq)
		int len,//original seq length
		char *seq,//Original seq
		int reflen, //length of reference string
		int filter,
		int Skip_DP,
		s_profile* p
		)
{
	int8_t* ref_num = (int8_t*)pacseq;
	kseq_t *read_seq, *ref_seq;
	s_align* result;

	// The kseq_t wrappers only borrow the caller's buffers; they exist so
	// that ssw_write() can be given the sequences in the form it expects.
	read_seq = (kseq_t*)calloc(1, sizeof(kseq_t));
	ref_seq = (kseq_t*)calloc(1, sizeof(kseq_t));
	if (read_seq == 0 || ref_seq == 0) {
		free(read_seq);
		free(ref_seq);
		return 0;
	}

	read_seq->seq.l=len;
	read_seq->seq.m=len;
	read_seq->seq.s=seq;
	kroundup32(read_seq->seq.m);

	ref_seq->seq.l=reflen;
	ref_seq->seq.m=reflen;
	ref_seq->seq.s=pacseq;
	kroundup32(ref_seq->seq.m);

	result = ssw_align (p, ref_num, ref_seq->seq.l, gap_open, gap_extension, 2, filter,Skip_DP, 0);
	result = ssw_write(result, ref_seq, read_seq);

	// Only the wrappers are released; the sequence buffers belong to the caller.
	free(read_seq);
	free(ref_seq);
	return result;
}
コード例 #3
0
ファイル: regidx.c プロジェクト: ColonelHou/hpg-bigdata
/*
 * Build the linear index of every sequence in `idx`.
 *
 * For each region the bins start>>LIDX_SHIFT .. end>>LIDX_SHIFT are visited,
 * and every bin that is still unset (-1) records the index of the first
 * region touching it. list->nidx is set to the highest bin in use plus one.
 *
 * Returns 0 on success and -1 if the bin array cannot be grown.
 */
int _regidx_build_index(regidx_t *idx)
{
    int iseq;
    for (iseq=0; iseq<idx->nseq; iseq++)
    {
        reglist_t *list = &idx->seq[iseq];
        int j,k, imax = 0;   // number of bins allocated so far
        int nidx = 0;        // highest bin in use + 1
        for (j=0; j<list->nregs; j++)
        {
            int ibeg = list->regs[j].start >> LIDX_SHIFT;
            int iend = list->regs[j].end >> LIDX_SHIFT;
            if ( imax < iend + 1 )
            {
                int old_imax = imax;
                int *grown;
                imax = iend + 1;
                kroundup32(imax);
                grown = (int*) realloc(list->idx, imax*sizeof(int));
                if ( !grown ) return -1;
                list->idx = grown;
                for (k=old_imax; k<imax; k++) list->idx[k] = -1;
            }
            // Keep the first region seen for each bin
            for (k=ibeg; k<=iend; k++)
                if ( list->idx[k]<0 ) list->idx[k] = j;
            // A later region may end in a lower bin than an earlier one, so
            // track the maximum rather than the most recent end bin.
            if ( nidx < iend + 1 ) nidx = iend + 1;
            list->nidx = nidx;
        }
    }
    return 0;
}
コード例 #4
0
ファイル: rnaseq.c プロジェクト: agongdai/peta
bwa_seq_t *load_reads(const char *fa_fn, uint32_t *n_seqs) {
	bwa_seq_t *seqs, *part_seqs;
	bwa_seqio_t *ks;
	int n_part_seqs = 0, n_seqs_full = 0, n_seqs_loaded = 0;
	clock_t t = clock();

	ks = bwa_open_reads(BWA_MODE, fa_fn);
	n_seqs_full = N_CHUNK_SEQS;
	show_msg(__func__, "Loading reads from library %s...\n", fa_fn);
	seqs = (bwa_seq_t*) calloc (N_DF_MAX_SEQS, sizeof(bwa_seq_t));
	while ((part_seqs = bwa_read_seq(ks, N_CHUNK_SEQS, &n_part_seqs, BWA_MODE, 0))
			!= 0) {
		show_msg(__func__, "%d sequences loaded: %.2f sec... \n",
				n_seqs_loaded + n_part_seqs, fa_fn, (float) (clock() - t) / CLOCKS_PER_SEC);
		pe_reverse_seqs(part_seqs, n_part_seqs);

		if ((n_seqs_loaded + n_part_seqs) > n_seqs_full) {
			n_seqs_full += n_part_seqs + 2;
			kroundup32(n_seqs_full);
			seqs = (bwa_seq_t*) realloc(seqs, sizeof(bwa_seq_t) * n_seqs_full);
		}
		memmove(&seqs[n_seqs_loaded], part_seqs, sizeof(bwa_seq_t) * n_part_seqs);
		free(part_seqs);
		n_seqs_loaded += n_part_seqs;
	}
	bwa_seq_close(ks);
	if (n_seqs_loaded < 1) {
		err_fatal(__func__,
				"No sequence in file %s, make sure the format is correct! \n",
				fa_fn);
	}
	*n_seqs = n_seqs_loaded;
	return seqs;
}
コード例 #5
0
ファイル: maskripper.cpp プロジェクト: NoSeatbelts/maskripper
 /* Make sure the buffer holds at least new_min bytes; it never shrinks. */
 void resize(uint32_t new_min) {
     if(new_min <= m_data) return;   // current capacity already suffices

     m_data = new_min;
     kroundup32(m_data);             // capacity is the request after kroundup32
     data = (uint8_t *)realloc(data, sizeof(uint8_t) * m_data);
 }
コード例 #6
0
ファイル: bam.c プロジェクト: Bioinformaticsnl/SimSeq
/*
 * Read one BAM alignment record from `fp` into `b`.
 *
 * Returns 4 + block_len on success, -1 on a clean end of file, -2 if the
 * 4-byte length prefix is truncated, -3 if the fixed-size core is truncated,
 * and -4 if the record length is invalid, the data buffer cannot be grown,
 * or the variable-length data is truncated.
 */
int bam_read1(bamFile fp, bam1_t *b)
{
	bam1_core_t *c = &b->core;
	int32_t block_len, ret, i;
	uint32_t x[8];

	assert(BAM_CORE_SIZE == 32);
	if ((ret = bam_read(fp, &block_len, 4)) != 4) {
		if (ret == 0) return -1; // normal end-of-file
		else return -2; // truncated
	}
	if (bam_read(fp, x, BAM_CORE_SIZE) != BAM_CORE_SIZE) return -3;
	if (bam_is_be) {
		bam_swap_endian_4p(&block_len);
		for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
	}
	// Unpack the eight 32-bit words of the core
	c->tid = x[0]; c->pos = x[1];
	c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
	c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
	c->l_qseq = x[4];
	c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
	b->data_len = block_len - BAM_CORE_SIZE;
	// A block shorter than the core would request a negative read below
	if (b->data_len < 0) return -4;
	if (b->m_data < b->data_len) {
		uint32_t new_m = (uint32_t)b->data_len;
		uint8_t *grown;
		kroundup32(new_m);
		grown = (uint8_t*)realloc(b->data, new_m);
		if (grown == 0) return -4; // b->data and b->m_data are still consistent
		b->data = grown;
		b->m_data = new_m;
	}
	if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
	// Whatever follows name, CIGAR, packed sequence and qualities is aux data
	b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
	if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
	return 4 + block_len;
}
コード例 #7
0
/*
 * Enlarge the slot array of `osl`: the capacity m is bumped by one, rounded
 * with kroundup32(), and every slot from index l onward is set to NULL.
 */
void osl_grow(overlapSetList *osl) {
    int slot;

    osl->m += 1;
    kroundup32(osl->m);
    osl->os = realloc(osl->os, sizeof(overlapSet*) * osl->m);
    assert(osl->os);

    /* Slots at and beyond the current length hold no set yet */
    slot = osl->l;
    while(slot < osl->m) {
        osl->os[slot] = NULL;
        slot++;
    }
}
コード例 #8
0
ファイル: GBam.cpp プロジェクト: gpertea/stringtie
//auxiliary functions for low level BAM record creation
// Make sure b->data can hold at least `size` bytes and raise b->data_len to
// `size` if it was smaller. Returns b->data, or NULL if the buffer could not
// be grown, in which case `b` is left untouched.
uint8_t* realloc_bdata(bam1_t *b, int size) {
  if (b->m_data < size) {
        uint32_t new_m = (uint32_t)size;
        kroundup32(new_m);
        uint8_t* grown = (uint8_t*)realloc(b->data, new_m);
        if (grown == NULL) return NULL; // the old buffer is still owned by b
        b->data = grown;
        b->m_data = new_m;
        }
  if (b->data_len<size) b->data_len=size;
  return b->data;
}
コード例 #9
0
ファイル: bam_import.c プロジェクト: 05curranth/samtools
/*
 * Make sure b->data can hold at least `size` bytes.
 *
 * Returns b->data, or NULL if the buffer could not be grown; in that case
 * b->data and b->m_data keep their previous, consistent values.
 */
static inline uint8_t *alloc_data(bam1_t *b, int size)
{
	if (b->m_data < size) {
		uint32_t new_m = (uint32_t)size;
		uint8_t *grown;
		kroundup32(new_m);
		grown = (uint8_t*)realloc(b->data, new_m);
		if (grown == NULL) return NULL;
		b->data = grown;
		b->m_data = new_m;
	}
	return b->data;
}
コード例 #10
0
ファイル: keep.hpp プロジェクト: ANGSD/angsd
// Resize the storage of `k` to `newlen` elements (after kroundup32 rounding)
// and zero every element added beyond the previous capacity k->m.
void realloc(keep<T> *k,size_t newlen){
  kroundup32(newlen);
  k->d = (T*) realloc(k->d,sizeof(T)*newlen);
  assert(k->d!=NULL);
  // Only a grown buffer has a new tail to clear. Without this test a request
  // at or below the current capacity makes newlen-k->m wrap around.
  if(newlen>k->m)
    memset(k->d+k->m,0,(newlen-k->m)*sizeof(T));
  k->m=newlen;
}
コード例 #11
0
/*
 * Enlarge the entry array of `os`: the capacity m is bumped by one, rounded
 * with kroundup32(), and every entry from index l onward is set to NULL.
 * Returns `os`.
 */
overlapSet *os_grow(overlapSet *os) {
    int slot;

    os->m += 1;
    kroundup32(os->m);
    os->overlaps = realloc(os->overlaps, sizeof(GTFentry*) * os->m);
    assert(os->overlaps);

    /* Entries at and beyond the current length are not in use yet */
    slot = os->l;
    while(slot < os->m) {
        os->overlaps[slot] = NULL;
        slot++;
    }
    return os;
}
コード例 #12
0
ファイル: pairs.c プロジェクト: SidBhadra-Lobo/seqqs
/* from seqtk.c */
/*
 * Copy the contents of `src`, including its terminating byte, into `dst`,
 * growing dst's buffer when needed. An empty `src` leaves `dst` untouched.
 */
static void cpy_kstr(kstring_t *dst, const kstring_t *src) {
  if (src->l == 0) return;

  /* Room is needed for the payload plus one terminating byte */
  if (dst->m < src->l + 1) {
    dst->m = src->l + 1;
    kroundup32(dst->m);
    dst->s = realloc(dst->s, dst->m);
  }

  memcpy(dst->s, src->s, src->l + 1);
  dst->l = src->l;
}
コード例 #13
0
ファイル: GBam.cpp プロジェクト: gpertea/stringtie
uint8_t* dupalloc_bdata(bam1_t *b, int size) {
  // Like realloc_bdata, but the previous buffer is not freed: a fresh buffer
  // is ALWAYS allocated and the old one is returned, so the caller must free
  // the returned pointer.
  // If the new buffer cannot be allocated, b is left empty (data NULL,
  // m_data 0, data_len 0) and the old buffer is still returned.
  uint8_t* odata=b->data;
  // Never copy more than the new record length: when size < data_len the
  // full old length would overrun the freshly allocated buffer.
  int ncopy = b->data_len < size ? b->data_len : size;
  b->m_data = size;
  kroundup32(b->m_data);
  b->data = (uint8_t*)malloc(b->m_data);
  if (b->data == NULL) {
    b->m_data = 0;
    b->data_len = 0;
    return odata;
  }
  if (odata != NULL && ncopy > 0) memcpy((void*)b->data, (void*)odata, ncopy);
  b->data_len=size;
  return odata; //user must FREE this after
}
コード例 #14
0
ファイル: bwtsw2_aux.c プロジェクト: drio/bwa
/*
 * Duplicate `b`: the hit array is copied bytewise into a new array whose
 * capacity is b->n rounded with kroundup32(); nothing else is copied.
 *
 * Returns the new object, or NULL if memory cannot be allocated.
 */
bwtsw2_t *bsw2_dup_no_cigar(const bwtsw2_t *b)
{
	bwtsw2_t *p;
	p = calloc(1, sizeof(bwtsw2_t));
	if (p == 0) return 0;
	p->max = p->n = b->n;
	if (b->n) {
		kroundup32(p->max);
		p->hits = calloc(p->max, sizeof(bsw2hit_t));
		if (p->hits == 0) {
			free(p);
			return 0;
		}
		memcpy(p->hits, b->hits, p->n * sizeof(bsw2hit_t));
	}
	return p;
}
コード例 #15
0
ファイル: cntHash.c プロジェクト: dpryan79/countRepeats
/*
 * Grow the parallel `str` and `elements` arrays of `ht` to l+1 rounded with
 * kroundup32(), clear the slots from index l onward, then call
 * rehashHT2(ht, len).
 */
static void growHT2(hashTable *ht, int len) {
    int i;
    ht->m = ht->l+1;
    kroundup32(ht->m);
    ht->str = realloc(ht->str, ht->m*sizeof(char*));
    assert(ht->str);
    ht->elements = realloc(ht->elements, ht->m*sizeof(hashTableElement*));
    assert(ht->elements); /* both arrays are written below, so check both */

    for(i=ht->l; i<ht->m; i++) {
        ht->str[i] = NULL;
        ht->elements[i] = NULL;
    }
    rehashHT2(ht, len);
}
コード例 #16
0
ファイル: bwaseqio.c プロジェクト: madhanap/bwa
 /*
  * Read from `ks` into `str` up to, and not including, the next delimiter.
  *
  * delimiter  byte to stop at; 0 means "stop at any whitespace"
  * str        receives the text and is always NUL-terminated on return
  * dret       if not NULL, receives the delimiter that ended the read, or 0
  *            if the data ran out first
  * iter, tid, thrds, first
  *            on the first refill (*first is true) the file is positioned at
  *            ((iter*thrds)+tid)*READ_SIZE and move_to_start() is called;
  *            later refills read READ_IND_SIZE bytes
  *
  * Returns the length of `str`, or -1 if there is nothing left to read or the
  * terminator cannot be stored.
  */
 static int ks_getuntil1(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int iter, int tid, int thrds, bool *first)
 {
    if (dret) *dret = 0;
    str->l = 0;
    if (ks->begin >= ks->end && ks->is_eof) return -1;
    for (;;) {
       int i;
       if (ks->begin >= ks->end) {
          if (*first) {
             gzseek(ks->f, ((iter*thrds)+tid)*READ_SIZE, SEEK_SET);
             *first = false;
             ks->atend = false;
             ks->begin = 0;
             ks->end = gzread(ks->f, ks->buf, READ_SIZE);
             move_to_start(ks, iter, tid, thrds);
             if (ks->end < READ_SIZE) ks->is_eof = 1;
             if (ks->end == 0) return -1;
          }
          else if (!ks->is_eof) {
             ks->begin = 0;
             ks->atend = true;
             ks->end = gzread(ks->f, ks->buf, READ_IND_SIZE);
             if (ks->end < READ_IND_SIZE) ks->is_eof = 1;
             if (ks->end == 0) break;
          } else break;
       }
       if (delimiter) {
          for (i = ks->begin; i < ks->end; ++i)
             if (ks->buf[i] == delimiter) break;
       } else {
          /* <ctype.h> functions need an unsigned char value */
          for (i = ks->begin; i < ks->end; ++i)
             if (isspace((unsigned char)ks->buf[i])) break;
       }
       if (str->m - str->l < i - ks->begin + 1) {
          str->m = str->l + (i - ks->begin) + 1;
          kroundup32(str->m);
          str->s = (char*)realloc(str->s, str->m);
       }
       memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);
       str->l = str->l + (i - ks->begin);
       ks->begin = i + 1;
       if (i < ks->end) {
          if (dret) *dret = ks->buf[i];
          break;
       }
    }
    /* The loop can end before anything was stored; a never-used string then
       still has no buffer to hold the terminator. */
    if (str->s == 0) {
       str->m = 1;
       str->s = (char*)calloc(1, 1);
       if (str->s == 0) {
          str->m = 0;
          return -1;
       }
    }
    str->s[str->l] = '\0';
    return str->l;
 }
コード例 #17
0
/*
 * Enlarge the parallel IDs/cnts arrays of `us`: the capacity m is bumped by
 * one and rounded with kroundup32(); every slot from index l onward gets
 * ID -1 and count 0. Returns `us`.
 */
static uniqueSet *us_grow(uniqueSet *us) {
    int idx;

    us->m += 1;
    kroundup32(us->m);

    us->IDs = realloc(us->IDs, sizeof(int32_t) * us->m);
    assert(us->IDs);
    us->cnts = realloc(us->cnts, sizeof(uint32_t) * us->m);
    assert(us->cnts);

    /* Mark everything beyond the current length as unused */
    for(idx = us->l; idx < us->m; ++idx) {
        us->cnts[idx] = 0;
        us->IDs[idx] = -1;
    }
    return us;
}
コード例 #18
0
ファイル: padding.c プロジェクト: AngieHinrichs/samtabix
/*
 * Replace the CIGAR of `b` with the `n` operations in `cigar`.
 *
 * When the number of operations changes, the bytes that follow the old CIGAR
 * are shifted to their new position, the buffer is grown first if needed, and
 * data_len and core.n_cigar are updated.
 */
static void replace_cigar(bam1_t *b, int n, uint32_t *cigar)
{
	int o;

	if (n == b->core.n_cigar) {
		// Same operation count: overwrite in place, nothing has to move
		memcpy(b->data + b->core.l_qname, cigar, n * 4);
		return;
	}

	// Offset of the first byte after the old CIGAR
	o = b->core.l_qname + b->core.n_cigar * 4;
	if (b->m_data < b->data_len + (n - b->core.n_cigar) * 4) {
		b->m_data = b->data_len + (n - b->core.n_cigar) * 4;
		kroundup32(b->m_data);
		b->data = (uint8_t*)realloc(b->data, b->m_data);
	}
	// Move the tail first, then drop the new CIGAR into the gap
	memmove(b->data + b->core.l_qname + n * 4, b->data + o, b->data_len - o);
	memcpy(b->data + b->core.l_qname, cigar, n * 4);
	b->data_len += (n - b->core.n_cigar) * 4;
	b->core.n_cigar = n;
}
コード例 #19
0
ファイル: kstring.c プロジェクト: BioInfoTools/lobstr-code
/*
 * Append printf-style formatted text to the string `s`.
 *
 * The text is first formatted into the free space of `s`; if it does not
 * fit, the buffer is grown and the text is formatted again.
 *
 * Returns the number of characters appended, or a negative value if
 * formatting fails or the buffer cannot be grown. On failure s->l is
 * unchanged.
 */
int ksprintf(kstring_t *s, const char *fmt, ...)
{
	va_list ap;
	int l;
	va_start(ap, fmt);
	l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
	va_end(ap);
	if (l < 0) {
		// Formatting error: restore the terminator, leave the length alone
		if (s->l < s->m) s->s[s->l] = '\0';
		return l;
	}
	if ((size_t)l + 1 > s->m - s->l) {
		size_t new_m = s->l + l + 2;
		char *grown;
		kroundup32(new_m);
		grown = (char*)realloc(s->s, new_m);
		if (grown == 0) {
			// The truncated first attempt overwrote the old terminator
			if (s->l < s->m) s->s[s->l] = '\0';
			return -1;
		}
		s->s = grown;
		s->m = new_m;
		// Every va_start() is paired with exactly one va_end()
		va_start(ap, fmt);
		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
		va_end(ap);
		if (l < 0) {
			s->s[s->l] = '\0';
			return l;
		}
	}
	s->l += l;
	return l;
}
コード例 #20
0
ファイル: BAMfunctions.cpp プロジェクト: thomsoncd/STAR
// Modified from samtools bam_read1: instead of reading a record from a file,
// attach the BAM record stored in memory at `bamChar` to the bam structure.
//
// bamChar holds the 4-byte block length, the 32-byte core and then the
// variable-length data. The core fields are unpacked into b->core and b->data
// is pointed at the variable-length data INSIDE bamChar -- nothing is copied.
// NOTE(review): b->data is therefore borrowed; it looks like callers must not
// free or realloc it through `b` -- confirm against the callers.
//
// Returns 4 + block_len on success, -4 if the lengths are inconsistent.
int bam_read1_fromArray(char *bamChar, bam1_t *b)
{
	bam1_core_t *c = &b->core;
	int32_t block_len;
	uint32_t *x;

	uint32_t *bamU32=(uint32_t*) bamChar;
	block_len=bamU32[0];

	// The eight core words follow the length prefix
	x=bamU32+1;

	c->tid = x[0]; c->pos = x[1];
	c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
	c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
	c->l_qseq = x[4];
	c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
	b->l_data = block_len - 32;
	if (b->l_data < 0 || c->l_qseq < 0) return -4;
	if ((char *)bam_get_aux(b) - (char *)b->data > b->l_data)
		return -4;
	// No buffer is allocated here: b->data is redirected into bamChar below,
	// so an allocation would be leaked at once, and on a later call realloc
	// would be handed a pointer into bamChar. Only the m_data bookkeeping is
	// kept, so callers see the same value as before.
	if (b->m_data < b->l_data) {
		b->m_data = b->l_data;
		kroundup32(b->m_data);
	}
	// Skip the length prefix and the 32-byte core: 9 words of 4 bytes
	b->data=(uint8_t*) bamChar+4*9;

	return 4 + block_len;
}
コード例 #21
0
ファイル: bwaseqio.c プロジェクト: madhanap/bwa
 /*
  * Read the next FASTA/FASTQ record from seq->f into `seq`.
  *
  * iter, tid, thrds and first are forwarded unchanged to ks_getc1() and
  * ks_getuntil1().
  *
  * Returns the sequence length, -1 at the end of the file, and -2 if the
  * quality string is truncated or shorter than the sequence.
  */
 static int kseq_read1(kseq_t *seq, int iter, int tid, int thrds, bool *first)
 {
    int c;
    kstream_t *ks = seq->f;
    if (seq->last_char == 0) { /* then jump to the next header line */
       while ((c = ks_getc1(ks, iter, tid, thrds, first)) != -1 && c != '>' && c != '@');
       if (c == -1) return -1; /* end of file */
       seq->last_char = c;
    } /* the first header char has been read */
    seq->comment.l = seq->seq.l = seq->qual.l = 0;
    if (ks_getuntil1(ks, 0, &seq->name, &c, iter, tid, thrds, first) < 0) return -1;
    if (c != '\n') ks_getuntil1(ks, '\n', &seq->comment, 0, iter, tid, thrds, first);
    while ((c = ks_getc1(ks, iter, tid, thrds, first)) != -1 && c != '>' && c != '+' && c != '@') {
       if (isgraph(c)) { /* printable non-space character */
          if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */
             seq->seq.m = seq->seq.l + 2;
             kroundup32(seq->seq.m); /* rounded to next closest 2^k */
             seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);
          }
          seq->seq.s[seq->seq.l++] = (char)c;
       }
    }
    if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */
    /* A record without any base never entered the growth branch above, so a
       fresh kseq_t would have no buffer for the terminator. */
    if (seq->seq.l >= seq->seq.m) {
       seq->seq.m = seq->seq.l + 2;
       kroundup32(seq->seq.m);
       seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);
    }
    seq->seq.s[seq->seq.l] = 0;   /* null terminated string */
    if (c != '+') return seq->seq.l; /* FASTA */
    if (seq->qual.m < seq->seq.m) {  /* allocate enough memory */
       seq->qual.m = seq->seq.m;
       seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);
    }
    while ((c = ks_getc1(ks, iter, tid, thrds, first)) != -1 && c != '\n'); /* skip the rest of '+' line */
    if (c == -1) return -2; /* we should not stop here */
    while ((c = ks_getc1(ks, iter, tid, thrds, first)) != -1 && seq->qual.l < seq->seq.l)
       if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;
    seq->qual.s[seq->qual.l] = 0; /* null terminated string */
    seq->last_char = 0;  /* we have not come to the next header line */
    if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */
    return seq->seq.l;
 }
コード例 #22
0
ファイル: ibsget.c プロジェクト: dgaston/ibsget
/*
 * Read the whole resource at `url` into one NUL-terminated heap buffer.
 *
 * If `len` is not NULL it receives the number of bytes read (not counting
 * the terminator). Nothing is stored in *len when the URL cannot be opened
 * or the read buffer cannot be allocated.
 *
 * Returns the buffer, which the caller must free(). Returns NULL if the URL
 * cannot be opened, if no data was read, or if memory runs out.
 */
char *ibs_read_all(const char *url, int *len)
{
    int l, n = 0, m = 0;
    char *ret = 0, *buf;
    kurl_t *ku;

    ku = kurl_open(url, 0);
    if (ku == 0) return 0;
    buf = calloc(1, IBS_BUFSIZE);
    if (buf == 0) {
        kurl_close(ku);
        return 0;
    }
    while ((l = kurl_read(ku, buf, IBS_BUFSIZE)) > 0) {
        if (n + l + 1 > m) {
            char *grown;
            m = n + l + 1;
            kroundup32(m);
            grown = realloc(ret, m);
            if (grown == 0) {
                /* Give up: drop what was collected and report nothing */
                free(ret);
                ret = 0;
                n = 0;
                break;
            }
            ret = grown;
        }
        memcpy(ret + n, buf, l);
        n += l;
        ret[n] = 0;
    }
    free(buf); /* the chunk buffer is only needed while reading */
    kurl_close(ku);
    if (len) *len = n;
    return ret;
}
コード例 #23
0
/*
 * Command-line driver: align every read of the query file against every
 * sequence of the target file with ssw_align() and print the alignments that
 * reach the score filter through ssw_write().
 *
 * Usage: ssw_test [options] <target.fasta> <query.fasta|query.fastq>
 *
 * The scoring matrix is either built from -m/-x (genome alignment), the
 * built-in mat50 table (-p without -a), or parsed from the file given with
 * -a. Returns 0 on success and 1 on a usage or input error.
 */
int main (int argc, char * const argv[]) {
	clock_t start, end;
	float cpu_time;
	gzFile read_fp, ref_fp;
	kseq_t *read_seq, *ref_seq;
	int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1, path = 0, reverse = 0, n = 5, sam = 0, protein = 0, header = 0, s1 = 67108864, s2 = 128, filter = 0;
	const int32_t mat_cap = 1024;	// number of entries a matrix file may provide
	int8_t* mata = (int8_t*)calloc(25, sizeof(int8_t));
	const int8_t* mat = mata;
	// The file name is used straight from the argument vector: a fixed-size
	// copy would overflow on any path longer than the buffer.
	const char* mat_name = "";
	int8_t* ref_num = (int8_t*)malloc(s1);
	int8_t* num = (int8_t*)malloc(s2), *num_rc = 0;
	char* read_rc = 0;

	static const int8_t mat50[] = {
	//  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *
		5, -2, -1, -2, -1, -1, -1,  0, -2, -1, -2, -1, -1, -3, -1,  1,  0, -3, -2,  0, -2, -1, -1, -5,	// A
		-2,  7, -1, -2, -4,  1,  0, -3,  0, -4, -3,  3, -2, -3, -3, -1, -1, -3, -1, -3, -1,  0, -1, -5,	// R
		-1, -1,  7,  2, -2,  0,  0,  0,  1, -3, -4,  0, -2, -4, -2,  1,  0, -4, -2, -3,  5,  0, -1, -5,	// N
		-2, -2,  2,  8, -4,  0,  2, -1, -1, -4, -4, -1, -4, -5, -1,  0, -1, -5, -3, -4,  6,  1, -1, -5,	// D
		-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5,	// C
		-1,  1,  0,  0, -3,  7,  2, -2,  1, -3, -2,  2,  0, -4, -1,  0, -1, -1, -1, -3,  0,  4, -1, -5,	// Q
		-1,  0,  0,  2, -3,  2,  6, -3,  0, -4, -3,  1, -2, -3, -1, -1, -1, -3, -2, -3,  1,  5, -1, -5,	// E
		0, -3,  0, -1, -3, -2, -3,  8, -2, -4, -4, -2, -3, -4, -2,  0, -2, -3, -3, -4, -1, -2, -1, -5,	// G
		-2,  0,  1, -1, -3,  1,  0, -2, 10, -4, -3,  0, -1, -1, -2, -1, -2, -3,  2, -4,  0,  0, -1, -5,	// H
		-1, -4, -3, -4, -2, -3, -4, -4, -4,  5,  2, -3,  2,  0, -3, -3, -1, -3, -1,  4, -4, -3, -1, -5,	// I
		-2, -3, -4, -4, -2, -2, -3, -4, -3,  2,  5, -3,  3,  1, -4, -3, -1, -2, -1,  1, -4, -3, -1, -5,	// L
		-1,  3,  0, -1, -3,  2,  1, -2,  0, -3, -3,  6, -2, -4, -1,  0, -1, -3, -2, -3,  0,  1, -1, -5,	// K
		-1, -2, -2, -4, -2,  0, -2, -3, -1,  2,  3, -2,  7,  0, -3, -2, -1, -1,  0,  1, -3, -1, -1, -5,	// M
		-3, -3, -4, -5, -2, -4, -3, -4, -1,  0,  1, -4,  0,  8, -4, -3, -2,  1,  4, -1, -4, -4, -1, -5,	// F
		-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5,	// P
		1, -1,  1,  0, -1,  0, -1,  0, -1, -3, -3,  0, -2, -3, -1,  5,  2, -4, -2, -2,  0,  0, -1, -5,	// S
		0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  2,  5, -3, -2,  0,  0, -1, -1, -5,	// T
		-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1,  1, -4, -4, -3, 15,  2, -3, -5, -2, -1, -5,	// W
		-2, -1, -2, -3, -3, -1, -2, -3,  2, -1, -1, -2,  0,  4, -3, -2, -2,  2,  8, -1, -3, -2, -1, -5,	// Y
		0, -3, -3, -4, -1, -3, -3, -4, -4,  4,  1, -3,  1, -1, -3, -2,  0, -3, -1,  5, -3, -3, -1, -5,	// V
		-2, -1,  5,  6, -3,  0,  1, -1,  0, -4, -4,  0, -3, -4, -2,  0,  0, -5, -3, -3,  6,  1, -1, -5,	// B
		-1,  0,  0,  1, -3,  4,  5, -2,  0, -3, -3,  1, -1, -4, -1,  0, -1, -2, -2, -3,  1,  5, -1, -5,	// Z
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5,	// X
		-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,  1	// *
	};

	/* This table is used to transform amino acid letters into numbers. */
	int8_t aa_table[128] = {
		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
		23, 0,  20, 4,  3,  6,  13, 7,  8,  9,  23, 11, 10, 12, 2,  23,
		14, 5,  1,  15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
		23, 0,  20, 4,  3,  6,  13, 7,  8,  9,  23, 11, 10, 12, 2,  23,
		14, 5,  1,  15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
	};

	/* This table is used to transform nucleotide letters into numbers. */
	int8_t nt_table[128] = {
		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
		4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
	};

	int8_t* table = nt_table;

	// Parse command line.
	while ((l = getopt(argc, argv, "m:x:o:e:a:f:pcrsh")) >= 0) {
		switch (l) {
			case 'm': match = atoi(optarg); break;
			case 'x': mismatch = atoi(optarg); break;
			case 'o': gap_open = atoi(optarg); break;
			case 'e': gap_extension = atoi(optarg); break;
			case 'a': mat_name = optarg; break;
			case 'f': filter = atoi(optarg); break;
			case 'p': protein = 1; break;
			case 'c': path = 1; break;
			case 'r': reverse = 1; break;
			case 's': sam = 1; break;
			case 'h': header = 1; break;
		}
	}
	if (optind + 2 > argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage: ssw_test [options] ... <target.fasta> <query.fasta>(or <query.fastq>)\n");
		fprintf(stderr, "Options:\n");
		fprintf(stderr, "\t-m N\tN is a positive integer for weight match in genome sequence alignment. [default: 2]\n");
		fprintf(stderr, "\t-x N\tN is a positive integer. -N will be used as weight mismatch in genome sequence alignment. [default: 2]\n");
		fprintf(stderr, "\t-o N\tN is a positive integer. -N will be used as the weight for the gap opening. [default: 3]\n");
		fprintf(stderr, "\t-e N\tN is a positive integer. -N will be used as the weight for the gap extension. [default: 1]\n");
		fprintf(stderr, "\t-p\tDo protein sequence alignment. Without this option, the ssw_test will do genome sequence alignment.\n");
		fprintf(stderr, "\t-a FILE\tFILE is either the Blosum or Pam weight matrix. [default: Blosum50]\n");
		fprintf(stderr, "\t-c\tReturn the alignment path.\n");
		fprintf(stderr, "\t-f N\tN is a positive integer. Only output the alignments with the Smith-Waterman score >= N.\n");
		fprintf(stderr, "\t-r\tThe best alignment will be picked between the original read alignment and the reverse complement read alignment.\n");
		fprintf(stderr, "\t-s\tOutput in SAM format. [default: no header]\n");
		fprintf(stderr, "\t-h\tIf -s is used, include header in SAM output.\n\n");
		return 1;
	}

	// initialize scoring matrix for genome sequences
	for (l = k = 0; LIKELY(l < 4); ++l) {
		for (m = 0; LIKELY(m < 4); ++m) mata[k++] = l == m ? match : -mismatch;	/* weight_match : -weight_mismatch */
		mata[k++] = 0; // ambiguous base
	}
	for (m = 0; LIKELY(m < 5); ++m) mata[k++] = 0;

	if (protein == 1 && (! strcmp(mat_name, "\0"))) {
		n = 24;
		table = aa_table;
		mat = mat50;
	} else if (strcmp(mat_name, "\0")) {

	// Parse score matrix.
		FILE *f_mat = fopen(mat_name, "r");
		char line[128];
		int8_t *grown;
		if (f_mat == NULL) {
			fprintf(stderr, "Failed to open the weight matrix file '%s'.\n", mat_name);
			return 1;
		}
		grown = (int8_t*)realloc(mata, mat_cap * sizeof(int8_t));
		if (grown == NULL) {
			fprintf(stderr, "Out of memory while reading the weight matrix file.\n");
			fclose(f_mat);
			return 1;
		}
		mata = grown;
		k = 0;
		m = 0;
		while (fgets(line, 128, f_mat)) {
			if (line[0] == '*' || (line[0] >= 'A' && line[0] <= 'Z')) {
				// Row label: map both the upper- and the lower-case letter to row m
				if (line[0] >= 'A' && line[0] <= 'Z') aa_table[(int)line[0]] = aa_table[(int)line[0] + 32] = m;
				char str[4], *s = str;
				str[0] = '\0';
				l = 1;
				while (line[l]) {
					if ((line[l] >= '0' && line[l] <= '9') || line[l] == '-') {
						// Keep one byte for the terminator; longer tokens are cut
						if (s < str + sizeof(str) - 1) *s++ = line[l];
					}
					else if (str[0] != '\0') {
						*s = '\0';
						if (k < mat_cap) mata[k++] = (int8_t)atoi(str);
						s = str;
						str[0] = '\0';
					}
					++l;
				}
				// A number may end the line without a trailing separator
				if (str[0] != '\0') {
					*s = '\0';
					if (k < mat_cap) mata[k++] = (int8_t)atoi(str);
					s = str;
					str[0] = '\0';
				}
				++m;
			}
		}
		fclose(f_mat);
		if (k == 0) {
			fprintf(stderr, "Problem of reading the weight matrix file.\n");
			return 1;
		}
		n = m;
		table = aa_table;
		mat = mata;
	}

	read_fp = gzopen(argv[optind + 1], "r");

	if (! read_fp) {
		fprintf (stderr, "gzopen of '%s' failed.\n", argv[optind + 1]);
		exit (EXIT_FAILURE);
	}

	read_seq = kseq_init(read_fp);
	if (sam && header && path) {
		fprintf(stdout, "@HD\tVN:1.4\tSO:queryname\n");
		ref_fp = gzopen(argv[optind], "r");
		if (! ref_fp) {
			fprintf (stderr, "gzopen of '%s' failed.\n", argv[optind]);
			exit (EXIT_FAILURE);
		}
		ref_seq = kseq_init(ref_fp);
		while ((l = kseq_read(ref_seq)) >= 0) fprintf(stdout, "@SQ\tSN:%s\tLN:%d\n", ref_seq->name.s, (int32_t)ref_seq->seq.l);
		kseq_destroy(ref_seq);
		gzclose(ref_fp);
	} else if (sam && !path) {
		fprintf(stderr, "SAM format output is only available together with option -c.\n");
		sam = 0;
	}

	// alignment
	if (reverse == 1 && n == 5) {
		read_rc = (char*)malloc(s2);
		num_rc = (int8_t*)malloc(s2);
	}
	start = clock();
	while (kseq_read(read_seq) >= 0) {
		s_profile* p, *p_rc = 0;
		int32_t readLen = read_seq->seq.l;
		int32_t maskLen = readLen / 2;

		// Grow the per-read buffers until the current read fits
		while (readLen >= s2) {
			++s2;
			kroundup32(s2);
			num = (int8_t*)realloc(num, s2);
			if (reverse == 1 && n == 5) {
				read_rc = (char*)realloc(read_rc, s2);
				num_rc = (int8_t*)realloc(num_rc, s2);
			}
		}
		// NOTE(review): bytes outside 0..127 index outside `table` -- confirm
		// the input is always 7-bit before relying on this.
		for (m = 0; m < readLen; ++m) num[m] = table[(int)read_seq->seq.s[m]];
		p = ssw_init(num, readLen, mat, n, 2);
		if (reverse == 1 && n == 5) {
			reverse_comple(read_seq->seq.s, read_rc);
			for (m = 0; m < readLen; ++m) num_rc[m] = table[(int)read_rc[m]];
			p_rc = ssw_init(num_rc, readLen, mat, n, 2);
		}else if (reverse == 1 && n == 24) {
			fprintf (stderr, "Reverse complement alignment is not available for protein sequences. \n");
			return 1;
		}

		// The target file is re-read from the start for every query
		ref_fp = gzopen(argv[optind], "r");
		if (! ref_fp) {
			fprintf (stderr, "gzopen of '%s' failed.\n", argv[optind]);
			exit (EXIT_FAILURE);
		}
		ref_seq = kseq_init(ref_fp);
		while (kseq_read(ref_seq) >= 0) {
			s_align* result, *result_rc = 0;
			int32_t refLen = ref_seq->seq.l;
			int8_t flag = 0;
			while (refLen > s1) {
				++s1;
				kroundup32(s1);
				ref_num = (int8_t*)realloc(ref_num, s1);
			}
			for (m = 0; m < refLen; ++m) ref_num[m] = table[(int)ref_seq->seq.s[m]];
			if (path == 1) flag = 2;
			result = ssw_align (p, ref_num, refLen, gap_open, gap_extension, flag, filter, 0, maskLen);
			if (reverse == 1 && protein == 0)
				result_rc = ssw_align(p_rc, ref_num, refLen, gap_open, gap_extension, flag, filter, 0, maskLen);
			// Checked before any score comparison, which reads result->score1
			if (! result) return 1;
			if (result_rc && result_rc->score1 > result->score1 && result_rc->score1 >= filter) {
				if (sam) ssw_write (result_rc, ref_seq, read_seq, read_rc, table, 1, 1);
				else ssw_write (result_rc, ref_seq, read_seq, read_rc, table, 1, 0);
			}else if (result->score1 >= filter){
				if (sam) ssw_write(result, ref_seq, read_seq, read_seq->seq.s, table, 0, 1);
				else ssw_write(result, ref_seq, read_seq, read_seq->seq.s, table, 0, 0);
			}
			if (result_rc) align_destroy(result_rc);
			align_destroy(result);
		}

		if(p_rc) init_destroy(p_rc);
		init_destroy(p);
		kseq_destroy(ref_seq);
		gzclose(ref_fp);
	}
	end = clock();
	cpu_time = ((float) (end - start)) / CLOCKS_PER_SEC;
	fprintf(stderr, "CPU time: %f seconds\n", cpu_time);

	if (num_rc) {
		free(num_rc);
		free(read_rc);
	}
	kseq_destroy(read_seq);
	gzclose(read_fp);
	free(num);
	free(ref_num);
	free(mata);
	return 0;
}
コード例 #24
0
ファイル: cram_samtools.c プロジェクト: jkbonfield/htslib
/*---------------------------------------------------------------------------
 * Samtools compatibility portion
 */
/*
 * Fill the BAM record *bp from its individual fields.
 *
 * The variable-length data is laid out as: query name padded with 1 to 4 NUL
 * bytes to a multiple of four, CIGAR, sequence packed two bases per byte,
 * then qualities (0xff bytes when `qual` is NULL). `extra_len` bytes are
 * included in the record length but not written here.
 * `pos` and `mpos` are stored minus one.
 *
 * Returns the length of the variable-length data, or -1 if the padded name
 * does not fit core.l_qname or the data buffer cannot be grown.
 */
int bam_construct_seq(bam_seq_t **bp, size_t extra_len,
		      const char *qname, size_t qname_len,
		      int flag,
		      int rname,      // Ref ID
		      int pos,
		      int end,        // aligned start/end coords
		      int mapq,
		      uint32_t ncigar, const uint32_t *cigar,
		      int mrnm,       // Mate Ref ID
		      int mpos,
		      int isize,
		      int len,
		      const char *seq,
		      const char *qual) {
    // Maps a base character to its 4-bit code; unlisted characters map to 15
    static const char L[256] = {
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15, 0,15,15,
	15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
	15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
	15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
	15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
	15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
    };
    bam1_t *b = (bam1_t *)*bp;
    uint8_t *cp;
    int i, qname_nuls, bam_len;

    qname_nuls = 4 - qname_len%4;
    if (qname_len + qname_nuls > 255) // Check for core.l_qname overflow
        return -1;
    bam_len = qname_len + qname_nuls + ncigar*4 + (len+1)/2 + len + extra_len;
    if (b->m_data < bam_len) {
	uint32_t new_m = (uint32_t)bam_len;
	uint8_t *grown;
	kroundup32(new_m);
	grown = (uint8_t*)realloc(b->data, new_m);
	// On failure the record keeps its old buffer and its old capacity
	if (!grown)
	    return -1;
	b->data = grown;
	b->m_data = new_m;
    }
    b->l_data = bam_len;

    b->core.tid     = rname;
    b->core.pos     = pos-1;
    b->core.bin     = bam_reg2bin(pos-1, end);
    b->core.qual    = mapq;
    b->core.l_qname = qname_len+qname_nuls;
    b->core.l_extranul = qname_nuls-1;
    b->core.flag    = flag;
    b->core.n_cigar = ncigar;
    b->core.l_qseq  = len;
    b->core.mtid    = mrnm;
    b->core.mpos    = mpos-1;
    b->core.isize   = isize;

    cp = b->data;

    // Name, followed by its NUL padding
    strncpy((char *)cp, qname, qname_len);
    for (i = 0; i < qname_nuls; i++)
	cp[qname_len+i] = '\0';
    cp += qname_len+qname_nuls;
    if (ncigar > 0) memcpy(cp, cigar, ncigar*4);
    cp += ncigar*4;

    // Two bases per byte; an odd final base fills the high nibble only
    for (i = 0; i+1 < len; i+=2) {
	*cp++ = (L[(uc)seq[i]]<<4) + L[(uc)seq[i+1]];
    }
    if (i < len)
	*cp++ = L[(uc)seq[i]]<<4;

    if (qual)
	memcpy(cp, qual, len);
    else
	memset(cp, '\xff', len);

    return bam_len;
}
コード例 #25
0
ファイル: faidx.c プロジェクト: Griffan/FASTQuick
/*
 * Scan the FASTA stream `rz` and build its index.
 *
 * For every sequence the name, total length, line length with and without
 * the line terminator, and the offset of the first base are passed to
 * fai_insert_index().
 *
 * `state` tracks the line layout of the current sequence: 1 = header just
 * read, 0 = inside lines of uniform length, 2 = a line of different length
 * was seen (allowed only as the last line), 3 = more sequence after such a
 * line.
 *
 * Returns the index, or NULL after printing a message if the input is
 * malformed or memory runs out.
 */
faidx_t *fai_build_core(RAZF *rz)
{
	char c, *name;
	int l_name, m_name, ret;
	int len, line_len, line_blen, state;
	int l1, l2;
	faidx_t *idx;
	uint64_t offset;

	idx = (faidx_t*)calloc(1, sizeof(faidx_t));
	idx->hash = kh_init(s);
	name = 0; l_name = m_name = 0;
	len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
	while (razf_read(rz, &c, 1)) {
		if (c == '\n') { // an empty line
			if (state == 1) {
				offset = razf_tell(rz);
				continue;
			} else if ((state == 0 && len < 0) || state == 2) continue;
		}
		if (c == '>') { // fasta header
			if (len >= 0)
				fai_insert_index(idx, name, len, line_len, line_blen, offset);
			l_name = 0;
			// <ctype.h> functions need an unsigned char value
			while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace((unsigned char)c)) {
				if (m_name < l_name + 2) {
					char *grown;
					m_name = l_name + 2;
					kroundup32(m_name);
					grown = (char*)realloc(name, m_name);
					if (grown == 0) {
						fprintf(stderr, "[fai_build_core] out of memory\n");
						free(name); fai_destroy(idx);
						return 0;
					}
					name = grown;
				}
				name[l_name++] = c;
			}
			// An empty name in the very first header never allocated the
			// buffer, so make room for the terminator.
			if (name == 0) {
				m_name = 1;
				name = (char*)calloc(m_name, 1);
				if (name == 0) {
					fprintf(stderr, "[fai_build_core] out of memory\n");
					fai_destroy(idx);
					return 0;
				}
			}
			name[l_name] = '\0';
			if (ret == 0) {
				fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
				free(name); fai_destroy(idx);
				return 0;
			}
			// Skip the rest of the header line
			if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n');
			state = 1; len = 0;
			offset = razf_tell(rz);
		} else {
			if (state == 3) {
				fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			if (state == 2) state = 3;
			// l1 counts every byte of the line, l2 only the printable ones
			l1 = l2 = 0;
			do {
				++l1;
				if (isgraph((unsigned char)c)) ++l2;
			} while ((ret = razf_read(rz, &c, 1)) && c != '\n');
			if (state == 3 && l2) {
				fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			++l1; len += l2;
			if (l2 >= 0x10000) {
				fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			if (state == 1) line_len = l1, line_blen = l2, state = 0;
			else if (state == 0) {
				if (l1 != line_len || l2 != line_blen) state = 2;
			}
		}
	}
	fai_insert_index(idx, name, len, line_len, line_blen, offset);
	free(name);
	return idx;
}
コード例 #26
0
ファイル: bam2fq.c プロジェクト: kmsquire/htslib
/*
 * bam2fq: write the records of a BAM file to stdout as FASTQ, or as FASTA
 * ('>' header, no '+' / quality lines) for records whose first quality
 * byte is 0xff.
 *
 * Options:
 *   -a       append "/<n>" to the read name, where <n> is bits 6-7 of the flag
 *   -s FILE  write reads whose name differs from both neighbours to FILE
 *            instead of stdout (input is expected to have mates adjacent)
 *
 * With -s, each record is held in `str` until the next record is seen: if
 * the names match, both are printed to stdout; otherwise the held record
 * is written to FILE and counted as a singleton.
 *
 * Secondary alignments are skipped. Reverse-strand reads are
 * reverse-complemented and their qualities reversed.
 *
 * Returns 0 on success, 1 on usage error, open failure or out-of-memory.
 */
int main_bam2fq(int argc, char *argv[])
{
    BGZF *fp, *fpse = 0;
    bam1_t *b;
    uint8_t *buf;
    int max_buf, c, has12 = 0, ret = 0;
    kstring_t str;
    int64_t n_singletons = 0, n_reads = 0;
    char last[512], *fnse = 0;

    while ((c = getopt(argc, argv, "as:")) > 0)
        if (c == 'a') has12 = 1;
        else if (c == 's') fnse = optarg;
    if (argc == optind) {
        fprintf(stderr, "\nUsage:   bam2fq [-a] [-s outSE] <in.bam>\n\n");
        fprintf(stderr, "Options: -a        append /1 and /2 to the read name\n");
        fprintf(stderr, "         -s FILE   write singleton reads to FILE [assume single-end]\n");
        fprintf(stderr, "\n");
        return 1;
    }
    fp = strcmp(argv[optind], "-")? bgzf_open(argv[optind], "r") : bgzf_dopen(fileno(stdin), "r");
    if (fp == 0) { // explicit check: assert() would vanish under NDEBUG
        fprintf(stderr, "[E::%s] fail to open '%s' for reading\n", __func__, argv[optind]);
        return 1;
    }
    bam_hdr_destroy(bam_hdr_read(fp)); // the header is read only to skip past it
    buf = 0;
    max_buf = 0;
    str.l = str.m = 0;
    str.s = 0;
    last[0] = 0;
    if (fnse) {
        fpse = bgzf_open(fnse, "w1");
        if (fpse == 0) { // do not silently fall back to "no -s" behaviour
            fprintf(stderr, "[E::%s] fail to open '%s' for writing\n", __func__, fnse);
            bgzf_close(fp);
            return 1;
        }
    }

    b = bam_init1();
    if (b == 0) {
        fprintf(stderr, "[E::%s] out of memory\n", __func__);
        if (fpse) bgzf_close(fpse);
        bgzf_close(fp);
        return 1;
    }
    while (bam_read1(fp, b) >= 0) {
        int i, qlen = b->core.l_qseq, is_print = 0;
        uint8_t *qual, *seq;
        if (b->core.flag&BAM_FSECONDARY) continue; // skip secondary alignments
        ++n_reads;
        // Grow the scratch buffer first, so that a failure leaves `str`
        // holding only complete records.
        if (max_buf < qlen + 1) {
            int new_max = qlen + 1;
            uint8_t *tmp;
            kroundup32(new_max);
            tmp = (uint8_t*)realloc(buf, new_max);
            if (tmp == 0) {
                fprintf(stderr, "[E::%s] out of memory\n", __func__);
                ret = 1;
                break;
            }
            buf = tmp;
            max_buf = new_max;
        }
        if (fpse) {
            // A held record whose name differs from this one has no mate.
            if (str.l && strcmp(last, bam_get_qname(b))) {
                bgzf_write(fpse, str.s, str.l);
                str.l = 0;
                ++n_singletons;
            }
            // Still holding a record => same name => print the pair together.
            if (str.l) is_print = 1;
            strcpy(last, bam_get_qname(b));
        } else is_print = 1;
        qual = bam_get_qual(b);
        kputc(qual[0] == 0xff? '>' : '@', &str);
        kputsn(bam_get_qname(b), b->core.l_qname - 1, &str);
        if (has12) {
            kputc('/', &str);
            kputw(b->core.flag>>6&3, &str);
        }
        kputc('\n', &str);
        buf[qlen] = 0;
        seq = bam_get_seq(b);
        for (i = 0; i < qlen; ++i) buf[i] = bam_seqi(seq, i); // copy the sequence
        if (bam_is_rev(b)) { // reverse complement
            for (i = 0; i < qlen>>1; ++i) {
                int8_t t = seq_comp_table[buf[qlen - 1 - i]];
                buf[qlen - 1 - i] = seq_comp_table[buf[i]];
                buf[i] = t;
            }
            if (qlen&1) buf[i] = seq_comp_table[buf[i]]; // middle base of an odd-length read
        }
        for (i = 0; i < qlen; ++i) buf[i] = seq_nt16_str[buf[i]];
        kputsn((char*)buf, qlen, &str);
        kputc('\n', &str);
        if (qual[0] != 0xff) {
            kputsn("+\n", 2, &str);
            for (i = 0; i < qlen; ++i) buf[i] = 33 + qual[i];
            if (bam_is_rev(b)) { // reverse
                for (i = 0; i < qlen>>1; ++i) {
                    uint8_t t = buf[qlen - 1 - i];
                    buf[qlen - 1 - i] = buf[i];
                    buf[i] = t;
                }
            }
            // The quality line belongs only to FASTQ records; emitting buf
            // unconditionally would repeat the sequence for FASTA records.
            kputsn((char*)buf, qlen, &str);
            kputc('\n', &str);
        }
        if (is_print) {
            fwrite(str.s, 1, str.l, stdout);
            str.l = 0;
        }
    }
    if (fpse) {
        if (str.l) { // the last held record never found a mate
            bgzf_write(fpse, str.s, str.l);
            ++n_singletons;
        }
        fprintf(stderr, "[M::%s] discarded %lld singletons\n", __func__, (long long)n_singletons);
        bgzf_close(fpse);
    }
    fprintf(stderr, "[M::%s] processed %lld reads\n", __func__, (long long)n_reads);
    free(buf);
    free(str.s);
    bam_destroy1(b);
    bgzf_close(fp);
    return ret;
}
コード例 #27
0
ファイル: bam_sort.c プロジェクト: chapmanb/samtools
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn          name of the file to be sorted; "-" reads from stdin
  @param  prefix      prefix of the output and the temporary files; upon
                      success, prefix.bam will be written
  @param  _max_mem    approximate maximum memory per thread (very
                      inaccurate); it is multiplied by n_threads
  @param  is_stdout   if non-zero, write the final output to "-" instead
                      of prefix.bam
  @param  n_threads   number of threads; values below 2 are treated as 1
  @param  level       compression level appended to the write mode when
                      non-negative (capped at 9)
  @param  sort_type   passed through to sort_blocks() / sort_aux_core()

  @discussion It may create multiple temporary subalignment files
  (prefix.NNNN.bam) and then merge them by calling bam_merge_core2().
  Errors are reported on stderr; if the input cannot be opened, the header
  cannot be read, or memory runs out while reading, the function returns
  without writing the final output. This function is NOT thread safe
  (it sets the global g_is_by_qname).
 */
void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int sort_type)
{
	int ret, i, n_files = 0;
	size_t mem, max_k, k, max_mem;
	bam_header_t *header;
	bamFile fp;
	bam1_t *b, **buf;
	char *fnout = 0;

	if (n_threads < 2) n_threads = 1;
	g_is_by_qname = is_by_qname;
	max_k = k = 0; mem = 0;
	max_mem = _max_mem * n_threads;
	buf = 0;
	fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
		return;
	}
	header = bam_header_read(fp);
	if (header == 0) { // change_SO() below needs a valid header
		fprintf(stderr, "[bam_sort_core] fail to read the header from %s\n", fn);
		bam_close(fp);
		return;
	}
	if (is_by_qname) change_SO(header, "queryname");
	else change_SO(header, "coordinate");
	// write sub files
	for (;;) {
		if (k == max_k) { // grow the record-pointer array (doubling, starting at 64k slots)
			size_t new_max = max_k? max_k<<1 : 0x10000;
			bam1_t **tmp = (bam1_t**)realloc(buf, new_max * sizeof(bam1_t*));
			if (tmp == 0) {
				fprintf(stderr, "[bam_sort_core] out of memory\n");
				goto cleanup; // buf and max_k still describe the old array
			}
			memset(tmp + max_k, 0, sizeof(bam1_t*) * (new_max - max_k));
			buf = tmp;
			max_k = new_max;
		}
		// Slots are allocated lazily and reused after each flushed block.
		if (buf[k] == 0) {
			buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
			if (buf[k] == 0) {
				fprintf(stderr, "[bam_sort_core] out of memory\n");
				goto cleanup;
			}
		}
		b = buf[k];
		if ((ret = bam_read1(fp, b)) < 0) break;
		if (b->data_len < b->m_data>>2) { // shrink
			b->m_data = b->data_len;
			kroundup32(b->m_data);
			b->data = realloc(b->data, b->m_data);
		}
		mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
		++k;
		if (mem >= max_mem) { // memory budget reached: sort and flush this block to a temporary file
			n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
			mem = k = 0;
		}
	}
	if (ret != -1)
		fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
	// output file name
	fnout = calloc(strlen(prefix) + 20, 1);
	if (fnout == 0) {
		fprintf(stderr, "[bam_sort_core] out of memory\n");
		goto cleanup;
	}
	if (is_stdout) sprintf(fnout, "-");
	else sprintf(fnout, "%s.bam", prefix);
	// write the final output
	if (n_files == 0) { // a single block
		char mode[8];
		strcpy(mode, "w");
		if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
		sort_aux_core(k, buf, sort_type);
#ifndef _PBGZF_USE 
		write_buffer(fnout, mode, k, buf, header, n_threads);
#else
		write_buffer(fnout, mode, k, buf, header);
#endif
	} else { // then merge
		char **fns;
		// flush the remaining in-memory records as one more temporary file
		n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
		fns = (char**)calloc(n_files, sizeof(char*));
		for (i = 0; i < n_files; ++i) {
			fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
			sprintf(fns[i], "%s.%.4d.bam", prefix, i);
		}
#ifndef _PBGZF_USE 
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level);
#else
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, level);
#endif
		for (i = 0; i < n_files; ++i) { // the temporary files are removed after merging
			unlink(fns[i]);
			free(fns[i]);
		}
		free(fns);
	}
cleanup:
	free(fnout);
	// free
	for (k = 0; k < max_k; ++k) {
		if (!buf[k]) continue;
		free(buf[k]->data);
		free(buf[k]);
	}
	free(buf);
	bam_header_destroy(header);
	bam_close(fp);
}
コード例 #28
0
ファイル: bam_lpileup.c プロジェクト: 05curranth/samtools
/*
 * Pileup callback that assigns a `level` to every entry of `pl` and then
 * forwards the call to tv->func.
 *
 * A read keeps the level it received when it first appeared (is_head);
 * continuing reads take their level from tv->pre_level in order. Levels
 * released by reads that end here (is_tail) are appended to a linked list
 * of free slots (tv->head .. tv->tail), from which new reads draw once a
 * slot's `cnt` has reached zero. The list's last node (tv->tail) is an
 * unfilled placeholder; loops stop at the node whose `next` is null.
 * NOTE(review): this relies on mp_alloc() returning nodes with next == 0
 * -- confirm against the mempool implementation.
 *
 * @param tid,pos  forwarded unchanged to tv->func
 * @param n        number of entries in pl
 * @param pl       pileup entries; `level` is written through a cast even
 *                 though the pointer is const
 * @param data     the bam_lplbuf_t holding the state
 * @return always 0
 */
static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
{
	bam_lplbuf_t *tv = (bam_lplbuf_t*)data;
	freenode_t *p;
	int i, l, max_level;
	// allocate memory if necessary
	if (tv->max < n) { // enlarge
		tv->max = n;
		kroundup32(tv->max);
		tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max);
		tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max);
	}
	tv->n_cur = n;
	// update cnt
	for (p = tv->head; p->next; p = p->next)
		if (p->cnt > 0) --p->cnt;
	// calculate cur_level[]
	max_level = 0;
	for (i = l = 0; i < n; ++i) {
		const bam_pileup1_t *pu = pl + i;
		if (pu->is_head) {
			if (tv->head->next && tv->head->cnt == 0) { // then take a free slot
				freenode_t *rest = tv->head->next;
				tv->cur_level[i] = tv->head->level;
				mp_free(tv->mp, tv->head);
				tv->head = rest;
				--tv->n_nodes;
			} else tv->cur_level[i] = ++tv->max_level; // no reusable slot: open a new level
		} else {
			tv->cur_level[i] = tv->pre_level[l++];
			if (pu->is_tail) { // then return a free slot
				tv->tail->level = tv->cur_level[i];
				tv->tail->next = mp_alloc(tv->mp);
				tv->tail = tv->tail->next;
				++tv->n_nodes;
			}
		}
		if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i];
		((bam_pileup1_t*)pu)->level = tv->cur_level[i];
	}
	assert(l == tv->n_pre); // every level carried over from the previous call must be consumed
	tv->func(tid, pos, n, pl, tv->user_data);
	// sort the linked list
	if (tv->n_nodes) {
		freenode_t *q;
		if (tv->n_nodes + 1 > tv->m_aux) { // enlarge
			tv->m_aux = tv->n_nodes + 1;
			kroundup32(tv->m_aux);
			tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux);
		}
		for (p = tv->head, i = l = 0; p->next;) {
			if (p->level > max_level) { // then discard this entry
				q = p->next;
				mp_free(tv->mp, p);
				p = q;
			} else {
				tv->aux[i++] = p;
				p = p->next;
			}
		}
		tv->aux[i] = tv->tail; // add a proper tail for the loop below
		tv->n_nodes = i;
		if (tv->n_nodes) {
			ks_introsort(node, tv->n_nodes, tv->aux);
			// relink the nodes in sorted order; the last one points at the tail
			for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1];
			tv->head = tv->aux[0];
		} else tv->head = tv->tail;
	}
	// clean up
	tv->max_level = max_level;
	// n_cur == 0 can occur before any buffer was allocated; skip the copy then
	if (tv->n_cur > 0)
		memcpy(tv->pre_level, tv->cur_level, tv->n_cur * sizeof(int));
	// squeeze out terminated levels
	for (i = l = 0; i < n; ++i) {
		const bam_pileup1_t *pu = pl + i;
		if (!pu->is_tail)
			tv->pre_level[l++] = tv->pre_level[i];
	}
	tv->n_pre = l;
/*
	fprintf(stderr, "%d\t", pos+1);
	for (i = 0; i < n; ++i) {
		const bam_pileup1_t *p = pl + i;
		if (p->is_head) fprintf(stderr, "^");
		if (p->is_tail) fprintf(stderr, "$");
		fprintf(stderr, "%d,", p->level);
	}
	fprintf(stderr, "\n");
*/
	return 0;
}
コード例 #29
0
/*
 * Banded Smith-Waterman with trace back, producing a CIGAR.
 *
 * The dynamic-programming matrix is filled within a diagonal band of
 * half-width band_width; the band is doubled and the fill repeated until
 * the best cell score reaches `score`. The trace back then starts from
 * cell (readLen - 1, refLen - 1).
 *
 * Direction codes stored per cell (three bytes: e, f, h):
 *   1    diagonal step                              -> 'M'
 *   2/3  step in the read, gap extended / opened    -> 'I'
 *   4/5  step in the reference, gap extended/opened -> 'D'
 *
 * @param ref, read      sequences as numeric codes used to index `mat`
 * @param refLen,readLen their lengths
 * @param score          score the fill must reach before trace back starts
 * @param weight_gapO    gap open penalty (subtracted)
 * @param weight_gapE    gap extension penalty (subtracted)
 * @param band_width     initial half-width of the band
 * @param mat            scoring matrix, indexed as mat[ref[j] * n + read[i]]
 * @param n              row stride of mat
 * @return a malloc'ed cigar whose `seq` (malloc'ed, `length` entries built
 *         with to_cigar_int) is in read order, or 0 on a trace back error.
 *         Terminates the process via exit(1) if the direction matrix
 *         cannot be sized or allocated.
 */
static cigar* banded_sw (const int8_t* ref,
				 const int8_t* read,
				 int32_t refLen,
				 int32_t readLen,
				 int32_t score,
				 const uint32_t weight_gapO,  /* will be used as - */
				 const uint32_t weight_gapE,  /* will be used as - */
				 int32_t band_width,
				 const int8_t* mat,	/* pointer to the weight matrix */
				 int32_t n) {

	uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
	int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, l, max = 0;
	int64_t s2 = 1024;
	char op, prev_op;
	int32_t width, width_d, *h_b, *e_b, *h_c;
	int8_t *direction, *direction_line;
	cigar* result = (cigar*)malloc(sizeof(cigar));
	h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
	e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
	h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
	direction = (int8_t*)malloc(s2 * sizeof(int8_t));

	do {
		width = band_width * 2 + 3, width_d = band_width * 2 + 1;
		while (width >= s1) {
			++s1;
			kroundup32(s1);
			h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
			e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
			h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
		}
		// Size in 64-bit: width_d * readLen * 3 can exceed INT32_MAX once
		// the band has been doubled a few times.
		while ((int64_t)width_d * readLen * 3 >= s2) {
			++s2;
			kroundup32(s2);
			if (s2 < 0) {
				fprintf(stderr, "Alignment score and position are not consensus.\n");
				exit(1);
			}
			direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
			if (direction == NULL) {
				fprintf(stderr, "[banded_sw] out of memory for the direction matrix.\n");
				exit(1);
			}
		}
		direction_line = direction;
		for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
		for (i = 0; LIKELY(i < readLen); i ++) {
			int32_t beg = 0, end = refLen - 1, u = 0, edge;
			j = i - band_width;	beg = beg > j ? beg : j; // band start
			j = i + band_width; end = end < j ? end : j; // band end
			edge = end + 1 < width - 1 ? end + 1 : width - 1;
			f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
			// start of row i in the direction matrix (3 bytes per band cell)
			direction_line = direction + (int64_t)width_d * i * 3;

			for (j = beg; LIKELY(j <= end); j ++) {
				int32_t b, e1, f1, d, de, df, dh;
				set_u(u, band_width, i, j);	set_u(e, band_width, i - 1, j);
				set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
				set_d(de, band_width, i, j, 0);
				set_d(df, band_width, i, j, 1);
				set_d(dh, band_width, i, j, 2);

				// e: gap along the read, opened from h (3) or extended (2)
				temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
				temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
				e_b[u] = temp1 > temp2 ? temp1 : temp2;
				direction_line[de] = temp1 > temp2 ? 3 : 2;

				// f: gap along the reference, opened from h (5) or extended (4)
				temp1 = h_c[b] - weight_gapO;
				temp2 = f - weight_gapE;
				f = temp1 > temp2 ? temp1 : temp2;
				direction_line[df] = temp1 > temp2 ? 5 : 4;

				// h: best of e, f (both floored at 0) and the diagonal
				e1 = e_b[u] > 0 ? e_b[u] : 0;
				f1 = f > 0 ? f : 0;
				temp1 = e1 > f1 ? e1 : f1;
				temp2 = h_b[d] + mat[ref[j] * n + read[i]];
				h_c[u] = temp1 > temp2 ? temp1 : temp2;

				if (h_c[u] > max) max = h_c[u];

				if (temp1 <= temp2) direction_line[dh] = 1;
				else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
			}
			for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
		}
		band_width *= 2;
	} while (LIKELY(max < score));
	band_width /= 2; // undo the last doubling: this is the width the matrix was filled with

	// trace back
	// direction_line still points at the last row (i = readLen - 1).
	i = readLen - 1;
	j = refLen - 1;
	e = 0;	// Count the number of M, D or I.
	l = 0;	// record length of current cigar
	op = prev_op = 'M';
	temp2 = 2;	// h
	while (LIKELY(i > 0)) {
		set_d(temp1, band_width, i, j, temp2);
		switch (direction_line[temp1]) {
			case 1:
				--i;
				--j;
				temp2 = 2;
				direction_line -= width_d * 3;
				op = 'M';
				break;
			case 2:
			 	--i;
				temp2 = 0;	// e
				direction_line -= width_d * 3;
				op = 'I';
				break;
			case 3:
				--i;
				temp2 = 2;
				direction_line -= width_d * 3;
				op = 'I';
				break;
			case 4:
				--j;
				temp2 = 1;
				op = 'D';
				break;
			case 5:
				--j;
				temp2 = 2;
				op = 'D';
				break;
			default:
				// report the cell that was actually tested by the switch
				fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1]);
				free(direction);
				free(h_c);
				free(e_b);
				free(h_b);
				free(c);
				free(result);
				return 0;
		}
		if (op == prev_op) ++e;
		else {
			++l;
			while (l >= s) {
				++s;
				kroundup32(s);
				c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
			}
			c[l - 1] = to_cigar_int(e, prev_op);
			prev_op = op;
			e = 1;
		}
	}
	// close the run in progress; the cell at i == 0 contributes a final 'M'
	if (op == 'M') {
		++l;
		while (l >= s) {
			++s;
			kroundup32(s);
			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
		}
		c[l - 1] = to_cigar_int(e + 1, op);
	}else {
		l += 2;
		while (l >= s) {
			++s;
			kroundup32(s);
			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
		}
		c[l - 2] = to_cigar_int(e, op);
		c[l - 1] = to_cigar_int(1, 'M');
	}

	// reverse cigar
	c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
	s = 0;
	e = l - 1;
	while (LIKELY(s <= e)) {
		c1[s] = c[e];
		c1[e] = c[s];
		++ s;
		-- e;
	}
	result->seq = c1;
	result->length = l;

	free(direction);
	free(h_c);
	free(e_b);
	free(h_b);
	free(c);
	return result;
}
コード例 #30
0
ファイル: faidx.c プロジェクト: mtmorgan/Rhtslib
/*
 * Build a FASTA index by scanning `bgzf` one byte at a time.
 *
 * For every '>' header the sequence name (text up to the first whitespace)
 * is collected; for the sequence that follows, the function tracks the
 * total number of printable characters (len), the byte length and
 * printable length of the first line (line_len, line_blen), and the
 * offset reported by bgzf_utell() at which the sequence starts. These are
 * handed to fai_insert_index() when the next header or end of input is
 * reached.
 *
 * `state` tracks line-length consistency within one sequence:
 *   1  header just read; the next line defines line_len / line_blen
 *   0  lines so far match the first line (also the state before any header)
 *   2  one line differed in length (only acceptable as the last line)
 *   3  further data followed such a line -> error
 *
 * @return the new index, or NULL (after printing a message for malformed
 *         input) when the input contains no sequence, has data before the
 *         first header, has inconsistent line lengths, ends right after a
 *         header name, or memory runs out.
 */
faidx_t *fai_build_core(BGZF *bgzf)
{
    char *name;
    int c;
    int l_name, m_name;
    int line_len, line_blen, state;
    int l1, l2;
    faidx_t *idx;
    uint64_t offset;
    int64_t len;

    idx = (faidx_t*)calloc(1, sizeof(faidx_t));
    name = (char*)calloc(1, sizeof(char)); /* at least 1 byte, for '\0' */
    if (idx == NULL || name == NULL) {
        free(name); free(idx);
        return NULL;
    }
    idx->hash = kh_init(s);
    l_name = m_name = 0;
    len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
    while ( (c=bgzf_getc(bgzf))>=0 ) {
        if (c == '\n') { // an empty line
            if (state == 1) {
                // empty line right after the header: the sequence starts later
                offset = bgzf_utell(bgzf);
                continue;
            } else if ((state == 0 && len < 0) || state == 2) continue;
        }
        if (c == '>') { // fasta header
            if (len >= 0) // flush the previous sequence, if there was one
                fai_insert_index(idx, name, len, line_len, line_blen, offset);
            l_name = 0;
            while ( (c=bgzf_getc(bgzf))>=0 && !isspace(c)) {
                if (m_name < l_name + 2) { // room for this char plus the terminator
                    char *tmp;
                    int new_m = l_name + 2;
                    kroundup32(new_m);
                    tmp = (char*)realloc(name, new_m);
                    if (tmp == NULL) {
                        fprintf(stderr, "[fai_build_core] out of memory\n");
                        free(name); fai_destroy(idx);
                        return 0;
                    }
                    name = tmp;
                    m_name = new_m;
                }
                name[l_name++] = c;
            }
            name[l_name] = '\0';
            if ( c<0 ) {
                fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
                free(name); fai_destroy(idx);
                return 0;
            }
            // skip the rest of the header line (the description)
            if (c != '\n') while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
            state = 1; len = 0;
            offset = bgzf_utell(bgzf);
        } else {
            if (len < 0) {
                // len is only set by a header, so this is data with no name
                fprintf(stderr, "[fai_build_core] sequence data found before the first '>' header\n");
                free(name); fai_destroy(idx);
                return 0;
            }
            if (state == 3) {
                fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
                free(name); fai_destroy(idx);
                return 0;
            }
            if (state == 2) state = 3;
            // l1 counts every byte of the line, l2 only the printable ones
            l1 = l2 = 0;
            do {
                ++l1;
                if (isgraph(c)) ++l2;
            } while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
            if (state == 3 && l2) {
                fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
                free(name); fai_destroy(idx);
                return 0;
            }
            ++l1; len += l2; // ++l1 accounts for the newline
            if (state == 1) line_len = l1, line_blen = l2, state = 0;
            else if (state == 0) {
                if (l1 != line_len || l2 != line_blen) state = 2;
            }
        }
    }
    if (len < 0) {
        // No header was ever seen: there is nothing to index. (`name` is
        // pre-allocated, so it cannot be used to detect this case.)
        free(name); fai_destroy(idx);
        return NULL;
    }
    fai_insert_index(idx, name, len, line_len, line_blen, offset);
    free(name);
    return idx;
}