Beispiel #1
0
/*
 * Create a new timer state attached to the given lk_State.
 *
 * The state is obtained from lk_malloc(), zero-filled, given a lock via
 * lk_initlock() and a memory pool sized for lk_Timer objects via
 * lk_initmempool().  When lk_initlock() reports failure (a zero result),
 * lk_discard(S) is invoked.
 *
 * NOTE(review): whether lk_discard() returns to the caller is not visible
 * from this function; if it does, initialisation simply carries on.
 *
 * Returns the newly initialised timer state.
 */
static lk_TimerState *lkT_newstate (lk_State *S) {
    lk_TimerState *state = (lk_TimerState*)lk_malloc(S, sizeof(lk_TimerState));

    /* Start from an all-zero object before wiring up individual members. */
    memset(state, 0, sizeof(lk_TimerState));

    if (lk_initlock(&state->lock) == 0) {
        lk_discard(S);
    }

    state->S = S;
    lk_initmempool(S, &state->timers, sizeof(lk_Timer), 0);

    return state;
}
Beispiel #2
0
/*
 * Read a tab-separated ranges file and build the hash table of contigs.
 *
 * Lines with fewer than three tab-separated tokens are ignored.  For every
 * other line, token 0 is the contig name, columns 1 and 2 are parsed with
 * getlnumcolumn() as the start and end of a range, and column 3 is fetched
 * with getstrcolumn() as the range name.  Coordinates are treated as
 * 1-based and inclusive (a start below 1 is rejected and index start-1 is
 * used when marking bases).  Reading stops at the first empty string
 * returned by fget_string().
 *
 * After reading, each contig gets:
 *   - size      = largest end coordinate seen for the contig,
 *   - counts    = array of `size` entries, NO_COUNT outside the ranges and
 *                 0 inside them,
 *   - tot_bases = number of positions whose count is 0 (i.e. inside ranges).
 * The per-contig list of range blocks is also reversed in place.
 *
 * Parameters:
 *   fname      - path of the ranges file.
 *   contig_ptr - receives the head of the contig hash table on success, or
 *                0 on error.
 *   lkc        - compression settings handed to open_readfile_and_check().
 *
 * Returns 0 on success, 1 for a malformed coordinate line and 2 when a
 * contig ends up with size 0.  On error the structures built so far are
 * not released.
 *
 * NOTE(review): the result of open_readfile_and_check() is used without a
 * NULL test; presumably that helper aborts on failure - confirm.
 */
static int read_ranges_file(char *fname,struct contig **contig_ptr,struct lk_compress *lkc)
{
    int i,err;
    u_int32_t x1,x1a,x2,x3,x;
    struct contig *ctg,*c;
    struct range_blk *r,*r1;
    char *estr;
    FILE *fptr;
    string *s;
    tokens *tok;
    void *tbuf;

    s=0;
    tok=0;
    tbuf=0;
    err=0;
    fptr=open_readfile_and_check(fname,&i,lkc);
    printf("Reading ranges file '%s'\n",fname);
    ctg=0;
    while(!err) {
        s=fget_string(fptr,s,&tbuf);
        if(!s->len) break;
        tok=tokenize(get_cstring(s),'\t',tok);
        if(tok->n_tok>=3) {
            HASH_FIND_STR(ctg,tok->toks[0],c);
            if(!c) {
                /* First range for this contig: create and register it. */
                c=lk_malloc(sizeof(struct contig));
                c->name=strdup(tok->toks[0]);
                c->size=0;
                /* tsize is read by later stages even for contigs that never
                 * receive target ranges, so it must start out defined. */
                c->tsize=0;
                c->ranges=lk_malloc(sizeof(struct range_blk));
                c->ranges->idx=0;
                c->ranges->next=0;
                c->tranges=0;
                c->counts=0;
                c->tcounts=0;
                c->tot_count=0;
                c->tot_tcount=0;
                c->tot_bases=0;
                c->tot_tbases=0;
                HASH_ADD_KEYPTR(hh,ctg,c->name,strlen(c->name),c);
            }
            r=c->ranges;
            if(r->idx==RANGE_BLK_SIZE) {
                /* Head block is full: push a fresh block on the front. */
                r=lk_malloc(sizeof(struct range_blk));
                r->idx=0;
                r->next=c->ranges;
                c->ranges=r;
            }
            x1=(u_int32_t)getlnumcolumn(tok,1,0,&estr);
            x2=0;
            if(!estr) x2=(u_int32_t)getlnumcolumn(tok,2,0,&estr);
            if(estr || x1<1 || x2<x1) {
                err=1;
                /* estr is NULL when the numbers parsed but the range itself
                 * is invalid; never hand NULL to %s. */
                fprintf(stderr,"Error in coordinate format: %s\n",estr?estr:"invalid range");
            } else {
                r->name[r->idx]=getstrcolumn(tok,3,0,0);
                r->x1[r->idx]=x1;
                r->x2[r->idx++]=x2;
            }
        }
    }
    fclose(fptr);
    if(s) free_string(s);
    if(tok) free_tokens(tok);
    if(tbuf) free_fget_buffer(&tbuf);
    signal(SIGCHLD,SIG_DFL);
    x2=x3=0;
    for(c=ctg; c && !err; c=c->hh.next) {
        /* Find the largest end coordinate while reversing the block list. */
        x1=0;
        r=c->ranges;
        c->ranges=0;
        while(r) {
            for(i=0; i<r->idx; i++) {
                if(r->x2[i]>x1) x1=r->x2[i];
            }
            r1=r->next;
            r->next=c->ranges;
            c->ranges=r;
            r=r1;
        }
        c->size=x1;
        x1a=0;
        if(!x1) err=2;
        else {
            x2+=x1;
            c->counts=lk_malloc(sizeof(count)*c->size);
            /* Iterate with an unsigned index: casting size to int would
             * skip these loops entirely for sizes of 2^31 or more. */
            for(x=0; x<c->size; x++) c->counts[x]=NO_COUNT;
            for(r=c->ranges; r; r=r->next) {
                for(i=0; i<r->idx; i++) {
                    for(x=r->x1[i]-1; x<r->x2[i]; x++) c->counts[x]=0;
                }
            }
            for(x=0; x<c->size; x++) if(!c->counts[x]) x1a++;
        }
        c->tot_bases=x1a;
        x3+=x1a;
    }
    printf("total=%u, in ranges=%u\n",x2,x3);
    /* Collect any child processes that have already exited, without blocking. */
    while(waitpid(-1,&i,WNOHANG)>0);
    *contig_ptr=err?0:ctg;
    return err;
}
Beispiel #3
0
/*
 * Program entry point: build coverage histograms over a set of ranges.
 *
 * Options (short / long):
 *   -r / --ranges_file FILE     ranges file (required)
 *   -t / --target_file FILE     optional target file
 *   -b / --block_size N         sampling step used for the histogram (>0)
 *   -n / --number N             passed through to process_file()
 *   -o / --output FILE          histogram output (default coverage_hist.txt)
 *   -x / --extend_regions N     extension applied to target ranges
 *   -d / --detailed_output      also write per-base detailed coverage
 *   -c / --combine              read inputs with read_det_cov() worker threads
 *   -C / --compare REF          stored via set_opt(); not used further here
 *   -E / --eland, -G / --gem    input format selection (GEM is the default)
 *   -p / --prefix STR           prefix for the contig summary file name
 * Remaining command line arguments are the input files.
 *
 * Outputs: a contig summary file, the histogram file (stdout if it cannot
 * be opened) and, with -d, a detailed coverage file that is piped through
 * the default compression filter when one is configured.
 *
 * Returns the error code accumulated from the reading/processing stages
 * (0 on success); invalid arguments terminate with exit(-1).
 */
int main(int argc,char *argv[])
{
    int i,j,k,c,err,nthr;
    int block_size,extend,detail,combine,out_cnt,format;
    struct lk_compress *lkc;
    struct contig *contigs,*ctg;
    struct range_blk *r;
    struct tdc_par tp;
    char *ranges_file,*target_file,*output_file,*compare_ref;
    char *filter,*suffix,*tn,*prefix,*pp;
    count *ct,cc,cbuf[OUT_WIDTH];
    u_int64_t *hist,*thist,nn,kk,tnn,tkk,number;
    u_int32_t x;
    pthread_t *read_threads;
    FILE *ofptr,*ctg_fptr;
    static struct option longopts[]= {
        {"ranges_file",required_argument,0,'r'},
        {"target_file",required_argument,0,'t'},
        {"block_size",required_argument,0,'b'},
        {"number",required_argument,0,'n'},
        {"output",required_argument,0,'o'},
        {"extend_regions",required_argument,0,'x'},
        {"detailed_output",no_argument,0,'d'},
        {"combine",no_argument,0,'c'},
        {"compare",required_argument,0,'C'},
        {"eland",no_argument,0,'E'},
        {"gem",no_argument,0,'G'},
        /* Must take an argument, matching "p:" in the short option string;
         * otherwise --prefix would reach set_opt() with a NULL optarg. */
        {"prefix",required_argument,0,'p'},
        {0,0,0,0}
    };

    err=0;
    detail=combine=0;
    ranges_file=target_file=output_file=compare_ref=prefix=0;
    block_size=1;
    extend=0;
    contigs=0;
    number=0;
    format=GEM_FMT;
    while((c=getopt_long(argc,argv,"p:r:t:b:o:n:x:C:dcEG",longopts,0))!=-1) {
        switch(c) {
        case 'p':
            set_opt("prefix",&prefix,optarg);
            break;
        case 'r':
            set_opt("ranges_file",&ranges_file,optarg);
            break;
        case 't':
            set_opt("target_file",&target_file,optarg);
            break;
        case 'C':
            set_opt("compare",&compare_ref,optarg);
            break;
        case 'o':
            set_opt("output",&output_file,optarg);
            break;
        case 'b':
            block_size=atoi(optarg);
            break;
        case 'E':
            format=ELAND_FMT;
            break;
        case 'G':
            format=GEM_FMT;
            break;
        case 'x':
            extend=atoi(optarg);
            break;
        case 'n':
            number=strtoul(optarg,&pp,10);
            break;
        case 'd':
            detail=1;
            break;
        case 'c':
            combine=1;
            break;
        default:
            /* Unrecognised options are ignored (getopt_long has already
             * printed its own diagnostic). */
            break;
        }
    }
    if(!ranges_file) {
        fprintf(stderr,"No ranges file specified.  Use -r or --ranges_file option\n");
        exit(-1);
    }
    if(!output_file) output_file=strdup("coverage_hist.txt");
    if(block_size<1) {
        fprintf(stderr,"Invalid block size specified - must be >0\n");
        exit(-1);
    }
    printf("Block size = %d, extend range = %d\n",block_size,extend);
    hist=lk_malloc(sizeof(u_int64_t)*(MAX_COUNT+1));
    if(target_file) thist=lk_malloc(sizeof(u_int64_t)*(MAX_COUNT+1));
    else thist=0;
    lkc=init_compress();
    err=read_ranges_file(ranges_file,&contigs,lkc);
    if(!err && target_file) err=read_target_file(target_file,contigs,extend,lkc);
    nthr=(int)sysconf(_SC_NPROCESSORS_ONLN);
    input_files=argv;
    input_idx=optind;
    n_input_files=argc;
    /* sysconf() reports failure as -1, so treat anything below 1 as "one". */
    if(nthr<1) nthr=1;
    read_threads=malloc(sizeof(pthread_t)*nthr);
    if(!read_threads) {
        fprintf(stderr,"Out of memory allocating thread table\n");
        exit(-1);
    }
    if(combine) {
        /* All workers share one parameter block. */
        tp.ctg=contigs;
        tp.lkc=lkc;
        for(i=0; i<nthr; i++) {
            if((j=pthread_create(read_threads+i,NULL,read_det_cov,&tp))) abt(__FILE__,__LINE__,"Thread creation %d failed: %d\n",i+1,j);
        }
        for(i=0; i<nthr; i++) pthread_join(read_threads[i],NULL);
    } else {
        for(i=optind; !err && i<argc; i++) {
            err=process_file(argv[i],contigs,block_size,format,&number,lkc);
        }
    }
    free(read_threads);
    printf("Generating histogram\n");
    filter=suffix=0;
    if(lkc->default_compress<COMPRESS_NONE) {
        i=lkc->default_compress;
        filter=lkc->comp_path[i][i==COMPRESS_ZIP?1:0];
        suffix=lkc->compress_suffix[i];
    }
    tn=0;
    if(prefix) {
        /* On failure asprintf() leaves the pointer undefined; normalise it
         * so the assertion below is meaningful. */
        if(asprintf(&tn,"%s_contig_summ.txt",prefix)<0) tn=0;
        assert(tn);
        ctg_fptr=fopen(tn,"w");
        free(tn);
    } else ctg_fptr=fopen("contig_summ.txt","w");
    if(target_file) {
        for(i=0; i<=MAX_COUNT; i++) hist[i]=thist[i]=0;
        for(ctg=contigs; ctg; ctg=ctg->hh.next) {
            /* Sample every block_size-th position; entries above MAX_COUNT
             * (such as NO_COUNT markers) are left out of the histogram. */
            ct=ctg->counts;
            for(i=0; i<(int)ctg->size; i+=block_size) {
                cc=ct[i];
                if(cc<=MAX_COUNT) {
                    hist[cc]++;
                    ctg->tot_count+=cc;
                }
            }
            /* Contigs that never appeared in the target file have no
             * tcounts array, so skip them rather than dereference NULL. */
            ct=ctg->tcounts;
            for(i=0; ct && i<(int)ctg->tsize; i+=block_size) {
                cc=ct[i];
                if(cc<=MAX_COUNT) {
                    thist[cc]++;
                    ctg->tot_tcount+=cc;
                }
            }
            if(ctg_fptr) {
                fprintf(ctg_fptr,"%s\t%u\t%"PRIu64"\t%g",ctg->name,ctg->tot_bases,ctg->tot_count,(double)ctg->tot_count/(double)ctg->tot_bases);
                fprintf(ctg_fptr,"\t%u\t%"PRIu64"\t%g\n",ctg->tot_tbases,ctg->tot_tcount,ctg->tot_tbases?(double)ctg->tot_tcount/(double)ctg->tot_tbases:0.0);
            }
        }
        if(ctg_fptr) fclose(ctg_fptr);
        nn=tnn=0;
        for(i=0; i<=MAX_COUNT; i++) {
            nn+=hist[i];
            tnn+=thist[i];
        }
        kk=tkk=0;
        ofptr=fopen(output_file,"w");
        if(!ofptr) ofptr=stdout;
        /* Columns: count, frequency, fraction, cumulative fraction - first
         * for all ranges, then for the target ranges. */
        for(i=0; i<=MAX_COUNT; i++) {
            kk+=hist[i];
            tkk+=thist[i];
            fprintf(ofptr,"%d\t%"PRIu64"\t%g\t%g\t",i,hist[i],(double)hist[i]/(double)nn,(double)kk/(double)nn);
            fprintf(ofptr,"%"PRIu64"\t%g\t%g\n",thist[i],(double)thist[i]/(double)tnn,(double)tkk/(double)tnn);
        }
        if(ofptr!=stdout) fclose(ofptr);
    } else {
        for(i=0; i<=MAX_COUNT; i++) hist[i]=0;
        for(ctg=contigs; ctg; ctg=ctg->hh.next) {
            ct=ctg->counts;
            for(i=0; i<(int)ctg->size; i+=block_size) {
                cc=ct[i];
                if(cc<=MAX_COUNT) {
                    hist[cc]++;
                    ctg->tot_count+=cc;
                }
            }
            if(ctg_fptr) {
                fprintf(ctg_fptr,"%s\t%u\t%"PRIu64"\t%g\n",ctg->name,ctg->tot_bases,ctg->tot_count,(double)ctg->tot_count/(double)ctg->tot_bases);
            }
        }
        if(ctg_fptr) fclose(ctg_fptr);
        nn=0;
        for(i=0; i<=MAX_COUNT; i++) nn+=hist[i];
        kk=0;
        ofptr=fopen(output_file,"w");
        if(!ofptr) ofptr=stdout;
        for(i=0; i<=MAX_COUNT; i++) {
            kk+=hist[i];
            fprintf(ofptr,"%d\t%"PRIu64"\t%g\t%g\n",i,hist[i],(double)hist[i]/(double)nn,(double)kk/(double)nn);
        }
        if(ofptr!=stdout) fclose(ofptr);
    }
    if(detail) {
        tn=0;
        ofptr=0;
        if(filter && suffix) {
            /* Pipe the detailed output through the compression filter. */
            if(asprintf(&tn,"detailed_coverage.txt.%s",suffix)<0) tn=0;
            if(tn) {
                i=child_open(WRITE,tn,filter);
                ofptr=fdopen(i,"w");
            }
        }
        if(!ofptr) ofptr=fopen("detailed_coverage.txt","w");
        if(!ofptr) ofptr=stdout;
        printf("Writing detailed coverage information\n");
        for(ctg=contigs; ctg; ctg=ctg->hh.next) {
            k=0;
            ct=ctg->counts;
            for(r=ctg->ranges; r; r=r->next) {
                for(i=0; i<r->idx; i++) {
                    /* Header line per range: the contig name is printed only
                     * on the first range; the start is written in hex. */
                    if(!k++) fprintf(ofptr,"*%s\t%x\n",ctg->name,r->x1[i]);
                    else fprintf(ofptr,"*\t%x\n",r->x1[i]);
                    out_cnt=0;
                    for(x=r->x1[i]; x<=r->x2[i]; x++) {
                        cbuf[out_cnt++]=ct[x-1];
                        if(out_cnt==OUT_WIDTH) {
                            out_cnt=write_cbuf(cbuf,out_cnt,ofptr);
                        }
                    }
                    /* Flush whatever is left in the buffer for this range. */
                    while(out_cnt) out_cnt=write_cbuf(cbuf,out_cnt,ofptr);
                }
            }
        }
        if(ofptr!=stdout) {
            fclose(ofptr);
            if(tn) {
                free(tn);
                /* Reap the filter child (non-blocking). */
                while(waitpid(-1,&i,WNOHANG)>0);
            }
        }
    }
    return err;
}
Beispiel #4
0
static int read_target_file(char *fname,struct contig *ctg,int extend,struct lk_compress *lkc)
{
    int i,err;
    u_int32_t x1,x2,x3,x,cs,x1a;
    struct contig *c;
    struct range_blk *r;
    char *estr;
    FILE *fptr;
    string *s;
    tokens *tok;
    void *tbuf;

    s=0;
    tok=0;
    tbuf=0;
    err=0;
    fptr=open_readfile_and_check(fname,&i,lkc);
    printf("Reading target file '%s'\n",fname);
    while(!err) {
        s=fget_string(fptr,s,&tbuf);
        if(!s->len) break;
        tok=tokenize(get_cstring(s),'\t',tok);
        if(tok->n_tok>=3) {
            HASH_FIND_STR(ctg,tok->toks[0],c);
            if(c) {
                r=c->tranges;
                if(!r) {
                    r=c->tranges=lk_malloc(sizeof(struct range_blk));
                    r->idx=0;
                } else {
                    if(r->idx==RANGE_BLK_SIZE) {
                        r=lk_malloc(sizeof(struct range_blk));
                        r->idx=0;
                        r->next=c->tranges;
                        c->tranges=r;
                    }
                }
                x1=(u_int32_t)getlnumcolumn(tok,1,0,&estr);
                if(!estr) x2=(u_int32_t)getlnumcolumn(tok,2,0,&estr);
                if(estr || x1<1 || x2<x1) {
                    err=1;
                    fprintf(stderr,"Error in coordinate format: %s\n",estr);
                } else {
                    if(x1<=(unsigned int)extend) x1=1;
                    else x1-=extend;
                    x2+=extend;
                    if(x1<1) x1=1;
                    r->name[r->idx]=getstrcolumn(tok,3,0,0);
                    r->x1[r->idx]=x1;
                    r->x2[r->idx++]=x2;
                }
            } else {
                printf("Not found %s\n",tok->toks[0]);
            }
        }
    }
    fclose(fptr);
    if(s) free_string(s);
    if(tok) free_tokens(tok);
    if(tbuf) free_fget_buffer(&tbuf);
    signal(SIGCHLD,SIG_DFL);
    x2=x3=0;
    for(c=ctg; c && !err; c=c->hh.next) {
        if(!c->tranges) continue;
        x1=0;
        for(r=c->tranges; r; r=r->next) {
            for(i=0; i<r->idx; i++) {
                if(r->x2[i]>x1) x1=r->x2[i];
            }
        }
        if(x1>c->size) x1=c->size;
        c->tsize=x1;
        x1a=0;
        if(!x1) {
            fprintf(stderr,"Illegal target range for %s\n",c->name);
            err=2;
            continue;
        } else {
            x2+=x1;
            /*      printf("%s\t%d\n",c->name,c->tsize);*/
            c->tcounts=lk_malloc(sizeof(count)*c->tsize);
            cs=c->tsize;
            for(i=0; i<(int)cs; i++) c->tcounts[i]=NO_COUNT;
            for(r=c->tranges; r; r=r->next) {
                for(i=0; i<r->idx; i++) {
                    for(x=r->x1[i]-1; x<r->x2[i]; x++) {
                        if(x>=cs) break;
                        if(!c->counts[x]) c->tcounts[x]=0;
                    }
                }
            }
            for(i=0; i<(int)cs; i++) if(!c->tcounts[i]) x1a++;
        }
        c->tot_tbases=x1a;
        x3+=x1a;
    }
    printf("total=%u, on target=%u\n",x2,x3);
    while(waitpid(-1,&i,WNOHANG)>0);
    return err;
}