static void ls(char ** args) { char * path = kstrtok(0, DELIMS, args); int fildes, count; struct dirent dbuf[10]; if (path == 0 || !strcmp(path, "")) path = "./"; fildes = open(path, O_DIRECTORY | O_RDONLY | O_SEARCH); if (fildes < 0) { puts("Open failed\n"); return; } count = getdents(fildes, (char *)dbuf, sizeof(dbuf)); if (count < 0) { puts("Reading directory entries failed\n"); } for (int i = 0; i < count; i++) { char buf[80]; struct stat stat; fstatat(fildes, dbuf[i].d_name, &stat, 0); ksprintf(buf, sizeof(buf), "%u %o %u:%u %s\n", (uint32_t)dbuf[i].d_ino, (uint32_t)stat.st_mode, (uint32_t)stat.st_uid, (uint32_t)stat.st_gid, dbuf[i].d_name); puts(buf); } puts("\n"); close(fildes); }
/**
 * Shell command: create a file (mode rw-r--r--) at the given path.
 *
 * The path is the next token fetched from @p args with kstrtok(). A missing
 * or empty path and a failing creat() are reported instead of being passed
 * on silently.
 *
 * @param args tokenizer state passed through to kstrtok().
 */
static void touch(char ** args)
{
    char * path = kstrtok(0, DELIMS, args);
    int fildes;

    if (path == 0 || !strcmp(path, "")) {
        puts("Missing path\n");
        return;
    }

    fildes = creat(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fildes < 0) {
        /* Do not hand an invalid descriptor to close(). */
        puts("Creating file failed\n");
        return;
    }

    close(fildes);
}
/**
 * Find the per-sample value that corresponds to a FORMAT key.
 *
 * @param ptr    start of the FORMAT column of a VCF line; the sample column
 *               is expected to follow after a tab.
 * @param FORMAT key to look for (e.g. "DP").
 * @return pointer to the start of the matching ':'-separated field inside
 *         the sample column (not NUL-terminated by this function), or NULL
 *         when the key does not occur.
 *
 * NOTE(review): the key is located with strstr(), i.e. by substring, and the
 * search is not limited to the FORMAT column - "DP" would also match "DPI".
 * Confirm that callers rely on this before tightening it.
 */
char *find_format(char *ptr, char *FORMAT)
{
    char *hit = strstr(ptr, FORMAT);
    if (hit == NULL)
        return NULL;

    /* The number of ':' in front of the match is the index of the key. */
    int field = 0;
    for (char *c = ptr; c < hit; c++) {
        if (*c == ':')
            field++;
    }

    ks_tokaux_t aux;
    char *cursor = kstrtok(ptr, "\t", &aux);   /* FORMAT column */
    cursor = kstrtok(NULL, NULL, &aux);        /* sample column */

    /* Re-tokenize the sample column on ':' and step to the same index. */
    cursor = kstrtok(cursor, ":", &aux);
    while (field-- > 0)
        cursor = kstrtok(NULL, NULL, &aux);

    return cursor;
}
void parse(char *dir_path, int type, uint64_t start, uint64_t end) { fnode_t *temp_node, *aux_node, *currnode = root_node->f_child[2]; char *temp; int i = 0; char *path = (char *)kmalloc(sizeof(char) * kstrlen(dir_path)); kstrcpy(path, dir_path); temp = kstrtok(path, "/"); while (temp != NULL) { aux_node = currnode; //kprintf("%s \n", temp); //iterate through all childrens of currnode for(i = 2; i < currnode->end; ++i){ if(kstrcmp(temp, currnode->f_child[i]->f_name) == 0) { currnode = (fnode_t *)currnode->f_child[i]; break; } } //kprintf("\n....%s...%s...", currnode->f_name, temp); //if no child has been found //add this as child of current if (i == aux_node->end) { temp_node = (fnode_t *)kmalloc(sizeof(struct file)); make_node(temp_node, currnode, temp, start, end, type, 0); currnode->f_child[currnode->end] = temp_node; currnode->end += 1; } //kprintf("....%d...%s...", currnode->end, temp); //while(1); temp = kstrtok(NULL, "/"); } }
void* file_lookup(char *dir_path) { char* file_path = (char *)dir_path; fnode_t *aux_node, *currnode = root_node; char *temp = NULL; int i; char *path = (char *)kmalloc(sizeof(char) * kstrlen(file_path)); kstrcpy(path, file_path); temp = kstrtok(path, "/"); if (temp == NULL) return NULL; //kprintf("\n step1 %s", temp); while (temp != NULL) { aux_node = currnode; for (i = 2; i < currnode->end; ++i) { if (kstrcmp(temp, currnode->f_child[i]->f_name) == 0) { currnode = (fnode_t *)currnode->f_child[i]; break; } } if (i == aux_node->end) { return NULL; } temp = kstrtok(NULL, "/"); } if (currnode->f_type == FILE) return (void *)currnode->start; else return NULL; }
/**
 * Read the header of a VCF stream into a newly allocated bcf_hdr_t.
 *
 * Non-VCF handles are delegated to bcf_hdr_read(). Otherwise lines are read
 * until the "#..." column line: "##" lines are appended (newline-terminated)
 * to the header text, and columns 10 and onwards of the column line are
 * stored as NUL-separated sample names.
 *
 * @param bp input handle.
 * @return the new header, or 0 when a non-header line is met before the
 *         column line or when the header cannot be allocated.
 */
bcf_hdr_t *vcf_hdr_read(bcf_t *bp)
{
    kstring_t meta, smpl;
    int dret;
    vcf_t *v;
    bcf_hdr_t *h;

    if (!bp->is_vcf)
        return bcf_hdr_read(bp);

    h = calloc(1, sizeof(bcf_hdr_t));
    if (h == 0)
        return 0;

    v = (vcf_t*)bp->v;
    v->line.l = 0;
    memset(&meta, 0, sizeof(kstring_t));
    memset(&smpl, 0, sizeof(kstring_t));

    while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) {
        if (v->line.l < 2)
            continue;

        if (v->line.s[0] != '#') {
            /* no sample line: release what was built so far instead of
             * leaking the header and the collected meta text */
            free(meta.s);
            free(smpl.s);
            free(h);
            return 0;
        }

        if (v->line.s[1] == '#') {
            /* meta-information line */
            kputsn(v->line.s, v->line.l, &meta);
            kputc('\n', &meta);
        } else {
            /* column line: fields 0-8 are fixed, the rest are sample names */
            int k;
            ks_tokaux_t aux;
            char *p;
            for (p = kstrtok(v->line.s, "\t\n", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) {
                if (k >= 9) {
                    kputsn(p, aux.p - p, &smpl);
                    kputc('\0', &smpl);
                }
            }
            break;
        }
    }

    kputc('\0', &meta);
    h->name = 0;
    h->sname = smpl.s;
    h->l_smpl = smpl.l;
    h->txt = meta.s;
    h->l_txt = meta.l;
    bcf_hdr_sync(h);
    return h;
}
int ingest1(const char *input,const char *output,char *ref,bool exit_on_mismatch=true) { cerr << "Input: " << input << "\tOutput: "<<output<<endl; kstream_t *ks; kstring_t str = {0,0,0}; gzFile fp = gzopen(input, "r"); VarBuffer vbuf(1000); int prev_rid = -1; if(fp==NULL) { fprintf(stderr,"problem opening %s\n",input); exit(1); } char *out_fname = (char *)malloc(strlen(output)+5); strcpy(out_fname,output); strcat(out_fname,".tmp"); if(fileexists(out_fname)) { fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname); exit(1); } printf("depth: %s\n",out_fname); gzFile depth_fp = gzopen(out_fname, "wb1"); strcpy(out_fname,output); strcat(out_fname,".bcf"); if(fileexists(out_fname)) { fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname); exit(1); } printf("variants: %s\n",out_fname); htsFile *variant_fp=hts_open(out_fname,"wb1"); if(variant_fp==NULL) { fprintf(stderr,"problem opening %s\n",input); exit(1); } ks = ks_init(fp); htsFile *hfp=hts_open(input, "r"); bcf_hdr_t *hdr_in = bcf_hdr_read(hfp); hts_close(hfp); //this is a hack to fix gvcfs where AD is incorrectly defined in the header. (vcf4.2 does not technically allow Number=R) bcf_hdr_remove(hdr_in,BCF_HL_FMT,"AD"); assert( bcf_hdr_append(hdr_in,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed. 
For indels this value only includes reads which confidently support each allele (posterior prob 0.999 or higher that read contains indicated allele vs all other intersecting indel alleles)\">") == 0); //this is a hack to fix broken gvcfs where GQ is incorrectly labelled as float (v4.3 spec says it should be integer) bcf_hdr_remove(hdr_in,BCF_HL_FMT,"GQ"); assert( bcf_hdr_append(hdr_in,"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">") == 0); // bcf_hdr_t *hdr_out=hdr_in; bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr_in); remove_hdr_lines(hdr_out,BCF_HL_INFO); remove_hdr_lines(hdr_out,BCF_HL_FLT); bcf_hdr_sync(hdr_out); //here we add FORMAT/PF. which is the pass filter flag for alts. assert( bcf_hdr_append(hdr_out,"##FORMAT=<ID=PF,Number=A,Type=Integer,Description=\"variant was PASS filter in original sample gvcf\">") == 0); args_t *norm_args = init_vcfnorm(hdr_out,ref); norm_args->check_ref |= CHECK_REF_WARN; bcf1_t *bcf_rec = bcf_init(); bcf_hdr_write(variant_fp, hdr_out); kstring_t work1 = {0,0,0}; int buf[5]; ks_tokaux_t aux; int ndec=0; int ref_len,alt_len; while( ks_getuntil(ks, '\n', &str, 0) >=0) { // fprintf(stderr,"%s\n",str.s); if(str.s[0]!='#') { char *ptr = kstrtok(str.s,"\t",&aux);//chrom ptr = kstrtok(NULL,NULL,&aux);//pos work1.l=0; kputsn(str.s,ptr-str.s-1, &work1); buf[0] = bcf_hdr_name2id(hdr_in, work1.s); assert( buf[0]>=0); buf[1]=atoi(ptr)-1; ptr = kstrtok(NULL,NULL,&aux);//ID ptr = kstrtok(NULL,NULL,&aux);//REF ref_len=0; while(ptr[ref_len]!='\t') ref_len++; ptr = kstrtok(NULL,NULL,&aux);//ALT bool is_variant=false; alt_len=0; while(ptr[alt_len]!='\t') alt_len++; if(ptr[0]!='.') is_variant=true; char * QUAL_ptr = kstrtok(NULL, NULL, &aux); assert (QUAL_ptr != NULL); for(int i=0;i<2;i++) ptr = kstrtok(NULL,NULL,&aux);// gets us to INFO //find END if it is there char *end_ptr=strstr(ptr,"END=") ; if(end_ptr!=NULL) buf[2]=atoi(end_ptr+4)-1; else buf[2]=buf[1]+alt_len-1; ptr = kstrtok(NULL,NULL,&aux);//FORMAT //find index of DP (if 
present) //if not present, dont output anything (indels ignored) char *DP_ptr = find_format(ptr,"DP"); int GQX = 0; int QUAL = 0; // AH: change code to use the minimum of GQ and QUAL fields if // GQX is not defined. See here: // https://support.basespace.illumina.com/knowledgebase/articles/144844-vcf-file // "GQXGenotype quality. GQX is the minimum of the GQ value // and the QUAL column. In general, these are similar values; // taking the minimum makes GQX the more conservative measure of // genotype quality." if(DP_ptr!=NULL) { buf[3]=atoi(DP_ptr); char *GQX_ptr = find_format(ptr,"GQX"); if (GQX_ptr == NULL) { GQX_ptr = find_format(ptr,"GQ"); GQX = atoi(GQX_ptr); if (QUAL_ptr[0] != '.') { QUAL = atoi(QUAL_ptr); if (QUAL < GQX) GQX = QUAL; } } else { GQX = atoi(GQX_ptr); } //trying to reduce entropy on GQ to get better compression performance. //1. rounds down to nearest 10. //2. sets gq to min(gq,100). buf[4]=GQX/10; buf[4]*=10; if(buf[4]>100) buf[4]=100; // printf("%d\t%d\t%d\t%d\t%d\n",buf[0],buf[1],buf[2],buf[3],buf[4]); if(gzwrite(depth_fp,buf,5*sizeof(int))!=(5*sizeof(int))) die("ERROR: problem writing "+(string)out_fname+".tmp"); } if(is_variant) {//wass this a variant? 
if so write it out to the bcf norm_args->ntotal++; vcf_parse(&str,hdr_in,bcf_rec); // cerr<<bcf_rec->rid<<":"<<bcf_rec->pos<<endl; if(prev_rid!=bcf_rec->rid) vbuf.flush(variant_fp,hdr_out); else vbuf.flush(bcf_rec->pos,variant_fp,hdr_out); prev_rid=bcf_rec->rid; int32_t pass = bcf_has_filter(hdr_in, bcf_rec, "."); bcf_update_format_int32(hdr_out,bcf_rec,"PF",&pass,1); bcf_update_filter(hdr_out,bcf_rec,NULL,0); if(bcf_rec->n_allele>2) {//split multi-allelics (using vcfnorm.c from bcftools1.3 norm_args->nsplit++; split_multiallelic_to_biallelics(norm_args,bcf_rec ); for(int i=0;i<norm_args->ntmp_lines;i++){ remove_info(norm_args->tmp_lines[i]); if(realign(norm_args,norm_args->tmp_lines[i]) != ERR_REF_MISMATCH) ndec+=decompose(norm_args->tmp_lines[i],hdr_out,vbuf); else if(exit_on_mismatch) die("vcf did not match the reference"); else norm_args->nskipped++; } } else { remove_info(bcf_rec); if( realign(norm_args,bcf_rec) != ERR_REF_MISMATCH) ndec+=decompose(bcf_rec,hdr_out,vbuf); else if(exit_on_mismatch) die("vcf did not match the reference"); else norm_args->nskipped++; } vbuf.flush(bcf_rec->pos,variant_fp,hdr_out); } } } vbuf.flush(variant_fp,hdr_out); bcf_hdr_destroy(hdr_in); bcf_hdr_destroy(hdr_out); bcf_destroy1(bcf_rec); ks_destroy(ks); gzclose(fp); gzclose(depth_fp); free(str.s); free(work1.s); hts_close(variant_fp); destroy_data(norm_args); fprintf(stderr,"Variant lines total/split/realigned/skipped:\t%d/%d/%d/%d\n", norm_args->ntotal,norm_args->nsplit,norm_args->nchanged,norm_args->nskipped); fprintf(stderr,"Decomposed %d MNPs\n", ndec); fprintf(stderr,"Indexing %s\n",out_fname); bcf_index_build(out_fname, BCF_LIDX_SHIFT); free(out_fname); return 0; }
/**
 * Read one VCF line from bp and fill the record b.
 *
 * Non-VCF handles are delegated to bcf_read(). Otherwise the next line is
 * split on tabs and each column is handled by index:
 *   0      reference name; unknown names are added to v->refhash and
 *          appended to the header's name buffer, which triggers a
 *          bcf_hdr_sync() at the end
 *   1      position (stored 0-based)
 *   5      quality (0 unless the field starts with a digit)
 *   2-4,   appended NUL-terminated to b->str; bcf_sync() is called once
 *   6-8    column 8 has been appended
 *   9+     one sample per column; values are written into b->gi[] at
 *          sample index (column - 9)
 *
 * @param bp input handle.
 * @param h  header; its name buffer may be extended.
 * @param b  record to fill.
 * @return -1 when no further line can be read, otherwise the length of the
 *         line plus one.
 */
int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
{
    int dret, col, f, need_sync = 0;
    vcf_t *v = (vcf_t*)bp->v;
    char *field, *sub;
    kstring_t str, rn;
    ks_tokaux_t col_aux, sub_aux;

    if (!bp->is_vcf)
        return bcf_read(bp, h, b);

    v->line.l = 0;
    str.l = 0;
    str.m = b->m_str;
    str.s = b->str;
    rn.l = rn.m = h->l_nm;
    rn.s = h->name;

    if (ks_getuntil(v->ks, '\n', &v->line, &dret) < 0)
        return -1;
    b->n_smpl = h->n_smpl;

    for (field = kstrtok(v->line.s, "\t", &col_aux), col = 0; field; field = kstrtok(0, 0, &col_aux), ++col) {
        /* terminate the current column in place */
        *(char*)col_aux.p = 0;

        switch (col) {
        case 0: { /* ref */
            int tid = bcf_str2id(v->refhash, field);
            if (tid < 0) {
                tid = bcf_str2id_add(v->refhash, strdup(field));
                kputs(field, &rn);
                kputc('\0', &rn);
                need_sync = 1;
            }
            b->tid = tid;
            break;
        }
        case 1: /* pos */
            b->pos = atoi(field) - 1;
            break;
        case 5: /* qual */
            b->qual = (field[0] >= '0' && field[0] <= '9')? atof(field) : 0;
            break;
        case 2: case 3: case 4:
        case 6: case 7: case 8: /* variable length strings */
            kputs(field, &str);
            kputc('\0', &str);
            b->l_str = str.l;
            b->m_str = str.m;
            b->str = str.s;
            if (col == 8)
                bcf_sync(b);
            break;
        default: { /* col >= 9: sample columns */
            const int smp = col - 9;

            if (strncmp(field, "./.", 3) == 0) {
                /* missing genotype: write a placeholder for every format field */
                for (f = 0; f < b->n_gi; ++f) {
                    if (b->gi[f].fmt == bcf_str2int("GT", 2)) {
                        ((uint8_t*)b->gi[f].data)[smp] = 1<<7;
                    } else if (b->gi[f].fmt == bcf_str2int("GQ", 2)) {
                        ((uint8_t*)b->gi[f].data)[smp] = 0;
                    } else if (b->gi[f].fmt == bcf_str2int("SP", 2)) {
                        ((int32_t*)b->gi[f].data)[smp] = 0;
                    } else if (b->gi[f].fmt == bcf_str2int("DP", 2)) {
                        ((uint16_t*)b->gi[f].data)[smp] = 0;
                    } else if (b->gi[f].fmt == bcf_str2int("PL", 2)) {
                        int n_gt = b->n_alleles * (b->n_alleles + 1) / 2;
                        memset((uint8_t*)b->gi[f].data + smp * n_gt, 0, n_gt);
                    } else if (b->gi[f].fmt == bcf_str2int("GL", 2)) {
                        int n_gt = b->n_alleles * (b->n_alleles + 1) / 2;
                        memset((float*)b->gi[f].data + smp * n_gt, 0, n_gt * 4);
                    }
                }
            } else {
                /* walk the ':'-separated sub-fields in step with b->gi[] */
                for (sub = kstrtok(field, ":", &sub_aux), f = 0; sub && f < b->n_gi; sub = kstrtok(0, 0, &sub_aux), ++f) {
                    if (b->gi[f].fmt == bcf_str2int("GT", 2)) {
                        /* first allele << 3 | second allele | bit 6 set unless the separator is '/' */
                        ((uint8_t*)b->gi[f].data)[smp] = (sub[0] - '0')<<3 | (sub[2] - '0') | (sub[1] == '/'? 0 : 1) << 6;
                    } else if (b->gi[f].fmt == bcf_str2int("GQ", 2)) {
                        double raw = strtod(sub, &sub);
                        int gq = (int)(raw + .499);
                        if (gq > 255) gq = 255;
                        ((uint8_t*)b->gi[f].data)[smp] = gq;
                    } else if (b->gi[f].fmt == bcf_str2int("SP", 2)) {
                        int sp = strtol(sub, &sub, 10);
                        if (sp > 0xffff) sp = 0xffff;
                        ((uint32_t*)b->gi[f].data)[smp] = sp;
                    } else if (b->gi[f].fmt == bcf_str2int("DP", 2)) {
                        int dp = strtol(sub, &sub, 10);
                        if (dp > 0xffff) dp = 0xffff;
                        ((uint16_t*)b->gi[f].data)[smp] = dp;
                    } else if (b->gi[f].fmt == bcf_str2int("PL", 2)) {
                        int pl, n_gt, j;
                        uint8_t *out = (uint8_t*)b->gi[f].data;
                        n_gt = b->n_alleles * (b->n_alleles + 1) / 2;
                        for (j = 0; j < n_gt; ++j) {
                            pl = strtol(sub, &sub, 10);
                            if (pl > 255) pl = 255;
                            out[smp * n_gt + j] = pl;
                            ++sub; /* step over the separator */
                        }
                    } else if (b->gi[f].fmt == bcf_str2int("GL", 2)) {
                        int j, n_gt;
                        float gl, *out = (float*)b->gi[f].data;
                        n_gt = b->n_alleles * (b->n_alleles + 1) / 2;
                        for (j = 0; j < n_gt; ++j) {
                            gl = strtod(sub, &sub);
                            out[smp * n_gt + j] = gl > 0? -gl/10. : gl;
                            ++sub; /* step over the separator */
                        }
                    }
                }
            }
            break;
        }
        }
    }

    h->l_nm = rn.l;
    h->name = rn.s;
    if (need_sync)
        bcf_hdr_sync(h);
    return v->line.l + 1;
}
/**
 * Shell command: remove the directory entry named by the next argument.
 *
 * A missing or empty path and a failing unlink() are reported instead of
 * being ignored.
 *
 * @param args tokenizer state passed through to kstrtok().
 */
static void tish_unlink(char ** args)
{
    char * path = kstrtok(0, DELIMS, args);

    if (path == 0 || !strcmp(path, "")) {
        puts("Missing path\n");
        return;
    }

    if (unlink(path) != 0)
        puts("unlink failed\n");
}
/**
 * Shell command: remove the directory named by the next argument.
 *
 * A missing or empty path and a failing rmdir() are reported instead of
 * being ignored.
 *
 * @param args tokenizer state passed through to kstrtok().
 */
static void tish_rmdir(char ** args)
{
    char * path = kstrtok(0, DELIMS, args);

    if (path == 0 || !strcmp(path, "")) {
        puts("Missing path\n");
        return;
    }

    if (rmdir(path) != 0)
        puts("rmdir failed\n");
}
/**
 * Shell command: create the directory named by the next argument with mode
 * rwxr-x--- (S_IRWXU | S_IRGRP | S_IXGRP).
 *
 * A missing or empty path and a failing mkdir() are reported instead of
 * being ignored.
 *
 * @param args tokenizer state passed through to kstrtok().
 */
static void tish_mkdir(char ** args)
{
    char * path = kstrtok(0, DELIMS, args);

    if (path == 0 || !strcmp(path, "")) {
        puts("Missing path\n");
        return;
    }

    if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP) != 0)
        puts("mkdir failed\n");
}
static void debug(char ** args) { char * arg = kstrtok(0, DELIMS, args); /* Thread debug commands */ if (!strcmp(arg, "thread")) { arg = kstrtok(0, DELIMS, args); if (!strcmp(arg, "create")) { create_debug_thread(); } else { puts(invalid_arg); } /* Process debug commands */ } else if (!strcmp(arg, "proc")) { arg = kstrtok(0, DELIMS, args); if (!strcmp(arg, "fork")) { pid_t pid = fork(); if (pid == -1) { puts("fork() failed\n"); } else if (pid == 0) { puts("Hello from the child process\n"); for (int i = 0; i < 10; i++) { puts("."); msleep(500); } exit(0); } else { int status; char buf[20]; puts("original\n"); wait(&status); ksprintf(buf, sizeof(buf), "status: %u\n", status); puts(buf); } } else { puts(invalid_arg); } /* Data Abort Commands */ } else if (!strcmp(arg, "dab")) { arg = kstrtok(0, DELIMS, args); if (!strcmp(arg, "fatal")) { puts("Trying fatal DAB\n"); int * x = (void *)0xfffffff; *x = 1; } else { puts(invalid_arg); } } else if (!strcmp(arg, "ioctl")) { arg = kstrtok(0, DELIMS, args); if (!strcmp(arg, "termios")) { struct termios term; int err; char buf[80]; err = tcgetattr(STDOUT_FILENO, &term); if (err) return; ksprintf(buf, sizeof(buf), "cflags: %u\nispeed: %u\nospeed: %u\n", term.c_cflag, term.c_ispeed, term.c_ospeed); puts(buf); } else { puts(invalid_arg); } } else if (!strcmp(arg, "file")) { const char text[] = "This is a test."; char buf[80]; int fildes = open("file", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fildes < 0) return; write(fildes, text, sizeof(text)); lseek(fildes, 0, SEEK_SET); read(fildes, buf, sizeof(buf)); close(fildes); puts(buf); } else { puts("Invalid subcommand\n"); errno = EINVAL; } }