static hashtable* ReadReference(const char* const refname) { hashtable* reference = new_hashtable(12); sequence* sp = read_fasta_sequence(refname); while(sp != NULL){ // allocate a coverage array for the sequence chrcoverage* cov = ckallocz(sizeof(chrcoverage)); cov->length = strlen((char*)sp->sequence); cov->map = ckallocz(strlen((char*)sp->sequence)); cov->cov = ckallocz(strlen((char*)sp->sequence)); cov->seq = ckallocz(strlen((char*)sp->sequence)+1); memcpy(cov->seq, sp->sequence, cov->length); // if the name of the sequence has more than one tokens, just use the // first token in the name int i = 0; while((sp->header[i] != '\n') && (sp->header[i] != 0) && (sp->header[i] != '\t') && (sp->header[i] != 32)) i++; sp->header[i] = 0; add_hashtable(reference,(char*)sp->header,strlen((char*)sp->header),cov); sp = get_next_sequence(sp); } close_fasta_sequence(sp); return reference; }
/*
 * Build the directive lookup table: every entry of directives[] is added to
 * dirhash under its name, with its array index as the stored data.
 * Always returns 1.
 */
int init_syntax()
{
    hashdata entry;
    size_t idx = 0;

    dirhash = new_hashtable(0x200);  /* FIXME kept from the original (no detail given) */

    while (idx < dir_cnt) {
        entry.idx = idx;
        add_hashentry(dirhash, directives[idx].name, entry);
        idx++;
    }

    return 1;
}
static int import_csv(const char* path) { FILE* fp = fopen(path, "r"); if (fp == NULL) { fprintf(stderr, "File not found: %s\n", path); return -1; } hashtable_t* datastreams = new_hashtable(NULL); while (1) { unsigned long id; double t, v; int n = fscanf(fp, "%lu,%lf,%lf\n", &id, &t, &v); if (n < 3) break; datastream_t* d = NULL; hashtable_lookup(datastreams, id, (void**) &d); if (d == NULL) { d = datastream_load(id); if (d == NULL) { d = datastream_create(id, 0.5, 3600.0, 0.1); if (d == NULL) { fprintf(stderr, "Failed to create the datastream\n"); goto error_recovery; } } hashtable_insert(datastreams, id, d); } printf("id=%d, t=%d, v=%f\n", (int) id, (int) t, v); int err = datastream_insert(d, (timestamp_t) t, (value_t) v); if (err != 0) goto error_recovery; } fclose(fp); hashtable_foreach(datastreams, store_datastreams_callback, NULL); return 0; error_recovery: fclose(fp); hashtable_foreach(datastreams, store_datastreams_callback, NULL); return -1; }
static int init_main(void) { size_t i; char *last; hashdata data; mnemohash=new_hashtable(MNEMOHTABSIZE); i=0; while(i<mnemonic_cnt) { data.idx=i; last=mnemonics[i].name; add_hashentry(mnemohash,mnemonics[i].name,data); do { i++; } while(i<mnemonic_cnt&&!strcmp(last,mnemonics[i].name)); } if(DEBUG) { if(mnemohash->collisions) printf("*** %d mnemonic collisions!!\n",mnemohash->collisions); } symhash=new_hashtable(SYMHTABSIZE); new_include_path("."); taddrmask=MAKEMASK(bytespertaddr<<3); return 1; }
int init_syntax() { size_t i; hashdata data; dirhash=new_hashtable(0x200); /*FIXME: */ for(i=0;i<dir_cnt;i++){ data.idx=i; add_hashentry(dirhash,directives[i].name,data); } #if defined(VASM_CPU_X86) current_pc_char = '.'; #endif cond[0] = 1; clev = ifnesting = 0; return 1; }
/*
 * Allocate and initialise a database handle.
 *
 * The structure is zero-initialised and its cache is created with
 * delete_datastream_callback as the hashtable's delete callback.
 *
 * Returns the new handle, or NULL when the structure cannot be allocated
 * (an error is logged) or the cache cannot be created.
 */
osdb_t* osdb_init()
{
    osdb_t* db = calloc(1, sizeof(osdb_t));

    if (db == NULL) {
        log_err("osdb_init: out of memory");
        return NULL;
    }

    db->cache = new_hashtable((hashtable_delete_t) delete_datastream_callback);
    if (db->cache != NULL)
        return db;

    /* cache creation failed: release the half-built handle */
    free(db);
    return NULL;
}
/*
 * Look up `s` in the string pool and take a reference on it.
 *
 * The pool hashtable is created on first use. If no element matching `s` is
 * found, one is inserted. The reference count of the table's current element
 * is then incremented and that element's string pointer is returned.
 */
char const *
strget(char const *s)
{
    poolelem probe;
    poolelem *stored;

    /* lazily create the pool */
    if (strpool == NULL)
        strpool = new_hashtable(&poolinfo);

    probe.str = s;
    if (!find_hashtable(strpool, (hashelt *)&probe))
        insert_hashtable(strpool, (hashelt *)&probe);

    /* after find/insert the table's current element is the pooled entry */
    stored = (poolelem *)cur_hashtable(strpool);
    stored->ref++;

    return stored->str;
}
/*
 * Initialise the syntax module.
 *
 * Adds every entry of directives[] to dirhash (keyed by name, storing the
 * array index), initialises conditional assembly via cond_init(), selects '*'
 * as the current-PC character, enables escape sequences and downgrades
 * general error 47 to a warning.
 * Always returns 1.
 */
int init_syntax()
{
    hashdata entry;
    size_t idx = 0;

    dirhash = new_hashtable(0x200);

    while (idx < dir_cnt) {
        entry.idx = idx;
        add_hashentry(dirhash, directives[idx].name, entry);
        idx++;
    }

    cond_init();
    current_pc_char = '*';
    esc_sequences = 1;

    /* assertion errors are only a warning */
    modify_gen_err(WARNING, 47, 0);

    return 1;
}
/*
 * Count the ways to form an amount from a set of coin denominations.
 *
 * Usage: <prog> <amount> <denomination> [<denomination> ...]
 *
 * argv[1] is the amount and every following argument is one denomination.
 * The count computed by memo_coin_sums() is printed on stdout.
 *
 * Returns 0 on success, 1 when too few arguments are given or the
 * denomination array cannot be allocated.
 */
int main(int argc, char *argv[])
{
    if (argc <= 2) {
        printf("Enter the amount followed by the denominations.\n");
        return 1;
    }

    int amount = atoi(argv[1]);
    int size = argc - 2;

    int *dens = malloc(size * sizeof(int));
    if (dens == NULL) {
        /* the original dereferenced the result unchecked */
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    for (int i = 0; i < size; i++) {
        dens[i] = atoi(argv[i + 2]);
    }

    int memo_size = 10000;
    HashTable *memo = new_hashtable(memo_size);

    long ways = memo_coin_sums(dens, size, amount, memo);
    free(dens);

    printf("%ld\n", ways);
    return 0;
}
int text2wfreq_impl(FILE* infp, FILE* outfp, int init_nwords, int verbosity) { int hash_size, scanrc; struct hash_table vocab; char word[MAX_STRING_LENGTH]; hash_size = nearest_prime( init_nwords ); new_hashtable( &vocab, hash_size ); while( (scanrc = fscanf(infp, "%500s", word )) == 1 ) { if ( strlen( word ) >= MAX_STRING_LENGTH ) { pc_message(verbosity,1,"text2wfreq : WARNING: word too long, will be split: %s...\n",word); } if (strlen(word)) { update( &vocab, word ,verbosity); } } if ( scanrc != EOF ) { quit(-1,"Error reading input\n"); } print( outfp, &vocab ); return 0; }
int main( int argc, char **argv ) { int i; int N=NUMBER_KEYS; // The number of keys to insert in the hash_table char* keys[NUMBER_KEYS]={KEYS}; char* values[NUMBER_KEYS]={VALUES}; hashtable *hash_table = new_hashtable(); //The definition of the zize intialized to 8 srand(time(NULL)); // here we start by genarting a random keys and values printf("############ Initialisation of keys #################################################################\n\n"); for(i=0;i<N;i++) { printf("keys[%d]==\"%s\" with hash %d\n",i,keys[i],hash(keys[i])%hash_table->size); } printf("\n############ Initialisation of values #############################################################\n\n"); for(i=0;i<N;i++) { printf("values[%d]==\"%s\"\n",i,values[i]); } // insertion of key-value pairs in the hash_table printf("\n############ Insertion of (key,value) pairs ########################################################\n\n"); for(i=0;i<N;i++) { insert_resize_pair(&hash_table,keys[i],values[i]); printf("Insertion of (\"%s\",\"%s\") succed , SIZE = %d \n",keys [i],values[i],hash_table->size); } printf("\n Our hash table = "); print_hashtable(hash_table); // reinsertion of some pairs in the hash_table printf("\n############ Reinsertion of some pairs #############################################################\n\n"); insert_pair( hash_table, keys[1], "19" ); printf("Reinsertion of (\"%s\",\"%s\") succed \n",keys [1],"19"); insert_pair( hash_table, keys[2], "13" ); printf("Reinsertion of (\"%s\",\"%s\") succed \n",keys [2],"13"); printf("the hash table = "); print_hashtable(hash_table); // getting the values of a given key printf("\n############ Getting the value for a given key #######################################################\n\n"); for(i=0;i<N;i++) printf("hash_table of \"%s\" gives \"%s\"\n",keys[i], get_value( hash_table, keys[i])); // removing the some keys from the hash table printf("\n############ Removing some keys from hash table 
######################################################\n\n"); printf("Initialy we have\n"); print_hashtable(hash_table); for(i=0;2*i<N;i++) { delate_key( hash_table, keys[2*i]); printf("we delate the key \"%s\"\n",keys[2*i]); print_hashtable(hash_table); } printf("\n#### After free we print the hash_table to verify #################################################\n\n"); free_hashtable(hash_table); print_hashtable(hash_table); return 0; }
/******************************************************************** * Init *******************************************************************/ void init_mpi(void) { requests=new_hashtable(HASHSIZE); broadcasts=new_hashtable(HASHSIZE); DEL_BUFFER(); YAP_UserCPredicate( "mpi_init", mpi_init,0); // mpi_init/0 #ifdef USE_THREADS YAP_UserCPredicate( "mpi_init_rcv_thread", mpi_init_rcv_thread,1); // mpi_init_rcv_thread(+HandleMsgGoal/1) #endif YAP_UserCPredicate( "mpi_finalize", mpi_finalize,0); // mpi_finalize turn YAP_UserCPredicate( "mpi_comm_size", mpi_comm_size,1); // mpi_comm_size(-Size) YAP_UserCPredicate( "mpi_comm_rank", mpi_comm_rank,1); // mpi_comm_rank(-Rank) YAP_UserCPredicate( "mpi_version", mpi_version,2); // mpi_version(-Major,-Minor) YAP_UserCPredicate( "mpi_get_processor_name", mpi_get_processor_name,1); // mpi_get_processor_name(-Name) YAP_UserCPredicate( "mpi_send", mpi_send,3); // mpi_send(+Data, +Destination, +Tag). YAP_UserCPredicate( "mpi_isend",mpi_isend,4); YAP_UserCPredicate( "mpi_recv", mpi_recv,3); // mpi_recv(?Source,?Tag,-Data). YAP_UserCPredicate( "mpi_irecv", mpi_irecv,3); // mpi_irecv(?Source,?Tag,-Handle). YAP_UserCPredicate( "mpi_wait", mpi_wait,2); // mpi_wait(+Handle,-Status). YAP_UserCPredicate( "mpi_wait_recv", mpi_wait_recv,3); // mpi_wait_recv(+Handle,-Status,-Data). YAP_UserCPredicate( "mpi_test", mpi_test,2); // mpi_test(+Handle,-Status). YAP_UserCPredicate( "mpi_test_recv", mpi_test_recv,3); // mpi_test(+Handle,-Status,-Data). YAP_UserCPredicate( "mpi_bcast", mpi_bcast,2); // mpi_bcast(Root,Term) YAP_UserCPredicate( "mpi_bcast2", mpi_bcast2,2); // mpi_bcast2(Root,Term) YAP_UserCPredicate( "mpi_bcast3", mpi_bcast3,3); // mpi_bcast3(Root,Term,Tag) /** @pred mpi_bcast3(+ _Root_, + _Data_, + _Tag_) Broadcasts the message _Data_ with tag _Tag_ from the process with rank _Root_ to all other processes. 
*/ YAP_UserCPredicate( "mpi_ibcast2", mpi_ibcast2,2); // mpi_ibcast(Root,Term) YAP_UserCPredicate( "mpi_ibcast3", mpi_ibcast3,3); // mpi_ibcast(Root,Term,Tag) /** @pred mpi_ibcast(+ _Root_, + _Data_, + _Tag_) Non-blocking operation. Broadcasts the message _Data_ with tag _Tag_ from the process with rank _Root_ to all other processes. */ YAP_UserCPredicate( "mpi_barrier", mpi_barrier,0); // mpi_barrier/0 YAP_UserCPredicate( "mpi_gc", mpi_gc,0); // mpi_gc/0 YAP_UserCPredicate( "mpi_default_buffer_size", mpi_default_buffer_size,2); // buffer size /** @pred mpi_default_buffer_size(- _OldBufferSize_, ? _NewBufferSize_) The _OldBufferSize_ argument unifies with the current size of the MPI communication buffer size and sets the communication buffer size _NewBufferSize_. The buffer is used for assynchronous waiting and for broadcast receivers. Notice that buffer is local at each MPI process. */ #ifdef MPISTATS YAP_UserCPredicate( "mpi_stats", mpi_stats,7); // mpi_stats(-Time,#MsgsRecv,BytesRecv,MaxRecev,#MsgSent,BytesSent,MaxSent) YAP_UserCPredicate( "mpi_reset_stats", mpi_reset_stats,0); // cleans the timers RESET_STATS(); #endif // YAP_UserCPredicate( "mpi_gather", mpi_gather,0); //mpi_gather(+RootRank,?SendData,?RecvData) // Each process (root process included) sends the contents of its send buffer to the root process. The root process receives the messages and stores them in rank order. The outcome is as if each of the n processes in the group (including the root process) had executed a call to MPI_Send and the root had executed n calls to MPI_Recv. The receive buffer is ignored for all non-root processes. // MPI_Scatter #ifdef DEBUG fprintf(stderr,"MPI module succesfully loaded."); fflush(stderr); #endif }
void main(int argc, char *argv[]) { int i,j; char *vocab_filename; FILE *tempfile; char tempfiles_directory[1000]; int vocab_size; FILE *vocab_file; int verbosity; int buffer_size; int position_in_buffer; int number_of_tempfiles; int max_files; int fof_size; unsigned short *buffer; unsigned short *placeholder; unsigned short *temp_ngram; int temp_count; char temp_word[500]; char temp_word2[500]; char *temp_file_root; char *temp_file_ext; char *host_name; int proc_id; struct utsname uname_info; flag write_ascii; /* Vocab hash table things */ struct hash_table vocabulary; unsigned long hash_size; unsigned long M; tempfile = NULL; /* Just to prevent compilation warnings. */ report_version(&argc,argv); verbosity = pc_intarg(&argc,argv,"-verbosity",DEFAULT_VERBOSITY); /* Process command line */ if (pc_flagarg( &argc, argv,"-help") || argc==1) { fprintf(stderr,"text2idngram - Convert a text stream to an id n-gram stream.\n"); fprintf(stderr,"Usage : text2idngram -vocab .vocab \n"); fprintf(stderr," [ -buffer 100 ]\n"); fprintf(stderr," [ -hash %d ]\n",DEFAULT_HASH_SIZE); fprintf(stderr," [ -temp %s ]\n",DEFAULT_TEMP); fprintf(stderr," [ -files %d ]\n",DEFAULT_MAX_FILES); fprintf(stderr," [ -gzip | -compress ]\n"); fprintf(stderr," [ -verbosity %d ]\n", DEFAULT_VERBOSITY); fprintf(stderr," [ -n 3 ]\n"); fprintf(stderr," [ -write_ascii ]\n"); fprintf(stderr," [ -fof_size 10 ]\n"); exit(1); } pc_message(verbosity,2,"text2idngram\n"); n = pc_intarg( &argc, argv, "-n",DEFAULT_N); placeholder = (unsigned short *) rr_malloc(sizeof(unsigned short)*n); temp_ngram = (unsigned short *) rr_malloc(sizeof(unsigned short)*n); hash_size = pc_intarg( &argc, argv, "-hash",DEFAULT_HASH_SIZE); buffer_size = pc_intarg( &argc, argv, "-buffer",STD_MEM); write_ascii = pc_flagarg(&argc,argv,"-write_ascii"); fof_size = pc_intarg(&argc,argv,"-fof_size",10); max_files = pc_intarg( &argc, argv, "-files",DEFAULT_MAX_FILES); vocab_filename = salloc(pc_stringarg( &argc, argv, "-vocab", "" )); if 
(!strcmp("",vocab_filename)) { quit(-1,"text2idngram : Error : Must specify a vocabulary file.\n"); } strcpy(tempfiles_directory,pc_stringarg( &argc, argv, "-temp", DEFAULT_TEMP)); if (pc_flagarg(&argc,argv,"-compress")) { temp_file_ext = salloc(".Z"); } else { if (pc_flagarg(&argc,argv,"-gzip")) { temp_file_ext = salloc(".gz"); } else { temp_file_ext = salloc(""); } } uname(&uname_info); host_name = salloc(uname_info.nodename); proc_id = getpid(); sprintf(temp_word,"%s%s.%d.",TEMP_FILE_ROOT,host_name,proc_id); temp_file_root = salloc(temp_word); pc_report_unk_args(&argc,argv,verbosity); /* If the last charactor in the directory name isn't a / then add one. */ if (tempfiles_directory[strlen(tempfiles_directory)-1] != '/') { strcat(tempfiles_directory,"/"); } pc_message(verbosity,2,"Vocab : %s\n",vocab_filename); pc_message(verbosity,2,"N-gram buffer size : %d\n",buffer_size); pc_message(verbosity,2,"Hash table size : %d\n",hash_size); pc_message(verbosity,2,"Temp directory : %s\n",tempfiles_directory); pc_message(verbosity,2,"Max open files : %d\n",max_files); pc_message(verbosity,2,"FOF size : %d\n",fof_size); pc_message(verbosity,2,"n : %d\n",n); buffer_size *= (1000000/(sizeof(unsigned short)*n)); /* Allocate memory for hash table */ fprintf(stderr,"Initialising hash table...\n"); M = nearest_prime(hash_size); new_hashtable(&vocabulary,M); /* Read in the vocabulary */ vocab_size = 0; vocab_file = rr_iopen(vocab_filename); pc_message(verbosity,2,"Reading vocabulary...\n"); while (fgets (temp_word, sizeof(temp_word),vocab_file)) { if (strncmp(temp_word,"##",2)==0) continue; sscanf (temp_word, "%s ",temp_word2); /* Check for repeated words in the vocabulary */ if (index2(&vocabulary,temp_word2) != 0) { fprintf(stderr,"======================================================\n"); fprintf(stderr,"WARNING: word %s is repeated in the vocabulary.\n",temp_word); fprintf(stderr,"=======================================================\n"); } if (strncmp(temp_word,"#",1)==0) 
{ fprintf(stderr,"\n\n===========================================================\n"); fprintf(stderr,":\nWARNING: line assumed NOT a comment:\n"); fprintf(stderr, ">>> %s <<<\n",temp_word); fprintf(stderr, " '%s' will be included in the vocabulary.\n",temp_word2); fprintf(stderr, " (comments must start with '##')\n"); fprintf(stderr,"===========================================================\n\n"); } vocab_size++; add_to_hashtable(&vocabulary,hash(temp_word2,M),temp_word2,vocab_size); } if (vocab_size > MAX_VOCAB_SIZE) { quit(-1,"text2idngram : Error : Vocabulary size exceeds maximum.\n"); } pc_message(verbosity,2,"Allocating memory for the n-gram buffer...\n"); buffer=(unsigned short*) rr_malloc(n*(buffer_size+1)*sizeof(unsigned short)); number_of_tempfiles = 0; /* Read text into buffer */ /* Read in the first ngram */ position_in_buffer = 0; for (i=0;i<=n-1;i++) { get_word(stdin,temp_word); add_to_buffer(index2(&vocabulary,temp_word),0,i,buffer); } while (!rr_feof(stdin)) { /* Fill up the buffer */ pc_message(verbosity,2,"Reading text into the n-gram buffer...\n"); pc_message(verbosity,2,"20,000 n-grams processed for each \".\", 1,000,000 for each line.\n"); while ((position_in_buffer<buffer_size) && (!rr_feof(stdin))) { position_in_buffer++; if (position_in_buffer % 20000 == 0) { if (position_in_buffer % 1000000 == 0) { pc_message(verbosity,2,".\n"); } else { pc_message(verbosity,2,"."); } } for (i=1;i<=n-1;i++) { add_to_buffer(buffer_contents(position_in_buffer-1,i,buffer), position_in_buffer,i-1,buffer); } if (get_word(stdin,temp_word) == 1) { add_to_buffer(index2(&vocabulary,temp_word),position_in_buffer, n-1,buffer); } } for (i=0;i<=n-1;i++) { placeholder[i] = buffer_contents(position_in_buffer,i,buffer); } /* Sort buffer */ pc_message(verbosity,2,"\nSorting n-grams...\n"); qsort((void*) buffer,(size_t) position_in_buffer, n*sizeof(unsigned short),compare_ngrams); /* Output the buffer to temporary BINARY file */ number_of_tempfiles++; 
sprintf(temp_word,"%s%s%hu%s",tempfiles_directory,temp_file_root, number_of_tempfiles,temp_file_ext); pc_message(verbosity,2,"Writing sorted n-grams to temporary file %s\n", temp_word); tempfile = rr_oopen(temp_word); for (i=0;i<=n-1;i++) { temp_ngram[i] = buffer_contents(0,i,buffer); if (temp_ngram[i] > MAX_VOCAB_SIZE) { quit(-1,"Invalid trigram in buffer.\nAborting"); } } temp_count = 1; for (i=1;i<=position_in_buffer;i++) { if (!compare_ngrams(temp_ngram,&buffer[i*n])) { temp_count++; } else { for (j=0;j<=n-1;j++) { rr_fwrite(&temp_ngram[j],sizeof(unsigned short),1, tempfile,"temporary n-gram ids"); temp_ngram[j] = buffer_contents(i,j,buffer); } rr_fwrite(&temp_count,sizeof(int),1,tempfile, "temporary n-gram counts"); temp_count = 1; } } rr_oclose(tempfile); for (i=0;i<=n-1;i++) { add_to_buffer(placeholder[i],0,i,buffer); } position_in_buffer = 0; } /* Merge the temporary files, and output the result to standard output */ pc_message(verbosity,2,"Merging temporary files...\n"); merge_tempfiles(1, number_of_tempfiles, temp_file_root, temp_file_ext, max_files, tempfiles_directory, stdout, write_ascii, fof_size); pc_message(verbosity,0,"text2idngram : Done.\n"); exit(0); }
int main(int argc, char *argv[]) { short out2stdout=0; hashtable ht=new_hashtable(HASHSIZE); bamFile in,in2; bamFile out; int paired;//1 if not paired or pair read 1, 2 otherwise index_mem=sizeof(hashtable)*sizeof(hashnode**)*HASHSIZE*2; if (argc != 3) { fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam or - for stdout>\n"); return 1; } // Open file and exit if error in = bam_open(argv[1], "rb"); out2stdout = strcmp(argv[2], "-")? 0 : 1; out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } if (out == 0) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]); return 1; } unsigned long num_alns=0; int ref; // *********** // Copy header bam_header_t *header; header = bam_header_read(in); bam_header_write(out,header); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); bam1_t *prev=bam_init1(); if (!out2stdout) { fprintf(stderr,"bam_fix_NH version %s\n",VERSION); fprintf(stderr,"Processing %s\n",argv[1]); fprintf(stderr,"Hashing...\n");fflush(stderr); } while(bam_read1(in,aln)>=0) { // read alignment if (aln->core.tid < 0) continue;//ignore unaligned reads if (aln->core.flag & BAM_FUNMAP) continue; if (aln->core.flag & BAM_FREAD2) paired=2; else paired=1; ++num_alns; new_read_aln(ht,fix_read_name(bam1_qname(aln),paired)); if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns); } bam_close(in); if(!out2stdout) { fprintf(stderr,"%s%lu\n",BACKLINE,num_alns); fprintf(stderr,"Hashing complete (%lu alignments)\n",num_alns); fprintf(stderr,"Memory used: %ld MB\n",index_mem/1024/1024); fprintf(stderr,"Updating entries with NH and printing BAM...\n"); fflush(stderr); } // reopen in2 = bam_open(argv[1], "rb"); if (in2 == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } header = bam_header_read(in2); num_alns=0; while(bam_read1(in2,aln)>=0) { // read alignment paired=1; if (aln->core.tid < 0) 
continue;//ignore unaligned reads if (aln->core.flag & BAM_FUNMAP) continue; if (aln->core.flag & BAM_FREAD2) paired=2; ++num_alns; READ_ALN *r=get_read_aln(ht,fix_read_name(bam1_qname(aln),paired)); assert(r!=NULL); // update the NH field uint8_t *old_nh = bam_aux_get(aln, "NH"); int32_t nh=r->ctr; if (old_nh) { if (nh!=bam_aux2i(old_nh)) { fprintf(stderr,"warning: value mismatch! replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh); } bam_aux_del(aln, old_nh); bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); #ifdef DEBUG // printf("!>%s %d\n",bam1_qname(aln),r->ctr); #endif } if (!old_nh) { // add NH bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); #ifdef DEBUG fprintf(stderr,"!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh)); #endif } bam_write1(out,aln); if(!out2stdout) PRINT_ALNS_PROCESSED(num_alns); } // bam_destroy1(aln); bam_close(in2); bam_close(out); if(!out2stdout) { fprintf(stderr,"%s%lu\n",BACKLINE,num_alns); fprintf(stderr,"Done.\n"); } return 0; }
int main(int argc, char **argv ) { //long paired=0; is_paired_data=0; is_interleaved=0; printf("Version iRAP %s\n",VERSION); if (argc<2 || argc>3) { fprintf(stderr,"Usage: fastq_validator fastq1 [fastq2 file|pe]\n"); //fprintf(stderr,"%d",argc); exit(1); } FILE *fd1=NULL; FILE *fd2=NULL; // open & close fd1=open_fastq(argv[1]); fclose(fd1); //fprintf(stderr,"%d\n",argc); //bin/fprintf(stderr,"%s\n",argv[0]); if (argc ==3) { is_paired_data=1; if ( !strncmp(argv[2],"pe",2) ) { is_interleaved=1; } else { fd2=open_fastq(argv[2]); fclose(fd2); } } // ************************************************************ // casava 1.8? is_casava_18=is_casava_1_8(argv[1]); if (is_casava_18) fprintf(stderr,"CASAVA=1.8\n"); // ************************************************************ //off_t cur_offset=1; // interleaved if ( is_interleaved ) { exit(validate_interleaved(argv[1])); } unsigned long cline=1; fprintf(stderr,"HASHSIZE=%lu\n",(long unsigned int)HASHSIZE); //memset(&collisions[0],0,HASHSIZE+1); hashtable sn_index=new_hashtable(HASHSIZE); index_mem+=sizeof(hashtable); index_file(argv[1],sn_index,0,-1); fprintf(stderr,"\n"); // print some info fprintf(stderr,"Reads processed: %ld\n",sn_index->n_entries); fprintf(stderr,"Memory used in indexing: ~%ld MB\n",index_mem/1024/1024); // pair-end if (argc ==3 ) { fprintf(stderr,"File %s processed\n",argv[1]); fprintf(stderr,"Next file %s\n",argv[2]); // validate the second file and check if all reads are paired fd2=open_fastq(argv[2]); INDEX_ENTRY* e; // read the entry using another fd cline=1; // TODO: improve code - mostly duplicated:( while(!feof(fd2)) { //long start_pos=ftell(fd2); char *hdr=READ_LINE_HDR(fd2); if ( hdr==NULL) break; int len; char *seq=READ_LINE_SEQ(fd2); char *hdr2=READ_LINE_HDR2(fd2); char *qual=READ_LINE_QUAL(fd2); char* readname=get_readname(hdr,&len,cline,argv[2]); if (seq==NULL || hdr2==NULL || qual==NULL ) { fprintf(stderr,"Error in file %s, line %lu: file truncated?\n",argv[2],cline); exit(1); } if 
(validate_entry(hdr,hdr2,seq,qual,cline,argv[2])!=0) { exit(1); } //fprintf(stderr,"Reads processed: %ld\n",sn_index->n_entries); // check for duplicates if ( (e=lookup_header(sn_index,readname))==NULL ) { fprintf(stderr,"Error in file %s, line %lu: unpaired read - %s\n",argv[2],cline,readname); exit(1); } else { ulong key=hashit(readname); // remove entry from sn_index if (delete(sn_index,key,e)!=e) { fprintf(stderr,"Error in file %s, line %lu: unable to delete entry from sn_index - %s\n",argv[2],cline,readname); exit(1); } free_indexentry(e); } PRINT_READS_PROCESSED(cline/4); // cline+=4; } printf("\n"); if (sn_index->n_entries>0 ) { fprintf(stderr,"Error in file %s: found %lu unpaired reads\n",argv[1],sn_index->n_entries); exit(1); } } printf("OK\n"); exit(0); }
int main(int argc, char *argv[]) { hashtable ht=new_hashtable(HASHSIZE); bamFile in,in2; bamFile out; if (argc != 3) { fprintf(stderr, "Usage: bam_fix_NH <in.bam> <out.bam>\n"); return 1; } // Open file and exit if error //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb"); in = bam_open(argv[1], "rb"); out = strcmp(argv[2], "-")? bam_open(argv[2], "w") : bam_dopen(fileno(stdout), "w"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } if (out == 0) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[2]); return 1; } unsigned long num_alns=0; int ref; // *********** // Copy header bam_header_t *header; header = bam_header_read(in); bam_header_write(out,header); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); bam1_t *prev=bam_init1(); printf("Hashing...\n");flush(stdout); while(bam_read1(in,aln)>=0) { // read alignment if (aln->core.tid < 0) continue;//ignore unaligned reads ++num_alns; new_read_aln(ht,bam1_qname(aln)); } bam_close(in); printf("Hashing complete (%lu alignments)\n",num_alns); printf("Memory used in the hash: %ld MB\n",index_mem/1024/1024); flush(stdout); // reopen in2 = bam_open(argv[1], "rb"); if (in2 == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } header = bam_header_read(in2); while(bam_read1(in2,aln)>=0) { // read alignment if (aln->core.tid < 0) continue;//ignore unaligned reads ++num_alns; READ_ALN *r=get_read_aln(ht,bam1_qname(aln)); //assert(r!=NULL); // update the NH field uint8_t *old_nh = bam_aux_get(aln, "NH"); uint8_t nh=r->ctr; if (old_nh) { if (nh!=bam_aux2i(old_nh)) { fprintf(stderr,"warning: value mismatch! 
replacing>%s %d->%d\n",bam1_qname(aln),bam_aux2i(old_nh),nh); } bam_aux_del(aln, old_nh); bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); } if (!old_nh) { // add NH bam_aux_append(aln, "NH", 'i', 4, (uint8_t*)&nh); #ifdef DEBUG printf("!>%s %d\n",bam1_qname(aln),bam_aux2i(old_nh)); #endif } // in->header // Also fix the XS:A tag // BAM_FREAD1 // BAM_FREAD2 // BAM_FREVERSE the read is mapped to the reverse strand //bam1_cigar(b) //BAM_CREF_SKIP 3 CIGAR skip on the reference (e.g. spliced alignment) //BAM_FREVERSE 16 the read is mapped to the reverse strand if (aln->core.flag & BAM_FSECONDARY) continue; // skip secondary alignments if (aln->core.flag & ! BAM_FPAIRED) continue; // not paired if (aln->core.flag & ! BAM_FPROPER_PAIR) continue; // not a proper pair if (aln->core.flag & ! BAM_FMUNMAP) continue; // the mate is mapped if (aln->core.flag & BAM_FSECONDARY) continue; // secundary read if (aln->core.flag & BAM_FREAD2) continue; // only count each pair once // core.strand == 0 (f/+) 1 r/- // flag // bam1_qname(b) bam_write1(out,aln); } // bam_destroy1(aln); bam_close(in2); bam_close(out); return 0; /* uint8_t *old_nm = bam_aux_get(b, "NM"); 90 if (c->flag & BAM_FUNMAP) return; 91 if (old_nm) old_nm_i = bam_aux2i(old_nm); 92 if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); 93 else if (nm != old_nm_i) { 94 fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm); 95 bam_aux_del(b, old_nm); 96 bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); 97 } */ }
int main() { const int TABLE_SIZE = 101; int i; Hashtable* ht_str_str = new_hashtable(TABLE_SIZE); print_hashtable(ht_str_str); srand(time(NULL)); for (i = 0; i < TABLE_SIZE << 1; ++i) { int len = rand() % 100 + 1; int key_size = len + 1; char* strkey = (char*)malloc(key_size); if (strkey == NULL) { continue; } strkey[len] = '\0'; len--; while (len >= 0) { strkey[len] = (char)rand(); if (strkey[len] == '\0') { continue; } len--; } len = rand() % 100 + 1; int val_size = len + 1; char* strval = (char*)malloc(val_size); if (strval == NULL) { free(strkey); continue; } strval[len] = '\0'; len--; while (len >= 0) { strval[len] = (char)rand(); if (strval[len] == '\0') { continue; } len--; } hashtable_add(ht_str_str, (BYTE*)strkey, key_size, (BYTE*)strval, val_size); } print_hashtable(ht_str_str); /* the function strlen(const char* s) return the length of string s(not include the end char '\0')*/ hashtable_add(ht_str_str, (BYTE*)"hello world", strlen("hello world") + 1, (BYTE*)"f**k haitai", strlen("hello world") +1); printf("find :%d\n", hashtable_contains_key(ht_str_str, (BYTE*)"hello world", strlen("hello world") + 1)); printf("find :%d\n", hashtable_contains_key(ht_str_str, (BYTE*)"f**k world", strlen("f**k world") + 1)); char* pstr = (char*)hashtable_getval(ht_str_str, (BYTE*)"hello world", 12); printf("get :%s\n", pstr); free(pstr); print_hashtable(ht_str_str); BYTE* p = NULL; hashtable_add(ht_str_str, (BYTE*)&p, sizeof(p), (BYTE*)"The key is NULL", 16); int* k = NULL; pstr = (char*)hashtable_getval(ht_str_str, (BYTE*)&k, sizeof(k)); printf("'null' key's value :%s\n", pstr); free(pstr); hashtable_remove(ht_str_str, (BYTE*)&p, sizeof(p)); hashtable_empty(ht_str_str); //print_hashtable(ht_str_str); hashtable_dispose(ht_str_str); //print_hashtable(ht_str_str); getchar(); return 0; }
int main(int argc, char **argv ) { long paired=0; //printf("%d",sizeof(struct index_entry)); if (argc!=6) { fprintf(stderr,"Usage: fastqinterleaved2pair.c fastq fastq1 fastq2\n"); //fprintf(stderr,"%d",argc); exit(1); } FILE *fd=open_fastq(argv[1]); // ************************************************************ off_t cur_offset=1; unsigned long cline=1; hashtable index=new_hashtable(HASHSIZE); index_mem+=sizeof(hashtable); index_file(argv[1],index,0,-1); printf("\n"); // print some info printf("Reads indexed: %ld\n",index->n_entries); printf("Memory used in indexing: %ld MB\n",index_mem/1024/1024); // char *p1=argv[3]; char *p2=argv[4]; char *p3=argv[5]; fd1=open_fastq(argv[1]); fd2=open_fastq(argv[2]); FILE *fdw1=fopen(p1,"w"); FILE *fdw2=fopen(p2,"w"); FILE *fdw3=fopen(p3,"w"); unsigned long up2=0; if ( fdw1==NULL || fdw2==NULL || fdw3==NULL ) { fprintf(stderr,"Unable to create output files\n"); exit(1); } // read the entry using another fd cline=1; while(!feof(fd2)) { long start_pos=ftell(fd2); char *hdr=READ_LINE(fd2); if ( hdr==NULL) break; if ( hdr[0]!='@' ) { fprintf(stderr,"line %ul: error in header %s",cline,hdr); return 1; } // discard @ (one less byte) hdr=&hdr[1]; int len=strlen(hdr); len--; hdr[len-1]='\0'; // // lookup hdr in index INDEX_ENTRY* e=lookup_header(index,hdr); if (e==NULL) { ++up2; copy_read(start_pos,fd2,fdw3); } else { long key=hashit(hdr); // pair found ++paired; copy_read(start_pos,fd2,fdw2); copy_read(e->entry_start,fd1,fdw1); // remove entry from index if (delete(index,key,e)!=e) { fprintf(stderr,"Unable to delete entry from index\n"); exit(1); } free_indexentry(e); } PRINT_READS_PROCESSED(cline/4); cline+=4; } printf("\n"); printf("Recording %ld unpaired reads from %s\n",index->n_entries,argv[1]);fflush(stdout); fclose(fd1); // record the unpaired from argv[1] fd1=open_fastq(argv[1]); #ifndef SEQDISKACCESS init_hash_traversal(index); INDEX_ENTRY* e; cline=1; while((e=(INDEX_ENTRY*)next_hash_object(index))!=NULL) { 
copy_read(e->entry_start,fd1,fdw3); PRINT_READS_PROCESSED(cline); ++cline; } // #else //sequential disk access // cline=1; unsigned long remaining=index->n_entries; while(!feof(fd1) && remaining ) { //long start_pos=ftell(fd2); char *hdr=READ_LINE(fd1); if ( hdr==NULL) break; if ( hdr[0]!='@' ) { fprintf(stderr,"line %ld %s: error in header %s",cline,argv[1],hdr); return 1; } // discard @ (one less byte) hdr=&hdr[1]; int len=strlen(hdr); len--; hdr[len-1]='\0'; // // lookup hdr in index INDEX_ENTRY* e=lookup_header(index,hdr); if (e!=NULL) { copy_read(e->entry_start,fd1,fdw3); remaining--; } else { READ_LINE(fd1);//seq READ_LINE(fd1);//qual READ_LINE(fd1);//qual } PRINT_READS_PROCESSED(cline/4); cline+=4; } fclose(fd1); #endif printf("\n"); printf("Unpaired from %s: %ld\n",argv[1],index->n_entries); printf("Unpaired from %s: %ld\n",argv[2],up2); printf("Paired: %ld\n",paired); /*fseek(fd2,start_pos,SEEK_SET); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); */ fclose(fdw1); fclose(fdw2); fclose(fdw3); if ( paired == 0 ) { fprintf(stderr,"!!!WARNING!!! 0 paired reads! are the headers ok?\n"); exit(1); } exit(0); }
int main(int argc, char **argv ) { //long paired=0; unsigned long num_reads1=0, num_reads2=0; is_paired_data=FALSE; is_interleaved=FALSE; fix_dot=FALSE; int nopt=0; int c; opterr = 0; fprintf(stderr,"Version iRAP %s\n",VERSION); while ((c = getopt (argc, argv, "f")) != -1) switch (c) { case 'f': fix_dot = TRUE; fprintf(stderr,"Fixing (-f) enabled: Replacing . by N (creating .fix.gz files)\n"); ++nopt; break; default: ++nopt; fprintf(stderr,"ERROR: Option -%c invalid\n",optopt); exit(1); } if (argc-nopt<2 || argc-nopt>3) { fprintf(stderr,"Usage: fastq_info [-f] fastq1 [fastq2 file|pe]\n"); //fprintf(stderr,"%d",argc); exit(1); } //gzFile fd1=NULL; gzFile fd2=NULL; if (argc-nopt ==3) { is_paired_data=TRUE; //fprintf(stderr,"%d %d %d %s\n",argc,nopt,argc-nopt,argv[2+nopt]); if ( !strncmp(argv[2+nopt],"pe",2) ) { is_interleaved=FALSE; } //else { // fd2=open_fastq(argv[2+nopt]); // gzclose(fd2); // } // ************************************************************ if ( is_interleaved ) { // interleaved num_reads1=validate_interleaved(argv[1+nopt]); } else { // single or pair of fastq file(s) unsigned long cline=1; fprintf(stderr,"HASHSIZE=%lu\n",(long unsigned int)HASHSIZE); //memset(&collisions[0],0,HASHSIZE+1); hashtable sn_index=new_hashtable(HASHSIZE); index_mem+=sizeof(hashtable); index_file(argv[1+nopt],sn_index,0,-1); num_reads1=sn_index->n_entries; fprintf(stderr,"\n"); // print some info fprintf(stderr,"Reads processed: %ld\n",sn_index->n_entries); fprintf(stderr,"Memory used in indexing: ~%ld MB\n",index_mem/1024/1024); // pair-end if (argc-nopt ==3 ) { fprintf(stderr,"File %s processed\n",argv[1+nopt]); fprintf(stderr,"Next file %s\n",argv[2+nopt]); // validate the second file and check if all reads are paired fd2=open_fastq(argv[2+nopt]); gzFile fdf=open_fixed_fastq(argv[2+nopt]); INDEX_ENTRY* e; // read the entry using another fd cline=1; // TODO: improve code - mostly duplicated:( while(!gzeof(fd2)) { long long start_pos=gztell(fd2); char 
*hdr=READ_LINE_HDR(fd2); if ( hdr==NULL) break; int len; char *seq=READ_LINE_SEQ(fd2); char *hdr2=READ_LINE_HDR2(fd2); char *qual=READ_LINE_QUAL(fd2); char* readname=get_readname(hdr,&len,cline,argv[2+nopt]); if (seq==NULL || hdr2==NULL || qual==NULL ) { fprintf(stderr,"\nError in file %s, line %lu: file truncated?\n",argv[2+nopt],cline); exit(1); } if (validate_entry(hdr,hdr2,seq,qual,cline,argv[2+nopt])!=0) { exit(1); } //fprintf(stderr,"Reads processed: %ld\n",sn_index->n_entries); // check for duplicates if ( (e=lookup_header(sn_index,readname))==NULL ) { fprintf(stderr,"\nError in file %s, line %lu: unpaired read - %s\n",argv[2+nopt],cline,readname); exit(1); } else { ulong key=hashit(readname); // remove entry from sn_index if (delete(sn_index,key,e)!=e) { fprintf(stderr,"\nError in file %s, line %lu: unable to delete entry from sn_index - %s\n",argv[2+nopt],cline,readname); exit(1); } free_indexentry(e); } PRINT_READS_PROCESSED(cline/4); ++num_reads2; // replace_dots(start_pos,seq,hdr,hdr2,qual,fdf); cline+=4; } printf("\n"); close_fixed_fastq(fdf); if (sn_index->n_entries>0 ) { fprintf(stderr,"\nError in file %s: found %lu unpaired reads\n",argv[1+nopt],sn_index->n_entries); exit(1); } } } FILE* out; if (fix_dot) { out=stderr; } else { out=stdout; } fprintf(out,"------------------------------------\n"); if ( num_reads2>0 ) { fprintf(out,"Number of reads: %lu %lu\n",num_reads1,num_reads2); } else { fprintf(out,"Number of reads: %lu\n",num_reads1); } fprintf(out,"Quality encoding range: %lu %lu\n",min_qual,max_qual); char *enc=qualRange2enc(min_qual,max_qual); if ( enc == NULL ) { fprintf(stderr,"\nERROR: Unable to determine quality encoding - unknown range [%lu,%lu]\n",min_qual,max_qual); exit(1); } fprintf(out,"Quality encoding: %s\n",qualRange2enc(min_qual,max_qual)); fprintf(out,"Read length: %lu %lu\n",min_rl,max_rl); fprintf(out,"OK\n"); exit(0); }