Example #1
0
typedef struct {
	int n, max;
	bam1_t **a;
} tmp_stack_t;

static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
{
	if (stack->n == stack->max) {
		stack->max = stack->max? stack->max<<1 : 0x10000;
		stack->a = (bam1_t**)realloc(stack->a, sizeof(bam1_t*) * stack->max);
	}
	stack->a[stack->n++] = b;
}

static inline void dump_best(tmp_stack_t *stack, khash_t(pos) *best_hash, bamFile out)
{
	int i;
	for (i = 0; i != stack->n; ++i) {
		bam_write1(out, stack->a[i]);
		bam_destroy1(stack->a[i]);
	}
	stack->n = 0;
	if (kh_size(best_hash) > BUFFER_SIZE) kh_clear(pos, best_hash);
}

static void clear_del_set(khash_t(name) *del_set)
{
	khint_t k;
	for (k = kh_begin(del_set); k < kh_end(del_set); ++k)
		if (kh_exist(del_set, k))
Example #2
0
    if (is_word_token(token.type)) {
        add_normalized_token(array, str, token, LANGUAGE_CLASSIFIER_NORMALIZE_TOKEN_OPTIONS);
    } else {
        char_array_add(array, " ");
    }
}


static inline void append_prefix(char_array *array, char *prefix) {
    if (prefix != NULL) {
        char_array_append(array, prefix);
        char_array_append(array, NAMESPACE_SEPARATOR_CHAR);
    }
}

static inline void add_full_token_feature(khash_t(str_double) *features, char *prefix, char_array *feature_array, char *str, token_t token) {
    if (features == NULL || feature_array == NULL) return;

    char_array_clear(feature_array);
    append_prefix(feature_array, prefix);

    char_array_add_len(feature_array, str + token.offset, token.len);

    if (feature_array->n <= 1) return;
    char *feature = char_array_get_string(feature_array);
    log_debug("full token feature=%s\n", feature);
    feature_counts_add(features, feature, 1.0);
}


static void add_ngram_features(khash_t(str_double) *features, char *prefix, char_array *feature_array, char *str, token_t token, size_t n) {
Example #3
0
int run_effect(char **urls, shared_options_data_t *shared_options_data, effect_options_data_t *options_data) {
    int ret_code = 0;
    double start, stop, total;
    
    vcf_file_t *vcf_file = vcf_open(shared_options_data->vcf_filename, shared_options_data->max_batches);
    if (!vcf_file) {
        LOG_FATAL("VCF file does not exist!\n");
    }
    
    ped_file_t *ped_file = NULL;
    if (shared_options_data->ped_filename) {
        ped_file = ped_open(shared_options_data->ped_filename);
        if (!ped_file) {
            LOG_FATAL("PED file does not exist!\n");
        }
        LOG_INFO("About to read PED file...\n");
        // Read PED file before doing any processing
        ret_code = ped_read(ped_file);
        if (ret_code != 0) {
            LOG_FATAL_F("Can't read PED file: %s\n", ped_file->filename);
        }
    }
    
    char *output_directory = shared_options_data->output_directory;
    size_t output_directory_len = strlen(output_directory);
    
    ret_code = create_directory(output_directory);
    if (ret_code != 0 && errno != EEXIST) {
        LOG_FATAL_F("Can't create output directory: %s\n", output_directory);
    }
    
    // Remove all .txt files in folder
    ret_code = delete_files_by_extension(output_directory, "txt");
    if (ret_code != 0) {
        return ret_code;
    }
    
    // Initialize environment for connecting to the web service
    ret_code = init_http_environment(0);
    if (ret_code != 0) {
        return ret_code;
    }
    
    // Output file descriptors
    static cp_hashtable *output_files = NULL;
    // Lines of the output data in the main .txt files
    static list_t *output_list = NULL;
    // Consequence type counters (for summary, must be kept between web service calls)
    static cp_hashtable *summary_count = NULL;
    // Gene list (for genes-with-variants, must be kept between web service calls)
    static cp_hashtable *gene_list = NULL;

    // Initialize collections of file descriptors and summary counters
    ret_code = initialize_output_files(output_directory, output_directory_len, &output_files);
    if (ret_code != 0) {
        return ret_code;
    }
    initialize_output_data_structures(shared_options_data, &output_list, &summary_count, &gene_list);
    initialize_ws_buffers(shared_options_data->num_threads);
    
    // Create job.status file
    char job_status_filename[output_directory_len + 10];
    sprintf(job_status_filename, "%s/job.status", output_directory);
    FILE *job_status = new_job_status_file(job_status_filename);
    if (!job_status) {
        LOG_FATAL("Can't create job status file\n");
    } else {
        update_job_status_file(0, job_status);
    }
    
 
#pragma omp parallel sections private(start, stop, total)
    {
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the VCF file\n", omp_get_thread_num());
            
            start = omp_get_wtime();
            
            ret_code = vcf_read(vcf_file, 1,
                                (shared_options_data->batch_bytes > 0) ? shared_options_data->batch_bytes : shared_options_data->batch_lines,
                                shared_options_data->batch_bytes <= 0);

            stop = omp_get_wtime();
            total = stop - start;

            if (ret_code) {
                LOG_ERROR_F("Error %d while reading the file %s\n", ret_code, vcf_file->filename);
            }

            LOG_INFO_F("[%dR] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dR] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            notify_end_parsing(vcf_file);
        }
        
#pragma omp section
        {
            // Enable nested parallelism and set the number of threads the user has chosen
            omp_set_nested(1);
            
            LOG_DEBUG_F("Thread %d processes data\n", omp_get_thread_num());
            
            // Filters and files for filtering output
            filter_t **filters = NULL;
            int num_filters = 0;
            if (shared_options_data->chain != NULL) {
                filters = sort_filter_chain(shared_options_data->chain, &num_filters);
            }
            FILE *passed_file = NULL, *failed_file = NULL, *non_processed_file = NULL;
            get_filtering_output_files(shared_options_data, &passed_file, &failed_file);
            
            // Pedigree information (used in some filters)
            individual_t **individuals = NULL;
            khash_t(ids) *sample_ids = NULL;
            
            // Filename structure outdir/vcfname.errors
            char *prefix_filename = calloc(strlen(shared_options_data->vcf_filename), sizeof(char));
            get_filename_from_path(shared_options_data->vcf_filename, prefix_filename);
            char *non_processed_filename = malloc((strlen(shared_options_data->output_directory) + strlen(prefix_filename) + 9) * sizeof(char));
            sprintf(non_processed_filename, "%s/%s.errors", shared_options_data->output_directory, prefix_filename);
            non_processed_file = fopen(non_processed_filename, "w");
            free(non_processed_filename);
            
            // Maximum size processed by each thread (never allow more than 1000 variants per query)
            if (shared_options_data->batch_lines > 0) {
                shared_options_data->entries_per_thread = MIN(MAX_VARIANTS_PER_QUERY, 
                            ceil((float) shared_options_data->batch_lines / shared_options_data->num_threads));
            } else {
                shared_options_data->entries_per_thread = MAX_VARIANTS_PER_QUERY;
            }
            LOG_DEBUG_F("entries-per-thread = %d\n", shared_options_data->entries_per_thread);
    
            int i = 0;
            vcf_batch_t *batch = NULL;
            int ret_ws_0 = 0, ret_ws_1 = 0, ret_ws_2 = 0;
            
            start = omp_get_wtime();

            while (batch = fetch_vcf_batch(vcf_file)) {
                if (i == 0) {
                    // Add headers associated to the defined filters
                    vcf_header_entry_t **filter_headers = get_filters_as_vcf_headers(filters, num_filters);
                    for (int j = 0; j < num_filters; j++) {
                        add_vcf_header_entry(filter_headers[j], vcf_file);
                    }
                        
                    // Write file format, header entries and delimiter
                    if (passed_file != NULL) { write_vcf_header(vcf_file, passed_file); }
                    if (failed_file != NULL) { write_vcf_header(vcf_file, failed_file); }
                    if (non_processed_file != NULL) { write_vcf_header(vcf_file, non_processed_file); }
                    
                    LOG_DEBUG("VCF header written\n");
                    
                    if (ped_file) {
                        // Create map to associate the position of individuals in the list of samples defined in the VCF file
                        sample_ids = associate_samples_and_positions(vcf_file);
                        // Sort individuals in PED as defined in the VCF file
                        individuals = sort_individuals(vcf_file, ped_file);
                    }
                }
                
//                     printf("batch loaded = '%.*s'\n", 50, batch->text);
//                     printf("batch text len = %zu\n", strlen(batch->text));

//                 if (i % 10 == 0) {
                    LOG_INFO_F("Batch %d reached by thread %d - %zu/%zu records \n", 
                            i, omp_get_thread_num(),
                            batch->records->size, batch->records->capacity);
//                 }

                int reconnections = 0;
                int max_reconnections = 3; // TODO allow to configure?

                // Write records that passed to a separate file, and query the WS with them as args
                array_list_t *failed_records = NULL;
                int num_variables = ped_file? get_num_variables(ped_file): 0;
                array_list_t *passed_records = filter_records(filters, num_filters, individuals, sample_ids, num_variables, batch->records, &failed_records);
                if (passed_records->size > 0) {
                    // Divide the list of passed records in ranges of size defined in config file
                    int num_chunks;
                    int *chunk_sizes;
                    int *chunk_starts = create_chunks(passed_records->size, shared_options_data->entries_per_thread, &num_chunks, &chunk_sizes);
                    
                    do {
                        // OpenMP: Launch a thread for each range
                        #pragma omp parallel for num_threads(shared_options_data->num_threads)
                        for (int j = 0; j < num_chunks; j++) {
                            int tid = omp_get_thread_num();
                            LOG_DEBUG_F("[%d] WS invocation\n", tid);
                            LOG_DEBUG_F("[%d] -- effect WS\n", tid);
                            if (!reconnections || ret_ws_0) {
                                ret_ws_0 = invoke_effect_ws(urls[0], (vcf_record_t**) (passed_records->items + chunk_starts[j]), 
                                                            chunk_sizes[j], options_data->excludes);
                                parse_effect_response(tid, output_directory, output_directory_len, output_files, output_list, summary_count, gene_list);
                                free(effect_line[tid]);
                                effect_line[tid] = (char*) calloc (max_line_size[tid], sizeof(char));
                            }
                            
                            if (!options_data->no_phenotypes) {
                                if (!reconnections || ret_ws_1) {
                                    LOG_DEBUG_F("[%d] -- snp WS\n", omp_get_thread_num());
                                    ret_ws_1 = invoke_snp_phenotype_ws(urls[1], (vcf_record_t**) (passed_records->items + chunk_starts[j]), chunk_sizes[j]);
                                    parse_snp_phenotype_response(tid, output_list);
                                    free(snp_line[tid]);
                                    snp_line[tid] = (char*) calloc (snp_max_line_size[tid], sizeof(char));
                                }
                                 
                                if (!reconnections || ret_ws_2) {
                                    LOG_DEBUG_F("[%d] -- mutation WS\n", omp_get_thread_num());
                                    ret_ws_2 = invoke_mutation_phenotype_ws(urls[2], (vcf_record_t**) (passed_records->items + chunk_starts[j]), chunk_sizes[j]);
                                    parse_mutation_phenotype_response(tid, output_list);
                                    free(mutation_line[tid]);
                                    mutation_line[tid] = (char*) calloc (mutation_max_line_size[tid], sizeof(char));
                                }
                            }
                        }
                        
                        LOG_DEBUG_F("*** %dth web services invocation finished\n", i);
                        
                        if (ret_ws_0 || ret_ws_1 || ret_ws_2) {
                            if (ret_ws_0) {
                                LOG_ERROR_F("Effect web service error: %s\n", get_last_http_error(ret_ws_0));
                            }
                            if (ret_ws_1) {
                                LOG_ERROR_F("SNP phenotype web service error: %s\n", get_last_http_error(ret_ws_1));
                            }
                            if (ret_ws_2) {
                                LOG_ERROR_F("Mutations phenotype web service error: %s\n", get_last_http_error(ret_ws_2));
                            }
                            
                            // In presence of errors, wait 4 seconds before retrying
                            reconnections++;
                            LOG_ERROR_F("Some errors ocurred, reconnection #%d\n", reconnections);
                            sleep(4);
                        } else {
                            free(chunk_starts);
                            free(chunk_sizes);
                        }
                    } while (reconnections < max_reconnections && (ret_ws_0 || ret_ws_1 || ret_ws_2));
                }
                
                // If the maximum number of reconnections was reached still with errors, 
                // write the non-processed batch to the corresponding file
                if (reconnections == max_reconnections && (ret_ws_0 || ret_ws_1 || ret_ws_2)) {
                #pragma omp critical
                    {
                        write_vcf_batch(batch, non_processed_file);
                    }
                }
                
                // Write records that passed and failed filters to separate files, and free them
                write_filtering_output_files(passed_records, failed_records, passed_file, failed_file);
                free_filtered_records(passed_records, failed_records, batch->records);
                
                // Free batch and its contents
                vcf_batch_free(batch);
                
                i++;
            }

            stop = omp_get_wtime();

            total = stop - start;

            LOG_INFO_F("[%d] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%d] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            // Free resources
            if (passed_file) { fclose(passed_file); }
            if (failed_file) { fclose(failed_file); }
            if (non_processed_file) { fclose(non_processed_file); }
            
            // Free filters
            for (i = 0; i < num_filters; i++) {
                filter_t *filter = filters[i];
                filter->free_func(filter);
            }
            free(filters);
            
            // Decrease list writers count
            for (i = 0; i < shared_options_data->num_threads; i++) {
                list_decr_writers(output_list);
            }
        }
        
#pragma omp section
        {
            // Thread which writes the results to all_variants, summary and one file per consequence type
            int ret = 0;
            char *line;
            list_item_t* item = NULL;
            FILE *fd = NULL;
            
            FILE *all_variants_file = cp_hashtable_get(output_files, "all_variants");
            FILE *snp_phenotype_file = cp_hashtable_get(output_files, "snp_phenotypes");
            FILE *mutation_phenotype_file = cp_hashtable_get(output_files, "mutation_phenotypes");
            
            while ((item = list_remove_item(output_list)) != NULL) {
                line = item->data_p;
                
                // Type greater than 0: consequence type identified by its SO code
                // Type equals to -1: SNP phenotype
                // Type equals to -2: mutation phenotype
                if (item->type > 0) {
                    // Write entry in the consequence type file
                    fd = cp_hashtable_get(output_files, &(item->type));
                    int ret = fprintf(fd, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to file: '%s'\n", line);
                    }
                    
                    // Write in all_variants
                    ret = fprintf(all_variants_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to all_variants: '%s'\n", line);
                    }
                    
                } else if (item->type == SNP_PHENOTYPE) {
                    ret = fprintf(snp_phenotype_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to snp_phenotypes: '%s'\n", line);
                    }
                    
                } else if (item->type == MUTATION_PHENOTYPE) {
                    ret = fprintf(mutation_phenotype_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to mutation_phenotypes: '%s'\n", line);
                    }
                }
                
                free(line);
                list_item_free(item);
            }
            
        }
    }

    write_summary_file(summary_count, cp_hashtable_get(output_files, "summary"));
    write_genes_with_variants_file(gene_list, output_directory);
    write_result_file(shared_options_data, options_data, summary_count, output_directory);

    free_output_data_structures(output_files, summary_count, gene_list);
    free_ws_buffers(shared_options_data->num_threads);
    free(output_list);
    vcf_close(vcf_file);
    
    update_job_status_file(100, job_status);
    close_job_status_file(job_status);
    
    return ret_code;
}
u_int32_t cacheClassificationLookup(dbClassificationObj *iLookup,cacheClassificationObj *iHead);
u_int32_t dbClassificationLookup(dbClassificationObj *iLookup,cacheClassificationObj *iHead);
/* LOOKUP FUNCTIONS */


/* CLASSIFICATION FUNCTIONS */
u_int32_t ClassificationPullDataStore(DatabaseData *data, dbClassificationObj **iArrayPtr,u_int32_t *array_length);
u_int32_t ClassificationCacheUpdateDBid(dbClassificationObj *iDBList,u_int32_t array_length,cacheClassificationObj **cacheHead);
u_int32_t ClassificationPopulateDatabase(DatabaseData  *data,cacheClassificationObj *cacheHead);
u_int32_t ClassificationCacheSynchronize(DatabaseData *data,cacheClassificationObj **cacheHead);
/* CLASSIFICATION FUNCTIONS */

/* SIGNATURE FUNCTIONS */
static u_int32_t SignatureLookupCache(dbSignatureObj * lookup);
static u_int32_t dbSignatureObjEquals(dbSignatureObj const * const sig1,dbSignatureObj const * const sig2);
static u_int32_t SignatureCacheLazyInit(MasterCache * mc, khash_t(dbSigCacheNode) ** cache, sig_gid_t gid);

/* SIGNATURE FUNCTIONS */


/* SIGNATURE REFERENCE FUNCTIONS */
static u_int32_t SignatureInsertReferences(DatabaseData * data, dbSignatureObj * sig);
static u_int32_t SignatureInsertReference(DatabaseData * data, u_int32_t db_sig_id, int seq, ReferenceNode * ref);
static u_int32_t ReferenceSystemLookupDbCache(MasterCache *mc, dbSystemObj * lookup);
static u_int32_t ReferenceSystemCacheInsertObj(dbSystemObj * sys, MasterCache * mc );
static u_int32_t DbReferenceSystemLookup(DatabaseData * data, dbSystemObj * lookup);
static u_int32_t ReferenceSystemLookupDatabase(DatabaseData * data, dbSystemObj * lookup);
static u_int32_t ReferenceSystemPopulateDatabase(DatabaseData * data, dbSystemObj * sys);
static u_int32_t ReferenceLookup(DatabaseData * data, dbReferenceObj * ref);
static u_int32_t ReferencePopulateDatabase(DatabaseData * data, dbReferenceObj * ref);
static u_int32_t ReferenceLookupDatabase(DatabaseData * data, dbReferenceObj * lookup);
Example #5
0
int main(int argc, char **argv)
{
  // compiler complains about unused function without these linese
  (void)kh_clear_ghash;
  (void)kh_del_ghash;

  if(argc < 2) print_usage(usage, NULL);

  char swap_alleles = 0;

  int c;
  while((c = getopt(argc, argv, "s")) >= 0) {
    switch (c) {
      case 's': swap_alleles = 1; break;
      default: die("Unknown option: %c", c);
    }
  }

  if(optind == argc) print_usage(usage, "Not enough arguments");

  char *inputpath = argv[optind];
  char **refpaths = argv + optind + 1;
  size_t num_refs = argc - optind - 1;

  gzFile gzin = gzopen(inputpath, "r");
  if(gzin == NULL) die("Cannot read file: %s", inputpath);

  size_t i, nchroms = 0, capacity = 1024;
  khash_t(ghash) *genome = kh_init(ghash);
  read_t *reads = malloc(capacity * sizeof(read_t)), *r;
  int hret;
  khiter_t k;

  for(i = 0; i < num_refs; i++) {
    fprintf(stderr, "Loading %s\n", refpaths[i]);
    load_reads(refpaths[i], &reads, &capacity, &nchroms);
  }

  if(num_refs == 0) {
    fprintf(stderr, "Loading from stdin\n");
    load_reads("-", &reads, &capacity, &nchroms);
  }

  if(nchroms == 0) die("No chromosomes loaded");

  for(i = 0; i < nchroms; i++) {
    r = reads + i;
    fprintf(stderr, "Loaded: '%s'\n", r->name.b);
    k = kh_put(ghash, genome, r->name.b, &hret);
    if(hret == 0) warn("Duplicate read name (taking first): %s", r->name.b);
    else kh_value(genome, k) = r;
  }

  // Now read VCF
  StrBuf line;
  strbuf_alloc(&line, 1024);
  char *fields[9];
  char *chr;
  int pos, reflen, altlen;

  while(strbuf_reset_gzreadline(&line, gzin) > 0)
  {
    if(line.b[0] == '#') fputs(line.b, stdout);
    else
    {
      strbuf_chomp(&line);
      vcf_columns(line.b, fields);
      fields[1][-1] = fields[2][-1] = '\0';
      chr = line.b;
      pos = atoi(fields[1])-1;
      k = kh_get(ghash, genome, chr);
      r = kh_value(genome, k);
      fields[1][-1] = fields[2][-1] = '\t';
      reflen = fields[4] - fields[3] - 1;
      altlen = fields[5] - fields[4] - 1;
      if(k == kh_end(genome)) warn("Cannot find chrom: %s", chr);
      else if(pos < 0) warn("Bad line: %s\n", line.b);
      else if((reflen == 1 && altlen == 1) || fields[3][0] == fields[4][0])
      {
        if((unsigned)pos + reflen <= r->seq.end &&
           strncasecmp(r->seq.b+pos,fields[3],reflen) == 0)
        {
          fputs(line.b, stdout);
          fputc('\n', stdout);
        }
        else if(swap_alleles && (unsigned)pos + altlen <= r->seq.end &&
                strncasecmp(r->seq.b+pos,fields[4],altlen) == 0)
        {
          // swap alleles
          char tmp[altlen], *ref = fields[3], *alt = fields[4];
          memcpy(tmp, alt, altlen);
          memmove(ref+altlen+1, ref, reflen);
          memcpy(ref, tmp, altlen);
          ref[altlen] = '\t';
          fputs(line.b, stdout);
          fputc('\n', stdout);
        }
        // else printf("FAIL0\n");
      }
      // else printf("FAIL1\n");
    }
  }

  kh_destroy(ghash, genome);
  strbuf_dealloc(&line);
  gzclose(gzin);

  for(i = 0; i < nchroms; i++) seq_read_dealloc(reads+i);
  free(reads);

  fprintf(stderr, " Done.\n");

  return 0;
}
Example #6
0
vertex_buffer_t*
get_group_buffer(vertex_buffer_t* buff,
                 const texture_t* tex,
                 const font_t*    font,
                 const shader_t*  program,
                 blend_func blend)
{
  tex_group_t* tg;
  blend_group_t* bg;
  unsigned int bkey;
  khiter_t i,j;
  khash_t(hmsp)* tgs;
  const char* atlas_name = NO_ATLAS;
  char buffer [128];
  vec4* v;
  int z;
  shader_group_t* sg;
  khash_t(hmsp)*  sgs;

  if (tex)
    atlas_name = sen_texture_atlas(tex);
  else if (font)
    atlas_name = sen_font_atlas(font);

  bkey =  (unsigned int)blend;

  i = kh_get(hmip, g_bgs, bkey);

  if (i != kh_end(g_bgs))
    bg = kh_val(g_bgs, i);
  else {
    bg =  blend_group_new(bkey);
    kh_insert(hmip, g_bgs, bkey, bg);
  }
  bg->num++;

  tgs = bg->tgs;

  j = kh_get(hmsp, tgs, atlas_name);
  if (j != kh_end(tgs))
    tg = kh_val(tgs, j);
  else {
    tg = tex_group_new(tex,font);
    kh_insert(hmsp, tgs, atlas_name, tg);
  }

  tg->num++;
  v = (vec4*) buff->vertices->items;
  z = (int) (v->z * 10000);

  sprintf (buffer, "%05d%s",z,program->name);
  sgs = tg->sgs;
  i = kh_get(hmsp, sgs, buffer);
  if (i != kh_end(sgs))
    sg = kh_val(sgs, i);
  else {
    sg = shader_group_new(program, buff,buffer,z,tg,bg);
    kh_insert(hmsp, sgs, sg->name, sg);
  }
  if (sg->buff == NULL) {
    sg->buff = vertex_buffer_new(vertex_buffer_format(buff));
  }
  sg->num++;
  return sg->buff;
}
Example #7
0
ERR_VALUE kmer_freq_distribution(const PROGRAM_OPTIONS *Options, const uint32_t KMerSize, const ONE_READ *Reads, const size_t ReadCount)
{
	int err;
	size_t maxValue = 0;
	khiter_t it;
	size_t kmerCount = 0;
	char *kmerString = NULL;
	khash_t(kc) *table = kh_init(kc);
	ERR_VALUE ret = ERR_INTERNAL_ERROR;

	ret = utils_calloc(KMerSize + 1, sizeof(char), &kmerString);
	if (ret == ERR_SUCCESS) {
		const ONE_READ *r = Reads;
		
		kmerString[KMerSize] = '\0';
		for (size_t i = 0; i < ReadCount; ++i) {
			const READ_PART *p = &r->Part;
			
				read_split(r);
				if (p->ReadSequenceLength >= KMerSize) {
					for (size_t j = 0; j < p->ReadSequenceLength - KMerSize + 1; ++j) {
						char *s = NULL;

						memcpy(kmerString, p->ReadSequence + j, KMerSize*sizeof(char));
						ret = utils_copy_string(kmerString, &s);
						if (ret == ERR_SUCCESS) {
							it = kh_put(kc, table, s, &err);
							switch (err) {
							case 0:
								kh_value(table, it) += 1;
								if (kh_value(table, it) > maxValue)
									maxValue = kh_value(table, it);

								utils_free(s);
								break;
							case 1:
							case 2:
								kh_value(table, it) = 1;
								break;
							default:
								ret = ERR_OUT_OF_MEMORY;
								break;
							}

							++kmerCount;
							if (ret != ERR_SUCCESS)
								utils_free(s);
						}

						if (ret != ERR_SUCCESS)
							break;
					}
				}

			if (ret != ERR_SUCCESS)
				break;

			++r;
		}

		if (ret == ERR_SUCCESS) {
			size_t *freqArray = NULL;

			++maxValue;
			ret = utils_calloc(maxValue, sizeof(size_t), &freqArray);
			if (ret == ERR_SUCCESS) {
				memset(freqArray, 0, maxValue*sizeof(size_t));
				for (it = kh_begin(table); it != kh_end(table); ++it) {
					if (kh_exist(table, it))
						++freqArray[kh_value(table, it)];
				}

				for (size_t i = 0; i < maxValue; ++i) {
					if (freqArray[i] > 0)
						fprintf(stdout, "%Iu, %Iu, %lf\n", i, freqArray[i], (double)freqArray[i]*100/ (double)kmerCount);
				}

				utils_free(freqArray);
			}
		}

		utils_free(kmerString);
	}

	for (size_t i = kh_begin(table); i < kh_end(table); ++i) {
		if (kh_exist(table, i))
			utils_free(kh_key(table, i));
	}

	kh_destroy(kc, table);

	return ret;
}
Example #8
0
int stk_maskseq(int argc, char *argv[])
{
	khash_t(reg) *h = kh_init(reg);
	gzFile fp;
	kseq_t *seq;
	int l, i, j, c, is_complement = 0, is_lower = 0;
	khint_t k;
	while ((c = getopt(argc, argv, "cl")) >= 0) {
		switch (c) {
		case 'c': is_complement = 1; break;
		case 'l': is_lower = 1; break;
		}
	}
	if (argc - optind < 2) {
		fprintf(pysamerr, "Usage:   seqtk maskseq [-cl] <in.fa> <in.bed>\n\n");
		fprintf(pysamerr, "Options: -c     mask the complement regions\n");
		fprintf(pysamerr, "         -l     soft mask (to lower cases)\n");
		return 1;
	}
	h = stk_reg_read(argv[optind+1]);
	// maskseq
	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
	seq = kseq_init(fp);
	while ((l = kseq_read(seq)) >= 0) {
		k = kh_get(reg, h, seq->name.s);
		if (k == kh_end(h)) { // not found in the hash table
			if (is_complement) {
				for (j = 0; j < l; ++j)
					seq->seq.s[j] = is_lower? tolower(seq->seq.s[j]) : 'N';
			}
		} else {
			reglist_t *p = &kh_val(h, k);
			if (!is_complement) {
				for (i = 0; i < p->n; ++i) {
					int beg = p->a[i]>>32, end = p->a[i];
					if (beg >= seq->seq.l) {
						fprintf(pysamerr, "[maskseq] start position >= the sequence length.\n");
						continue;
					}
					if (end >= seq->seq.l) end = seq->seq.l;
					if (is_lower) for (j = beg; j < end; ++j) seq->seq.s[j] = tolower(seq->seq.s[j]);
					else for (j = beg; j < end; ++j) seq->seq.s[j] = 'N';
				}
			} else {
				int8_t *mask = calloc(seq->seq.l, 1);
				for (i = 0; i < p->n; ++i) {
					int beg = p->a[i]>>32, end = p->a[i];
					if (end >= seq->seq.l) end = seq->seq.l;
					for (j = beg; j < end; ++j) mask[j] = 1;
				}
				for (j = 0; j < l; ++j)
					if (mask[j] == 0) seq->seq.s[j] = is_lower? tolower(seq->seq.s[j]) : 'N';
				free(mask);
			}
		}
		printf(">%s", seq->name.s);
		for (j = 0; j < seq->seq.l; ++j) {
			if (j%60 == 0) putchar('\n');
			putchar(seq->seq.s[j]);
		}
		putchar('\n');
	}
Example #9
0
static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int min_pos)
{
    if ( !srt->grp_str2int )
    {
        // first time here, initialize
        if ( !srt->pair )
        {
            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
        }
        bcf_sr_init_scores(srt);
        srt->grp_str2int = khash_str2int_init();
        srt->var_str2int = khash_str2int_init();
    }
    int k;
    khash_t(str2int) *hash;
    hash = srt->grp_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    hash = srt->var_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    kh_clear(str2int, srt->grp_str2int);
    kh_clear(str2int, srt->var_str2int);
    srt->ngrp = srt->nvar = srt->nvset = 0;

    grp_t grp;
    memset(&grp,0,sizeof(grp_t));

    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
    int ireader,ivar,irec,igrp,ivset,iact;
    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
    for (iact=0; iact<srt->nactive; iact++)
    {
        ireader = srt->active[iact];
        bcf_sr_t *reader = &readers->readers[ireader];
        int rid   = bcf_hdr_name2id(reader->header, chr);
        grp.nvar  = 0;
        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
        srt->noff  = 0;
        srt->str.l = 0;
        for (irec=1; irec<=reader->nbuffer; irec++)
        {
            bcf1_t *line = reader->buffer[irec];
            if ( line->rid!=rid || line->pos!=min_pos ) break;

            if ( srt->str.l ) kputc(';',&srt->str);
            srt->off[srt->noff++] = srt->str.l;
            size_t beg = srt->str.l;
            for (ivar=1; ivar<line->n_allele; ivar++)
            {
                if ( ivar>1 ) kputc(',',&srt->str);
                kputs(line->d.allele[0],&srt->str);
                kputc('>',&srt->str);
                kputs(line->d.allele[ivar],&srt->str);
            }
            if ( line->n_allele==1 )
            {
                kputs(line->d.allele[0],&srt->str);
                kputsn(">.",2,&srt->str);
            }

            // Create new variant or attach to existing one. But careful, there can be duplicate
            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
            char *var_str = beg + srt->str.s;
            int ret, var_idx = 0, var_end = srt->str.l;
            while ( 1 )
            {
                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
                if ( ret==-1 ) break;

                var_t *var = &srt->var[ivar];
                if ( var->vcf[var->nvcf-1] != ireader ) break;

                srt->str.l = var_end;
                kputw(var_idx, &srt->str);
                var_str = beg + srt->str.s;
                var_idx++;
            }
            if ( ret==-1 )
            {
                ivar = srt->nvar++;
                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
                srt->var[ivar].nvcf = 0;
                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
                free(srt->var[ivar].str);   // possible left-over from the previous position
            }
            var_t *var = &srt->var[ivar];
            var->nalt = line->n_allele - 1;
            var->type = bcf_get_variant_types(line);
            srt->str.s[var_end] = 0;
            if ( ret==-1 )
                var->str = strdup(var_str);

            int mvcf = var->mvcf;
            var->nvcf++;
            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
            var->vcf[var->nvcf-1] = ireader;
            var->rec[var->nvcf-1] = line;

            grp.nvar++;
            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
            grp.var[grp.nvar-1] = ivar;
        }
        char *grp_key = grp_create_key(srt);
        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
        if ( ret==-1 )
        {
            igrp = srt->ngrp++;
            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
            free(srt->grp[igrp].var);
            srt->grp[igrp] = grp;
            srt->grp[igrp].key = grp_key;
            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
            memset(&grp,0,sizeof(grp_t));
        }
        else
            free(grp_key);
        srt->grp[igrp].nvcf++;
    }
    free(grp.var);

    // initialize bitmask - which groups is the variant present in
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        srt->var[ivar].mask = kbs_resize(srt->var[ivar].mask, srt->ngrp);
        kbs_clear(srt->var[ivar].mask);
    }
    for (igrp=0; igrp<srt->ngrp; igrp++)
    {
        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
        {
            int i = srt->grp[igrp].var[ivar];
            kbs_insert(srt->var[i].mask, igrp);
        }
    }

    // create the initial list of variant sets
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        ivset = srt->nvset++;
        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);

        varset_t *vset = &srt->vset[ivset];
        vset->nvar = 1;
        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
        vset->var[vset->nvar-1] = ivar;
        var_t *var  = &srt->var[ivar];
        vset->cnt   = var->nvcf;
        vset->mask  = kbs_resize(vset->mask, srt->ngrp);
        kbs_clear(vset->mask);
        kbs_bitwise_or(vset->mask, var->mask);

        int type = 0;
        if ( var->type==VCF_REF ) type |= SR_REF;
        else
        {
            if ( var->type & VCF_SNP ) type |= SR_SNP;
            if ( var->type & VCF_MNP ) type |= SR_SNP;
            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
        }
        var->type = type;
    }
#if DEBUG_VSETS
    debug_vsets(srt);
#endif

    // initialize the pairing matrix
    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
    for (ivset=0; ivset<srt->nvset; ivset++)
    {
        varset_t *vset = &srt->vset[ivset];
        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
        srt->cnt[ivset] = vset->cnt;
    }

    // pair the lines
    while ( srt->nvset )
    {
#if DEBUG_VSETS
    fprintf(stderr,"\n");
    debug_vsets(srt);
#endif

        int imax = 0;
        for (ivset=1; ivset<srt->nvset; ivset++)
            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;

        int ipair = -1;
        uint32_t max_score = 0;
        for (ivset=0; ivset<srt->nvset; ivset++)
        {
            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
            uint32_t score = pairing_score(srt, imax, ivset);
            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
            if ( max_score < score ) { max_score = score; ipair = ivset; }
        }

        // merge rows creating a new variant set this way
        if ( ipair!=-1 && ipair!=imax )
        {
            imax = merge_vsets(srt, imax, ipair);
            continue;
        }

        push_vset(srt, imax);
    }

    srt->chr = chr;
    srt->pos = min_pos;
}
Example #10
0
static void
gc_mark_phase(pic_state *pic)
{
  struct context *cxt;
  size_t j;

  assert(pic->heap->weaks == NULL);

  /* context */
  for (cxt = pic->cxt; cxt != NULL; cxt = cxt->prev) {
    if (cxt->fp) gc_mark_object(pic, (struct object *)cxt->fp);
    if (cxt->sp) gc_mark_object(pic, (struct object *)cxt->sp);
    if (cxt->irep) gc_mark_object(pic, (struct object *)cxt->irep);
  }

  /* arena */
  for (j = 0; j < pic->ai; ++j) {
    gc_mark_object(pic, (struct object *)pic->arena[j]);
  }

  /* global variables */
  gc_mark(pic, pic->globals);

  /* dynamic environment */
  gc_mark(pic, pic->dyn_env);

  /* top continuation */
  gc_mark(pic, pic->halt);

  /* features */
  gc_mark(pic, pic->features);

  /* weak maps */
  do {
    struct object *key;
    pic_value val;
    int it;
    khash_t(weak) *h;
    struct weak *weak;

    j = 0;
    weak = pic->heap->weaks;

    while (weak != NULL) {
      h = &weak->hash;
      for (it = kh_begin(h); it != kh_end(h); ++it) {
        if (! kh_exist(h, it))
          continue;
        key = kh_key(h, it);
        val = kh_val(h, it);
        if (is_alive(key)) {
          if (obj_p(pic, val) && ! is_alive(obj_ptr(pic, val))) {
            gc_mark(pic, val);
            ++j;
          }
        }
      }
      weak = weak->prev;
    }
  } while (j > 0);
}
Example #11
0
char *fai_fetch(const faidx_t *fai, const char *str, int *len)
{
	char *s, c;
	int i, l, k, name_end;
	khiter_t iter;
	faidx1_t val;
	khash_t(s) *h;
	int beg, end;

	beg = end = -1;
	h = fai->hash;
	name_end = l = strlen(str);
	s = (char*)malloc(l+1);
	// remove space
	for (i = k = 0; i < l; ++i)
		if (!isspace(str[i])) s[k++] = str[i];
	s[k] = 0; l = k;
	// determine the sequence name
	for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end
	if (i >= 0) name_end = i;
	if (name_end < l) { // check if this is really the end
		int n_hyphen = 0;
		for (i = name_end + 1; i < l; ++i) {
			if (s[i] == '-') ++n_hyphen;
			else if (!isdigit(s[i]) && s[i] != ',') break;
		}
		if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name
		s[name_end] = 0;
		iter = kh_get(s, h, s);
		if (iter == kh_end(h)) { // cannot find the sequence name
			iter = kh_get(s, h, str); // try str as the name
			if (iter == kh_end(h)) {
				*len = 0;
			free(s); return 0;
			} else s[name_end] = ':', name_end = l;
		}
	} else iter = kh_get(s, h, str);
	if(iter == kh_end(h)) {
		fprintf(pysamerr, "[fai_fetch] Warning - Reference %s not found in FASTA file, returning empty sequence\n", str);
		free(s);
		return 0;
	};
	val = kh_value(h, iter);
	// parse the interval
	if (name_end < l) {
		for (i = k = name_end + 1; i < l; ++i)
			if (s[i] != ',') s[k++] = s[i];
		s[k] = 0;
		beg = atoi(s + name_end + 1);
		for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break;
		end = i < k? atoi(s + i + 1) : val.len;
		if (beg > 0) --beg;
	} else beg = 0, end = val.len;
	if (beg >= val.len) beg = val.len;
	if (end >= val.len) end = val.len;
	if (beg > end) beg = end;
	free(s);

	// now retrieve the sequence
	l = 0;
	s = (char*)malloc(end - beg + 2);
	razf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);
	while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err)
		if (isgraph(c)) s[l++] = c;
	s[l] = '\0';
	*len = l;
	return s;
}
Example #12
0
static void rmdupse_buf(buffer_t *buf)
{
	khash_t(32) *h;
	uint32_t key;
	khint_t k;
	int mpos, i, upper;
	listelem_t *p;
	mpos = 0x7fffffff;
	mpos = (buf->x == buf->n)? buf->buf[buf->x-1].b->core.pos : 0x7fffffff;
	upper = (buf->x < 0)? buf->n : buf->x;
	// fill the hash table
	h = kh_init(32);
	for (i = 0; i < upper; ++i) {
		elem_t *e = buf->buf + i;
		int ret;
		if (e->score < 0) continue;
		if (e->rpos >= 0) {
			if (e->rpos <= mpos) key = (uint32_t)e->rpos<<1 | 1;
			else continue;
		} else {
			if (e->b->core.pos < mpos) key = (uint32_t)e->b->core.pos<<1;
			else continue;
		}
		k = kh_put(32, h, key, &ret);
		p = &kh_val(h, k);
		if (ret == 0) { // present in the hash table
			if (p->n == p->m) {
				p->m <<= 1;
				p->a = (int*)realloc(p->a, p->m * sizeof(int));
			}
			p->a[p->n++] = i;
		} else {
			p->m = p->n = 1;
			p->a = (int*)calloc(p->m, sizeof(int));
			p->a[0] = i;
		}
	}
	// rmdup
	for (k = kh_begin(h); k < kh_end(h); ++k) {
		if (kh_exist(h, k)) {
			int max, maxi;
			p = &kh_val(h, k);
			// get the max
			for (i = max = 0, maxi = -1; i < p->n; ++i) {
				if (buf->buf[p->a[i]].score > max) {
					max = buf->buf[p->a[i]].score;
					maxi = i;
				}
			}
			// mark the elements
			for (i = 0; i < p->n; ++i) {
				buf->buf[p->a[i]].score = -1;
				if (i != maxi) {
					bam_destroy1(buf->buf[p->a[i]].b);
					buf->buf[p->a[i]].b = 0;
				}
			}
			// free
			free(p->a);
		}
	}
	kh_destroy(32, h);
}
Example #13
0
static void trans_tbl_init(bam_hdr_t* out, bam_hdr_t* translate, trans_tbl_t* tbl, bool merge_rg, bool merge_pg)
{
    tbl->n_targets = translate->n_targets;
    tbl->tid_trans = (int*)calloc(translate->n_targets, sizeof(int));
    tbl->rg_trans = kh_init(c2c);
    tbl->pg_trans = kh_init(c2c);
    if (!tbl->tid_trans || !tbl->rg_trans || !tbl->pg_trans) { perror("out of memory"); exit(-1); }

    int32_t out_len = out->l_text;
    while (out_len > 0 && out->text[out_len-1] == '\n') {--out_len; } // strip trailing \n's
    kstring_t out_text = { 0, 0, NULL };
    kputsn(out->text, out_len, &out_text);

    int i, min_tid = -1;
    tbl->lost_coord_sort = false;

    khash_t(c2i) *out_tid = kh_init(c2i);
    for (i = 0; i < out->n_targets; ++i) {
        int ret;
        khiter_t iter = kh_put(c2i, out_tid, out->target_name[i], &ret);
        if (ret <= 0) abort();
        kh_value(out_tid, iter) = i;
    }

    for (i = 0; i < translate->n_targets; ++i) {
        khiter_t iter = kh_get(c2i, out_tid, translate->target_name[i]);

        if (iter == kh_end(out_tid)) { // Append missing entries to out
            tbl->tid_trans[i] = out->n_targets++;
            out->target_name = (char**)realloc(out->target_name, sizeof(char*)*out->n_targets);
            out->target_name[out->n_targets-1] = strdup(translate->target_name[i]);
            out->target_len = (uint32_t*)realloc(out->target_len, sizeof(uint32_t)*out->n_targets);
            out->target_len[out->n_targets-1] = translate->target_len[i];
            // grep line with regex '^@SQ.*\tSN:%s(\t.*$|$)', translate->target_name[i]
            // from translate->text
            regex_t sq_id;
            regmatch_t* matches = (regmatch_t*)calloc(2, sizeof(regmatch_t));
            if (matches == NULL) { perror("out of memory"); exit(-1); }
            kstring_t seq_regex = { 0, 0, NULL };
            ksprintf(&seq_regex, "^@SQ.*\tSN:%s(\t.*$|$)", translate->target_name[i]);
            regcomp(&sq_id, seq_regex.s, REG_EXTENDED|REG_NEWLINE);
            free(seq_regex.s);
            if (regexec(&sq_id, translate->text, 1, matches, 0) != 0)
            {
                fprintf(pysamerr, "[trans_tbl_init] @SQ SN (%s) found in binary header but not text header.\n",translate->target_name[i]);
                exit(1);
            }
            regfree(&sq_id);

            // Produce our output line and append it to out_text
            kputc('\n', &out_text);
            kputsn(translate->text+matches[0].rm_so, matches[0].rm_eo-matches[0].rm_so, &out_text);

            free(matches);
        } else {
            tbl->tid_trans[i] = kh_value(out_tid, iter);
        }
        if (tbl->tid_trans[i] > min_tid) {
            min_tid = tbl->tid_trans[i];
        } else {
            tbl->lost_coord_sort = true;
        }
    }
    kh_destroy(c2i, out_tid);

    // grep @RG id's
    regex_t rg_id;
    regmatch_t* matches = (regmatch_t*)calloc(2, sizeof(regmatch_t));
    if (matches == NULL) { perror("out of memory"); exit(-1); }
    regcomp(&rg_id, "^@RG.*\tID:([!-)+-<>-~][ !-~]*)(\t.*$|$)", REG_EXTENDED|REG_NEWLINE);
    char* text = translate->text;
    klist_t(hdrln) *rg_list = kl_init(hdrln);
    while(1) { //   foreach rg id in translate's header
        if (regexec(&rg_id, text, 2, matches, 0) != 0) break;
        // matches[0] is the whole @RG line; matches[1] is the ID field value
        kstring_t match_id = { 0, 0, NULL };
        kputsn(text+matches[1].rm_so, matches[1].rm_eo-matches[1].rm_so, &match_id);

        // is our matched ID in our output list already
        regex_t rg_id_search;
        kstring_t rg_regex = { 0, 0, NULL };
        ksprintf(&rg_regex, "^@RG.*\tID:%s(\t.*$|$)", match_id.s);
        regcomp(&rg_id_search, rg_regex.s, REG_EXTENDED|REG_NEWLINE|REG_NOSUB);
        free(rg_regex.s);
        kstring_t transformed_id = { 0, 0, NULL };
        bool transformed_equals_match;
        if (regexec(&rg_id_search, out->text, 0, NULL, 0) != 0  || merge_rg) {
            // Not in there so can add it as 1-1 mapping
            kputs(match_id.s, &transformed_id);
            transformed_equals_match = true;
        } else {
            // It's in there so we need to transform it by appending random number to id
            ksprintf(&transformed_id, "%s-%0lX", match_id.s, lrand48());
            transformed_equals_match = false;
        }
        regfree(&rg_id_search);

        // Insert it into our translation map
        int in_there = 0;
        khiter_t iter = kh_put(c2c, tbl->rg_trans, ks_release(&match_id), &in_there);
        char *transformed_id_s = ks_release(&transformed_id);
        kh_value(tbl->rg_trans,iter) = transformed_id_s;
        // take matched line and replace ID with transformed_id
        kstring_t transformed_line = { 0, 0, NULL };
        if (transformed_equals_match) {
            kputsn(text+matches[0].rm_so, matches[0].rm_eo-matches[0].rm_so, &transformed_line);
        } else {
            kputsn(text+matches[0].rm_so, matches[1].rm_so-matches[0].rm_so, &transformed_line);
            kputs(transformed_id_s, &transformed_line);
            kputsn(text+matches[1].rm_eo, matches[0].rm_eo-matches[1].rm_eo, &transformed_line);
        }

        if (!(transformed_equals_match && merge_rg)) {
            // append line to linked list for PG processing
            char** ln = kl_pushp(hdrln, rg_list);
            *ln = ks_release(&transformed_line);  // Give away to linked list
        }
        else free(transformed_line.s);

        text += matches[0].rm_eo; // next!
    }
    regfree(&rg_id);

    // Do same for PG id's
    regex_t pg_id;
    regcomp(&pg_id, "^@PG.*\tID:([!-)+-<>-~][ !-~]*)(\t.*$|$)", REG_EXTENDED|REG_NEWLINE);
    text = translate->text;
    klist_t(hdrln) *pg_list = kl_init(hdrln);
    while(1) { //   foreach pg id in translate's header
        if (regexec(&pg_id, text, 2, matches, 0) != 0) break;
        kstring_t match_id = { 0, 0, NULL };
        kputsn(text+matches[1].rm_so, matches[1].rm_eo-matches[1].rm_so, &match_id);

        // is our matched ID in our output list already
        regex_t pg_id_search;
        kstring_t pg_regex = { 0, 0, NULL };
        ksprintf(&pg_regex, "^@PG.*\tID:%s(\t.*$|$)", match_id.s);
        regcomp(&pg_id_search, pg_regex.s, REG_EXTENDED|REG_NEWLINE|REG_NOSUB);
        free(pg_regex.s);
        kstring_t transformed_id = { 0, 0, NULL };
        bool transformed_equals_match;
        if (regexec(&pg_id_search, out->text, 0, NULL, 0) != 0 || merge_pg) {
            // Not in there so can add it as 1-1 mapping
            kputs(match_id.s, &transformed_id);
            transformed_equals_match = true;
        } else {
            // It's in there so we need to transform it by appending random number to id
            ksprintf(&transformed_id, "%s-%0lX", match_id.s, lrand48());
            transformed_equals_match = false;
        }
        regfree(&pg_id_search);

        // Insert it into our translation map
        int in_there = 0;
        khiter_t iter = kh_put(c2c, tbl->pg_trans, ks_release(&match_id), &in_there);
        char *transformed_id_s = ks_release(&transformed_id);
        kh_value(tbl->pg_trans,iter) = transformed_id_s;
        // take matched line and replace ID with transformed_id
        kstring_t transformed_line = { 0, 0, NULL };
        if (transformed_equals_match) {
            kputsn(text+matches[0].rm_so, matches[0].rm_eo-matches[0].rm_so, &transformed_line);
        } else {
            kputsn(text+matches[0].rm_so, matches[1].rm_so-matches[0].rm_so, &transformed_line);
            kputs(transformed_id_s, &transformed_line);
            kputsn(text+matches[1].rm_eo, matches[0].rm_eo-matches[1].rm_eo, &transformed_line);
        }

        if (!(transformed_equals_match && merge_pg)) {
            // append line to linked list for PP processing
            char** ln = kl_pushp(hdrln, pg_list);
            *ln = ks_release(&transformed_line);  // Give away to linked list
        }
        else free(transformed_line.s);
        text += matches[0].rm_eo; // next!
    }
    regfree(&pg_id);
    // need to translate PP's on the fly in second pass because they may not be in correct order and need complete tbl->pg_trans to do this
    // for each line {
    // with ID replaced with tranformed_id and PP's transformed using the translation table
    // }
    regex_t pg_pp;
    regcomp(&pg_pp, "^@PG.*\tPP:([!-)+-<>-~][!-~]*)(\t.*$|$)", REG_EXTENDED|REG_NEWLINE);
    kliter_t(hdrln) *iter = kl_begin(pg_list);
    while (iter != kl_end(pg_list)) {
        char* data = kl_val(iter);

        kstring_t transformed_line = { 0, 0, NULL };
        // Find PP tag
        if (regexec(&pg_pp, data, 2, matches, 0) == 0) {
            // Lookup in hash table
            kstring_t pp_id = { 0, 0, NULL };
            kputsn(data+matches[1].rm_so, matches[1].rm_eo-matches[1].rm_so, &pp_id);

            khiter_t k = kh_get(c2c, tbl->pg_trans, pp_id.s);
            free(pp_id.s);
            char* transformed_id = kh_value(tbl->pg_trans,k);
            // Replace
            kputsn(data, matches[1].rm_so-matches[0].rm_so, &transformed_line);
            kputs(transformed_id, &transformed_line);
            kputsn(data+matches[1].rm_eo, matches[0].rm_eo-matches[1].rm_eo, &transformed_line);
        } else { kputs(data, &transformed_line); }
        // Produce our output line and append it to out_text
        kputc('\n', &out_text);
        kputsn(transformed_line.s, transformed_line.l, &out_text);

        free(transformed_line.s);
        free(data);
        iter = kl_next(iter);
    }
    regfree(&pg_pp);

    // Need to also translate @RG PG's on the fly too
    regex_t rg_pg;
    regcomp(&rg_pg, "^@RG.*\tPG:([!-)+-<>-~][!-~]*)(\t.*$|$)", REG_EXTENDED|REG_NEWLINE);
    kliter_t(hdrln) *rg_iter = kl_begin(rg_list);
    while (rg_iter != kl_end(rg_list)) {
        char* data = kl_val(rg_iter);

        kstring_t transformed_line = { 0, 0, NULL };
        // Find PG tag
        if (regexec(&rg_pg, data, 2, matches, 0) == 0) {
            // Lookup in hash table
            kstring_t pg_id = { 0, 0, NULL };
            kputsn(data+matches[1].rm_so, matches[1].rm_eo-matches[1].rm_so, &pg_id);

            khiter_t k = kh_get(c2c, tbl->pg_trans, pg_id.s);
            free(pg_id.s);
            char* transformed_id = kh_value(tbl->pg_trans,k);
            // Replace
            kputsn(data, matches[1].rm_so-matches[0].rm_so, &transformed_line);
            kputs(transformed_id, &transformed_line);
            kputsn(data+matches[1].rm_eo, matches[0].rm_eo-matches[1].rm_eo, &transformed_line);
        } else { kputs(data, &transformed_line); }
        // Produce our output line and append it to out_text
        kputc('\n', &out_text);
        kputsn(transformed_line.s, transformed_line.l, &out_text);

        free(transformed_line.s);
        free(data);
        rg_iter = kl_next(rg_iter);
    }

    regfree(&rg_pg);
    kl_destroy(hdrln,pg_list);
    kl_destroy(hdrln,rg_list);
    free(matches);

    // Add trailing \n and write back to header
    free(out->text);
    kputc('\n', &out_text);
    out->l_text = out_text.l;
    out->text = ks_release(&out_text);
}
Example #14
0
                have_phrase = true;
                break;
            }
        }

        if (!have_phrase) {
            add_normalized_strings_token(strings, str, token, options);
        }

        string_tree_finalize_token(tree);
    }

}


void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_strings, char *str, normalize_options_t options) {
    size_t len = strlen(str);
    token_array *tokens = tokenize_keep_whitespace(str);
    string_tree_t *token_tree = string_tree_new_size(len);

    add_normalized_strings_tokenized(token_tree, str, tokens, options);
    string_tree_iterator_t *tokenized_iter = string_tree_iterator_new(token_tree);

    string_tree_iterator_t *iter;

    char_array *temp_string = char_array_new_size(len);

    char *token;

    char *lang;
Example #15
0
khash_t (si32) *
new_si32_ht (void)
{
  khash_t (si32) * h = kh_init (si32);
  return h;
}
static int output_stats_and_reset(struct bgpcorsaro_pfxmonitor_state_t *state,
                                  uint32_t interval_start)
{
  khiter_t k;
  khiter_t p;
  khiter_t a;
  int khret;
  uint8_t pfx_visible;
  uint32_t unique_pfxs = 0;
  khash_t(peer_asn_map) * pam;
  /* origin_asn -> num peer ASns*/
  khash_t(asn_count_map) *asn_np = NULL;

  if ((asn_np = kh_init(asn_count_map)) == NULL) {
    return -1;
  }

  /* for each prefix go through all peers */
  for (k = kh_begin(state->pfx_info); k != kh_end(state->pfx_info); ++k) {
    if (kh_exist(state->pfx_info, k) == 0) {
      continue;
    }
    /* reset counters */
    kh_clear(asn_count_map, asn_np);

    /* get peer-asn map for this prefix */
    pam = kh_value(state->pfx_info, k);

    /* save the origin asn visibility (i.e. how many peers' ASns
     * observe such information */

    /* for each peer, go through all origins */
    for (p = kh_begin(pam); p != kh_end(pam); ++p) {
      if (kh_exist(pam, p) == 0) {
        continue;
      }
      /* increment the counter for this ASN */
      if ((a = kh_get(asn_count_map, asn_np, kh_value(pam, p))) ==
          kh_end(asn_np)) {
        a = kh_put(asn_count_map, asn_np, kh_value(pam, p), &khret);
        kh_value(asn_np, a) = 1;
      } else {
        kh_value(asn_np, a)++;
      }
    }

    /* now asn_np has a complete count of the number of peers' ASns that
       observed
       each origin ASN */

    /* count the prefix and origins if their visibility
     * is above the threshold */
    pfx_visible = 0;
    for (a = kh_begin(asn_np); a != kh_end(asn_np); ++a) {
      if (kh_exist(asn_np, a) == 0) {
        continue;
      }
      /* the information is accounted only if it is
       * consistent on at least threshold peers' ASns */
      if (kh_value(asn_np, a) >= state->peer_asns_th) {
        pfx_visible = 1;
        bgpstream_id_set_insert(state->unique_origins, kh_key(asn_np, a));
      }
    }

    /* updating counters */
    unique_pfxs += pfx_visible;
  }

  DUMP_METRIC(unique_pfxs, state->interval_start, "%s.%s.%s.%s",
              state->metric_prefix, PLUGIN_NAME, state->ip_space_name,
              "prefixes_cnt");

  DUMP_METRIC(bgpstream_id_set_size(state->unique_origins),
              state->interval_start, "%s.%s.%s.%s", state->metric_prefix,
              PLUGIN_NAME, state->ip_space_name, "origin_ASns_cnt");

  bgpstream_id_set_clear(state->unique_origins);
  kh_destroy(asn_count_map, asn_np);

  return 0;
}
Example #17
0
	ii->ap_prior = .01 * (n_ap + .01) / tot;
	if (ii->ap_prior < ap_prior) ii->ap_prior = ap_prior;

	fprintf(stderr, "[infer_isize] %s: qu(%d, %d, %d)", rg?rg:"(null)", p25, p50, p75);
	if (isnan(ii->std) || p75 > MAX_ISIZE) {
		ii->low = ii->high = ii->high_bayesian = 0; ii->avg = ii->std = -1.0;
		fprintf(stderr, " -- not useable\n");
		return -1;
	}

	fprintf(stderr, " bound(%d,%d), num/avg/std/kur/skw %d/%.3lf/%.3lf/%.3lf/%.3lf, ap %.2e, max %d, %.2lf sigma\n",
            ii->low, ii->high, n, ii->avg, ii->std, skewness, kurtosis, ii->ap_prior, ii->high_bayesian, y);
	return 0;
}

void improve_isize_est(khash_t(isize_infos) *iinfos, bam_pair_t *p, double ap_prior, int64_t L)
{
    bwa_seq_t *s = p->bwa_seq ;
    if( p->kind<1 || s[0].mapQ < 20 ) return ;
    if( p->kind>1 && s[1].mapQ < 20 ) return ;

    int len = p->kind==1 ? s[0].len :       // single read: isize is length (trimming/merging is assumed)
        s[0].pos < s[1].pos ? s[1].pos + s[1].len - s[0].pos :
        s[0].pos + s[0].len - s[1].pos;
    if( len < 0 || len >= MAX_ISIZE ) return ;

    int ret = 0 ;
    const char *rg = bam_get_rg(p->bam_rec);
    khiter_t it = kh_get(isize_infos, iinfos, rg) ;
    isize_info_t *ii = &kh_value(iinfos, it) ;
    if( it==kh_end(iinfos) ) { 
Example #18
0
        this_coord = unclipped_end(bam);
        orientation = O_RR;
    } else {
        this_coord = unclipped_start(bam);
        orientation = O_FF;
    }

    key->single        = 1;
    key->this_ref      = this_ref;
    key->this_coord    = this_coord;
    key->orientation   = orientation;
}

/* Add the duplicate name to a hash if it does not exist. */

static int add_duplicate(khash_t(duplicates) *d_hash, bam1_t *dupe) {
    khiter_t d;
    int ret;

    d = kh_get(duplicates, d_hash, bam_get_qname(dupe));

    if (d == kh_end(d_hash)) {
        d = kh_put(duplicates, d_hash, strdup(bam_get_qname(dupe)), &ret);

        if (ret > 0) {
            kh_value(d_hash, d) = 1;
        } else if (ret == 0) {
            kh_value(d_hash, d)++;
        } else {
            fprintf(stderr, "[markdup] error: unable to store supplementary duplicates.\n");
            return 1;
Example #19
0
size_t
sen_render_flush(int clear_buff)
{
//  gl_check_error();
  //_logfi("1");
  blend_group_t* bg;
  khint_t i,k,j;
  size_t total = 0;
  khash_t(hmsp)*  tgs;
  camera_t* cam = sen_camera();
  tex_group_t* tg;
  khash_t(hmsp)* sgs;
  shader_group_t* sg;

  vector_clear(zsorter);
  for (k = kh_begin(g_bgs); k != kh_end(g_bgs); ++k)
  {
    if (!kh_exist(g_bgs,k)) continue;
    bg = kh_val(g_bgs, k);

    if (bg->num == 0) {
      kh_del(hmip,g_bgs,k);
      continue;
    }
    tgs = bg->tgs;

   // set_blending( (blend_func) (kh_key(g_bgs, k))  );

    for (i = kh_begin(tgs); i != kh_end(tgs); ++i)
    {
      if (!kh_exist(tgs,i)) continue;
      tg = kh_val(tgs, i);
      if (tg->num == 0) {
        kh_del(hmsp,tgs,i);
        continue;
      }

      /*
      if (tg->tex)
        sen_texture_bind(tg->tex);
      else if (tg->font)
        sen_font_bind(tg->font);
        */

      sgs = tg->sgs;

      for (j = kh_begin(sgs); j != kh_end(sgs); ++j)
      {
        if (!kh_exist(sgs,j)) continue;
        sg = kh_val(sgs, j);
        if (sg->num == 0 || !sg->buff) {
          kh_del(hmsp,sgs,j);
          continue;
        }
        if (sg->buff) {
          /*
          sen_shader_use(sg->program);
          {
            if (tg->tex || tg->font)
              sen_uniform_1iN(sg->program, "u_tex0", 0);
            sen_uniform_m4fN(sg->program, "u_mvp",  cam->view_proj.data);
            vertex_buffer_render( sg->buff, GL_TRIANGLES);
            total+=vertex_buffer_size(sg->buff);
            if (clear_buff)
              vertex_buffer_clear( sg->buff );
            //sen_shader_use(NULL);
          }*/
          vector_push_back( zsorter, &sg );
        }
        sg->num = 0;
      }
      tg->num = 0;
    }
    bg->num = 0;
  }
  if (zsorter->size > 0)
    vector_sort(zsorter, zcmp);

  for (j = 0; j < zsorter->size; j++) {
    shader_group_t* sg = *(shader_group_t**)vector_get(zsorter, j);
   // _logfi("%s %d",sg->name, sg->z);


    set_blending( (blend_func) (sg->bg->key)  );


    if (sg->tg->tex)
      sen_texture_bind(sg->tg->tex);
    else if (sg->tg->font)
      sen_font_bind(sg->tg->font);

    sen_shader_use(sg->program);
    {

      if (sg->tg->tex || sg->tg->font)
        sen_uniform_1iN(sg->program, "u_tex0", 0);
      sen_uniform_m4fN(sg->program, "u_mvp", sg->z > 9500 ? cam->proj.data  : cam->view_proj.data);
      vertex_buffer_render( sg->buff, GL_TRIANGLES);
      total+=vertex_buffer_size(sg->buff);
      if (clear_buff)
        vertex_buffer_clear( sg->buff );

      //sen_shader_use(NULL);
    }
  }

 // _logfi("-------------------------------------------------");
  return total;
}
Example #20
0
	bam1_t *b;
} elem_t, *elem_p;
#define __free_elem(p) bam_destroy1((p)->data.b)
KLIST_INIT(q, elem_t, __free_elem)
typedef klist_t(q) queue_t;

KHASH_MAP_INIT_INT(best, elem_p)
typedef khash_t(best) besthash_t;

typedef struct {
	uint64_t n_checked, n_removed;
	besthash_t *left, *rght;
} lib_aux_t;
KHASH_MAP_INIT_STR(lib, lib_aux_t)

static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib)
{
	khint_t k = kh_get(lib, aux, lib);
	if (k == kh_end(aux)) {
		int ret;
		char *p = strdup(lib);
		lib_aux_t *q;
		k = kh_put(lib, aux, p, &ret);
		q = &kh_val(aux, k);
		q->left = kh_init(best);
		q->rght = kh_init(best);
		q->n_checked = q->n_removed = 0;
		return q;
	} else return &kh_val(aux, k);
}
Example #21
0
#include "haywire.h"
#include "hw_string.h"
#include "khash.h"
#include "http_response_cache.h"
#include "http_server.h"

#define CRLF "\r\n"
KHASH_MAP_INIT_STR(string_hashmap, hw_string*)

static uv_timer_t cache_invalidation_timer;
static uv_key_t thread_cache_key;

void initialize_http_request_cache();
void free_http_request_cache();
void http_request_cache_timer(uv_timer_t* handle, int status);
void create_cached_http_request(khash_t(string_hashmap)* http_request_cache, char* http_status);
void set_cached_request(khash_t(string_hashmap)* http_request_cache, char* http_status, hw_string* cache_entry);
hw_string* get_cached_request(char* http_status);

void initialize_http_request_cache()
{
    uv_key_create(&thread_cache_key);
}

void http_request_cache_configure_listener(uv_loop_t* loop, uv_async_t* handle)
{
    uv_timer_t* cache_invalidation_timer = malloc(sizeof(uv_timer_t));
    uv_timer_init(loop, cache_invalidation_timer);
    uv_timer_start(cache_invalidation_timer, http_request_cache_timer, 500, 500);
    uv_unref((uv_handle_t*) cache_invalidation_timer);
}
Example #22
0
  return h;
}

/* Initialize a new int key - uint64_t value hash table */
static
khash_t (su64) *
new_su64_ht (void)
{
  khash_t (su64) * h = kh_init (su64);
  return h;
}

/* Destroys both the hash structure and the keys for a
 * string key - int value hash */
static void
des_si32_free (khash_t (si32) * hash)
{
  khint_t k;
  if (!hash)
    return;

  for (k = 0; k < kh_end (hash); ++k) {
    if (kh_exist (hash, k)) {
      free ((char *) kh_key (hash, k));
    }
  }

  kh_destroy (si32, hash);
}

/* Destroys both the hash structure and its string values */
Example #23
0
int split_pe(int argc, char *argv[], char *progname)
{
    int c, bc_len = -1, ret, i, j, bc_idx, only_count = 0;
    unsigned num_mismatches = DEFAULT_NUM_MISMATCHES, 
             num_spacer_bases = DEFAULT_NUM_SPACER_BASES, 
             dna_alpha_len = strlen(DNA_ALPHA),
             num_undetermined = 0;
    char *out_prefix = NULL, *fn, **sptr,
         bc_id[1024], bc_seq[1024], bc_seq_cpy[1024]; // hello, buffer overflow
    clock_t t = clock();
    BcRec bc;
    ArrayBcRec bcs;
    FILE *fp;
    kseq_t *seq1, *seq2;
    khash_t(str) *h = kh_init(str);
    khint_t k, k2;
    gzFile *fp1, *fp2;

    ARRAY_INIT(&bcs, BcRec, 1000);

    while ((c = getopt(argc, argv, "m:s:o:c")) >= 0) {
        switch (c) {
            case 'm': if (sscanf(optarg, "%u", &num_mismatches) != 1) {
                          fprintf(stderr, "Error: option -m expects unsigned int\n");
                          return -1;
                      }
                      break;
            case 's': if (sscanf(optarg, "%u", &num_spacer_bases) != 1) {
                          fprintf(stderr, "Error: option -s expects unsigned int\n");
                          return -1;
                      }
                      break;
            case 'o': out_prefix = strdup(optarg);
                      break;
            case 'c': only_count = 1;
                      break;
        }
    }

    if (optind + 3 != argc) {
        print_pe_usage(progname);
        return -1;
    }

    if (num_mismatches != 0 && num_mismatches != 1) {
        fprintf(stderr, "Error: argument -m has to be 0 or 1\n");
        return -1;
    }

    if (out_prefix == NULL) {
        out_prefix = strdup(DEFAULT_OUTPUT_PREFIX);
    }

    for (sptr = argv+optind; sptr-argv<argc; sptr++) {
        if (access(*sptr, F_OK) == -1) {
            fprintf(stderr, "Error: file %s does not exist\n", *sptr);
            return -1;
        }
    }

    fprintf(stderr, "[barcode file: %s]\n", argv[optind]);
    fprintf(stderr, "[fastq file1: %s]\n", argv[optind+1]);
    fprintf(stderr, "[fastq file2: %s]\n", argv[optind+2]);
    fprintf(stderr, "[number of mismatches allowed: %u]\n", num_mismatches);
    fprintf(stderr, "[number of spacer bases: %u]\n", num_spacer_bases);
    fprintf(stderr, "[output prefix: %s]\n", out_prefix);
    fprintf(stderr, "[only count: %s]\n", only_count ? "true" : "false");

    /* read barcode file */
    if ((fp = fopen(argv[optind], "r")) == NULL) {
        fprintf(stderr, "Error: cannot open barcode file %s\n", argv[optind]);
        return -1;
    }

    while (fscanf(fp, "%s %s", bc_id, bc_seq) == 2) {
        bc_len = strlen(bc_seq);
        bc.id = strdup(bc_id);
        bc.seq = strdup(bc_seq);
        bc.num_found = 0;
        if (!only_count) {
            fn = (char*)calloc(strlen(out_prefix) + 3 + strlen(bc_id) + 6 + 1, sizeof(char));
            strcpy(fn, out_prefix);
            strcat(fn, "_1_");
            strcat(fn, bc_id);
            strcat(fn, ".fq.gz");
            bc.fp1 = gzopen(fn, "w");
            fn[strlen(out_prefix)+1] = '2';
            bc.fp2 = gzopen(fn, "w");
            free(fn);
        } else {
            bc.fp1 = NULL;
            bc.fp2 = NULL;
        }
        ARRAY_PUSH(&bcs, BcRec, bc);
        k = kh_put(str, h, strdup(bc_seq), &ret);
        if (num_mismatches == 0) {
            kh_val(h, k) = bcs.nextfree - 1;
            //printf("setting %s to %lu (%s %s)\n", bc_seq, bcs.nextfree - 1, bcs.elems[bcs.nextfree - 1].seq, bcs.elems[bcs.nextfree - 1].id);
        } else {
            for (i=0; i<strlen(bc_seq); i++) {
                strcpy(bc_seq_cpy, bc_seq);
                for (j=0; j<dna_alpha_len; j++) {
                    bc_seq_cpy[i] = DNA_ALPHA[j];
                    k = kh_put(str, h, strdup(bc_seq_cpy), &ret);
                    kh_val(h, k) = bcs.nextfree - 1;
                    //printf("setting %s to %lu (%s %s)\n", bc_seq_cpy, bcs.nextfree - 1, bcs.elems[bcs.nextfree - 1].seq, bcs.elems[bcs.nextfree - 1].id);
                }
            }
        }
    }

    fclose(fp);

    if (bc_len == -1) {
        fprintf(stderr, "Error: could not find any barcodes in file %s\n", argv[optind]);
        return -1;
    }

    fp1 = gzopen(argv[optind+1], "r");
    seq1 = kseq_init(fp1);
    fp2 = gzopen(argv[optind+2], "r");
    seq2 = kseq_init(fp2);

    while (kseq_read(seq1) >= 0) {
        strncpy(bc_seq, seq1->seq.s, bc_len);
        k = kh_get(str, h, bc_seq);
        kseq_read(seq2);
        strncpy(bc_seq, seq2->seq.s, bc_len);
        k2 = kh_get(str, h, bc_seq);
        if (k != kh_end(h) || k2 != kh_end(h)) {
            bc_idx = k2 != kh_end(h) ? kh_val(h, k2) : kh_val(h, k);
            if (!only_count) {
                gzprintf(bcs.elems[bc_idx].fp1, "@%s %s\n%s\n+\n%s\n"
                         , seq1->name.s
                         , seq1->comment.s
                         , seq1->seq.s+bc_len+num_spacer_bases
                         , seq1->qual.s+bc_len+num_spacer_bases);
                gzprintf(bcs.elems[bc_idx].fp2, "@%s %s\n%s\n+\n%s\n"
                         , seq2->name.s
                         , seq2->comment.s
                         , seq2->seq.s+bc_len+num_spacer_bases
                         , seq2->qual.s+bc_len+num_spacer_bases);
            }
            bcs.elems[bc_idx].num_found += 2;
        } else {    
            num_undetermined += 2;
        }
    }
    
    gzclose(fp1);
    gzclose(fp2);
    kseq_destroy(seq1);
    kseq_destroy(seq2);

    for (i=0; i<bcs.nextfree; i++) {
        printf("%s\t%s\t%u\n", bcs.elems[i].id, bcs.elems[i].seq, bcs.elems[i].num_found); 
        if (!only_count) {
            gzclose(bcs.elems[i].fp1);
            gzclose(bcs.elems[i].fp2);
        }
    }

    printf("UNDETERMINED\tNONE\t%u\n", num_undetermined);

    ARRAY_FREE(&bcs);
    kh_destroy(str, h);

    fprintf(stderr, "[CPU time: %.2f sec]\n", (float)(clock() - t) / CLOCKS_PER_SEC);

    return 0;
}
Example #24
0
khash_t (igsl) *
new_igsl_ht (void)
{
  khash_t (igsl) * h = kh_init (igsl);
  return h;
}
Example #25
0
typedef struct {
	int32_t n, m;
	uint64_t *offset;
} bam_lidx_t;

KHASH_MAP_INIT_INT(i, bam_binlist_t)

struct __bam_index_t {
	int32_t n;
	uint64_t n_no_coor; // unmapped reads without coordinate
	khash_t(i) **index;
	bam_lidx_t *index2;
};

// requirement: len <= LEN_MASK
static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
{
	khint_t k;
	bam_binlist_t *l;
	int ret;
	k = kh_put(i, h, bin, &ret);
	l = &kh_value(h, k);
	if (ret) { // not present
		l->m = 1; l->n = 0;
		l->list = (pair64_t*)calloc(l->m, 16);
	}
	if (l->n == l->m) {
		l->m <<= 1;
		l->list = (pair64_t*)realloc(l->list, l->m * 16);
	}
	l->list[l->n].u = beg; l->list[l->n++].v = end;
Example #26
0
khash_t (is32) *
new_is32_ht (void)
{
  khash_t (is32) * h = kh_init (is32);
  return h;
}
Example #27
0
File: smr.c Project: dpoon/smr
// Definitions/prototypes/initializations for data structures, functions, etc.
//------------------------------------------------------------------------------
#define MAX_LINE_LENGTH 8192
#define MAX_ID_LENGTH 1024
KHASH_MAP_INIT_STR(m32, unsigned)

typedef struct
{
  char delim;
  const char *outfile;
  FILE *outstream;
  unsigned numfiles;
} SmrOptions;

void smr_init_options(SmrOptions *options);
khash_t(m32) *smr_collect_molids(SmrOptions *options, khash_t(m32) **maps);
khash_t(m32) *smr_load_file(const char *filename);
void smr_parse_options(SmrOptions *options, int argc, char **argv);
void smr_print_matrix(SmrOptions *options, khash_t(m32) **maps);
void smr_print_usage(FILE *outstream);
void smr_terminate(SmrOptions *options, khash_t(m32) **maps);

//------------------------------------------------------------------------------
// Main method
//------------------------------------------------------------------------------

int main(int argc, char **argv)
{
  SmrOptions options;
  smr_init_options(&options);
  smr_parse_options(&options, argc, argv);
Example #28
0
khash_t (iu64) *
new_iu64_ht (void)
{
  khash_t (iu64) * h = kh_init (iu64);
  return h;
}
Example #29
0
#else
  #define mrb_usascii_str_new2 mrb_str_new_cstr
#endif

KHASH_MAP_INIT_INT(mt, struct RProc*);
KHASH_MAP_INIT_INT(iv, mrb_value);

typedef struct fc_result {
    mrb_sym name;
    struct RClass * klass;
    mrb_value path;
    struct RClass * track;
    struct fc_result *prev;
} fcresult_t;

int kiv_lookup(khash_t(iv) *table, mrb_sym key, mrb_value *value);
extern struct kh_iv *mrb_class_tbl;

void
mrb_gc_mark_mt(mrb_state *mrb, struct RClass *c)
{
  khiter_t k;
  khash_t(mt) *h = c->mt;

  if (!h) return;
  for (k = kh_begin(h); k != kh_end(h); k++) {
    if (kh_exist(h, k)){
      struct RProc *m = kh_value(h, k);
      if (m) {
        paint_black(m);
      }
Example #30
0
/**
 * See Copyright Notice in picrin.h
 */

#include "picrin.h"

KHASH_DECLARE(m, void *, int)
KHASH_DEFINE2(m, void *, int, 0, kh_ptr_hash_func, kh_ptr_hash_equal)

static bool
internal_equal_p(pic_state *pic, pic_value x, pic_value y, size_t depth, khash_t(m) *h)
{
  pic_value local = pic_nil_value();
  size_t c = 0;

  if (depth > 10) {
    if (depth > 200) {
      pic_errorf(pic, "Stack overflow in equal\n");
    }
    if (pic_pair_p(x) || pic_vec_p(x)) {
      int ret;
      kh_put(m, h, pic_obj_ptr(x), &ret);
      if (ret != 0) {
        return true;            /* `x' was seen already.  */
      }
    }
  }

 LOOP:

  if (pic_eqv_p(x, y)) {