コード例 #1
0
ファイル: bed_file.c プロジェクト: cyenyxe/hpg-libs-1
/**
 * Open a BED file and wrap it in a newly allocated bed_file_t.
 *
 * The file contents are obtained through mmap_file(); the header-entry and
 * record lists are created empty in synchronized mode.
 *
 * @param filename Path of the file to open. The pointer is stored as-is (it
 *                 is not copied), so it must stay valid for as long as the
 *                 returned structure is in use.
 * @return The new bed_file_t, or NULL if the structure could not be allocated.
 */
bed_file_t *bed_open(char *filename) {
    // Allocate the handle before mapping the file, so that an allocation
    // failure cannot leave a mapping behind with no owner.
    bed_file_t *bed_file = malloc(sizeof *bed_file);
    if (bed_file == NULL) {
        return NULL;
    }

    size_t len = 0;
    // NOTE(review): the result of mmap_file() is stored unchecked; its
    // failure contract is not visible from here -- confirm callers cope
    // with a NULL/invalid data pointer.
    char *data = mmap_file(&len, filename);

    bed_file->filename = filename;
    bed_file->data = data;
    bed_file->data_len = len;
    bed_file->header_entries = linked_list_new(COLLECTION_MODE_SYNCHRONIZED);
    bed_file->records = linked_list_new(COLLECTION_MODE_SYNCHRONIZED);
    return bed_file;
}
コード例 #2
0
ファイル: bwt_server_cpu.c プロジェクト: fw1121/hpg-aligner
/**
 * Build a CAL holding exactly one seed region from a BWT anchor.
 *
 * The seed region covers [read_start, read_end] on the read and
 * [bwt_anchor->start, bwt_anchor->end] on the genome. The CAL takes the
 * anchor's strand and its chromosome index plus one.
 *
 * @param bwt_anchor Anchor providing chromosome, strand and genome interval.
 * @param read_start First read position covered by the seed region.
 * @param read_end   Last read position covered by the seed region.
 * @return The value returned by cal_new() for the new CAL.
 */
cal_t *convert_bwt_anchor_to_CAL(bwt_anchor_t *bwt_anchor, size_t read_start, size_t read_end) {
  linked_list_t *seed_regions = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

  // The single seed region goes at the head of the (otherwise empty) list
  seed_region_t *region = seed_region_new(read_start, read_end,
                                          bwt_anchor->start, bwt_anchor->end,
                                          0, 0, 0);
  linked_list_insert_first(region, seed_regions);

  // Second list handed to cal_new() starts out empty
  linked_list_t *empty_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

  return cal_new(bwt_anchor->chromosome + 1,
                 bwt_anchor->strand,
                 bwt_anchor->start,
                 bwt_anchor->end,
                 1,
                 seed_regions,
                 empty_list);
}
コード例 #3
0
ファイル: suffix_mng.c プロジェクト: fw1121/hpg-aligner
/**
 * Create a suffix manager sized for the chromosomes of a genome.
 *
 * For every chromosome index an empty asynchronized linked list is created,
 * and a heap-allocated string holding the decimal index ("0", "1", ...) is
 * added to the subject container.
 *
 * @param genome Genome whose num_chroms field sizes the manager.
 * @return The new manager with num_seeds set to 0.
 */
suffix_mng_t *suffix_mng_new(sa_genome3_t *genome) {
  suffix_mng_t *mng = (suffix_mng_t *) calloc(1, sizeof(suffix_mng_t));

  const int chrom_count = genome->num_chroms;

  // Container of char* elements: one name per chromosome
  Container *names = (Container *) malloc(sizeof(Container));
  bl_containerInit(names, chrom_count, sizeof(char *));

  linked_list_t **lists = (linked_list_t **) malloc(sizeof(linked_list_t *) * chrom_count);

  int chrom = 0;
  while (chrom < chrom_count) {
    lists[chrom] = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

    // The name is the chromosome index rendered as text; the address of the
    // pointer is passed because the container element type is char*
    char *label = calloc(64, sizeof(char));
    sprintf(label, "%i", chrom);
    bl_containerAdd(names, &label);

    chrom++;
  }

  mng->num_seeds = 0;
  mng->num_chroms = chrom_count;
  mng->subject = names;
  mng->suffix_lists = lists;

  return mng;
}
コード例 #4
0
/**
 * Checks linked_list_insert(): inserted values are counted by
 * linked_list_size() and reported by linked_list_contains(), while values
 * that were never inserted are not.
 */
void test_insert()
{
	// regular insert
	linked_list* ll = linked_list_new();
	linked_list_insert(ll, 2);
	linked_list_insert(ll, 4);
	linked_list_insert(ll, 6);
	assert_equals(linked_list_size(ll), 3);
	assert_true(linked_list_contains(ll, 2));
	assert_true(linked_list_contains(ll, 4));
	assert_true(linked_list_contains(ll, 6));
	assert_false(linked_list_contains(ll, -1));
	assert_false(linked_list_contains(ll, 0));
	linked_list_clear(ll);

	// insert into a fresh, empty list
	// The variable is reused rather than declared again: a second
	// declaration of `ll` in this scope does not compile.
	// NOTE(review): the first list is only cleared here; whether it also
	// needs to be destroyed depends on the list API -- confirm.
	ll = linked_list_new();
	assert_equals(linked_list_size(ll), 0);
	assert_false(linked_list_contains(ll, 1));
	linked_list_insert(ll, 1);
	assert_equals(linked_list_size(ll), 1);
	assert_true(linked_list_contains(ll, 1));
	linked_list_clear(ll);
	// insert by index 
	// set by index 
}
コード例 #5
0
ファイル: bfwork.c プロジェクト: opencb/hpg-aligner
/**
 * Put a BAM framework structure into its initial, empty state.
 *
 * Every byte of the structure is zeroed, an empty synchronized regions list
 * is attached and the four OpenMP locks are initialized.
 *
 * \param fwork Structure to initialize; must not be NULL (asserted).
 */
void
bfwork_init(bam_fwork_t *fwork)
{
	assert(fwork);

	//Wipe the whole structure before filling anything in
	memset(fwork, 0, sizeof(*fwork));

	//Empty list of regions
	fwork->regions_list = linked_list_new(COLLECTION_MODE_SYNCHRONIZED);

	//Locks: regions, free slots, output file and reference
	omp_init_lock(&fwork->regions_lock);
	omp_init_lock(&fwork->free_slots);
	omp_init_lock(&fwork->output_file_lock);
	omp_init_lock(&fwork->reference_lock);
}
コード例 #6
0
/**
 * Checks positional lookup (linked_list_get) and membership
 * (linked_list_contains) on a list filled with the even numbers 0..198.
 */
void test_lookup()
{
	linked_list* ll = linked_list_new();

	// fill: position k holds the value 2 * k, for k = 0..99
	int value = 0;
	while (value < 100 * 2)
	{
		linked_list_insert(ll, value);
		value += 2;
	}

	// get: first, last and a middle position
	assert_equals(linked_list_get(ll, 0), 0);
	assert_equals(linked_list_get(ll, 99), 198);
	assert_equals(linked_list_get(ll, 50), 100);

	// contains: an even value is present, an odd one is not
	assert_true(linked_list_contains(ll, 10));
	assert_false(linked_list_contains(ll, 1));

	// peek (disabled)
	//assert_equals(linked_list_first(ll), 0);
	// last (disabled)
	//assert_equals(linked_list_last(ll), 99 * 2);
	// index of (not covered yet)
}
コード例 #7
0
ファイル: cal_seeker.c プロジェクト: fw1121/hpg-aligner
//====================================================================================
// apply_caling
//====================================================================================
/**
 * Convert the BWT regions/anchors of every target read of a batch into CALs.
 *
 * For each read index in mapping_batch->targets, the list stored in
 * mapping_batch->mapping_lists[read_index] is processed as follows:
 *   - if its flag is 0 or 2, the CALs are produced by
 *     bwt_generate_cal_list_linked_list();
 *   - otherwise the list is consumed from the end as (forward, back) anchor
 *     pairs and one CAL with two seed regions is built per pair.
 * CALs without seed regions, or in which a seed region starts before the
 * position following the end of the previous one (read coordinates), are
 * discarded. At most the first MAX_CALS CALs are kept.
 *
 * Reads that keep at least one CAL remain targets: their region list is
 * freed and replaced by the CAL list, flagged 2. For the other reads the
 * region list is cleared and flagged 0. mapping_batch->num_targets is
 * updated to the number of remaining targets; mapping_batch->num_to_do is
 * reset to 0 and not recomputed here.
 *
 * @param input CAL seeker input; genome->num_chromosomes and cal_optarg are read.
 * @param batch Batch whose mapping_batch is processed in place.
 * @return RNA_STAGE when batch->mapping_mode is RNA_MODE; otherwise
 *         PRE_PAIR_STAGE when the pair mode is not SINGLE_END_MODE;
 *         otherwise SW_STAGE when targets remain, else DNA_POST_PAIR_STAGE.
 */
int apply_caling(cal_seeker_input_t* input, batch_t *batch) {
  mapping_batch_t *mapping_batch = batch->mapping_batch;
  array_list_t *list = NULL;
  size_t read_index, num_cals;
  // Initialized so that the debug log below never reads indeterminate
  // values (e.g. first read handled by the paired-anchor branch with an
  // empty region list).
  int min_seeds = 0, max_seeds = 0;

  cal_t *cal;
  array_list_t *cal_list;

  fastq_read_t *read;

  size_t num_chromosomes = input->genome->num_chromosomes + 1;
  size_t num_targets = mapping_batch->num_targets;
  size_t *targets = mapping_batch->targets;
  size_t new_num_targets = 0;
  array_list_t *region_list;
  bwt_anchor_t *bwt_anchor_back, *bwt_anchor_forw;
  linked_list_t *linked_list;
  int anchor_nt, gap_nt;
  seed_region_t *seed_region_start, *seed_region_end;

  // set to zero
  mapping_batch->num_to_do = 0;

  for (size_t i = 0; i < num_targets; i++) {

    read_index = targets[i];
    read = array_list_get(read_index, mapping_batch->fq_batch);
    region_list = mapping_batch->mapping_lists[read_index];

    // The working list is handed over to the read (or freed) at the end of
    // each iteration, so a new one is needed every time round.
    if (!list) {
      list = array_list_new(1000,
                            1.25f,
                            COLLECTION_MODE_ASYNCHRONIZED);
    }

    if (array_list_get_flag(region_list) == 0 ||
        array_list_get_flag(region_list) == 2) {
      //We have normal and extend seeds (anchors)
      max_seeds = (read->length / 15)*2 + 10;
      num_cals = bwt_generate_cal_list_linked_list(region_list,
                                                   input->cal_optarg,
                                                   &min_seeds, &max_seeds,
                                                   num_chromosomes,
                                                   list, read->length,
                                                   input->cal_optarg->min_cal_size, 0);
    } else {
      //We have double anchors with a smaller distance between them
      num_cals = 0;
      // Anchors are consumed in pairs from the end of the list. The loop
      // stops at a >= 1 so that an odd-sized list never asks for index -1;
      // a leftover unpaired anchor stays in region_list and is released
      // together with it further down.
      for (int a = array_list_size(region_list) - 1; a >= 1; a -= 2) {
        max_seeds = 2;
        min_seeds = 2;
        // NOTE(review): both anchors are removed from region_list and are
        // not released in this function -- confirm whether they leak.
        bwt_anchor_back = array_list_remove_at(a, region_list);
        bwt_anchor_forw = array_list_remove_at(a - 1, region_list);

        linked_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

        //Seed for the first (forward) anchor
        anchor_nt = bwt_anchor_forw->end - bwt_anchor_forw->start;
        seed_region_start = seed_region_new(0, anchor_nt - 1,
                                            bwt_anchor_forw->start, bwt_anchor_forw->end, 0, 0, 0);

        //Seed for the second (back) anchor
        gap_nt = read->length - (anchor_nt + (bwt_anchor_back->end - bwt_anchor_back->start));
        seed_region_end = seed_region_new(anchor_nt + gap_nt, read->length - 1,
                                          bwt_anchor_back->start + 1, bwt_anchor_back->end, 1, 0, 0);

        //When either the reference gap or the read gap between the two
        //seeds is shorter than 5, pull both seeds 5 positions apart
        if (seed_region_end->genome_start - seed_region_start->genome_end < 5 ||
            seed_region_end->read_start - seed_region_start->read_end < 5) {
          seed_region_start->genome_end -= 5;
          seed_region_start->read_end -= 5;
          seed_region_end->genome_start += 5;
          seed_region_end->read_start += 5;
        }

        linked_list_insert(seed_region_start, linked_list);
        linked_list_insert_last(seed_region_end, linked_list);

        cal = cal_new(bwt_anchor_forw->chromosome + 1,
                      bwt_anchor_forw->strand,
                      bwt_anchor_forw->start,
                      bwt_anchor_back->end + 1,
                      2,
                      linked_list,
                      linked_list_new(COLLECTION_MODE_ASYNCHRONIZED));
        array_list_insert(cal, list);
        num_cals++;
      }
    }

    // for debugging (arguments are cast so they match the conversion
    // specifiers exactly)
    LOG_DEBUG_F("read %s : num. cals = %lu, min. seeds = %i, max. seeds = %i\n",
                read->id, (unsigned long) num_cals, min_seeds, max_seeds);

    // filter incoherent CALs
    // founds[j] == 1 marks CAL j for removal. The array always gets at
    // least one element because a zero-length VLA is undefined behavior.
    int founds[num_cals > 0 ? num_cals : 1], found = 0;
    for (size_t j = 0; j < num_cals; j++) {
      founds[j] = 0;
      cal = array_list_get(j, list);
      LOG_DEBUG_F("\tcal %lu of %lu: sr_list size = %lu (cal->num_seeds = %i) %i:%lu-%lu\n",
                  (unsigned long) j, (unsigned long) num_cals,
                  (unsigned long) cal->sr_list->size, (int) cal->num_seeds,
                  (int) cal->chromosome_id,
                  (unsigned long) cal->start, (unsigned long) cal->end);
      if (cal->sr_list->size > 0) {
        // 'start' is the first read position not covered by the previous seed
        int start = 0;
        for (linked_list_item_t *list_item = cal->sr_list->first; list_item != NULL; list_item = list_item->next) {
          seed_region_t *s = list_item->item;

          LOG_DEBUG_F("\t\t:: star %i > %lu s->read_start\n", start, (unsigned long) s->read_start);
          if (start > s->read_start) {
            LOG_DEBUG("\t\t\t:: remove\n");
            found++;
            founds[j] = 1;
          }
          start = s->read_end + 1;
        }
      } else {
        // A CAL without seed regions is always discarded
        found++;
        founds[j] = 1;
      }
    }
    if (found) {
      // Move the surviving CALs to a new list; what stays behind in the
      // old list is released with it.
      min_seeds = 100000;
      max_seeds = 0;
      cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
      for (size_t j = 0; j < num_cals; j++) {
        if (!founds[j]) {
          cal = array_list_get(j, list);
          cal->num_seeds = cal->sr_list->size;
          if (cal->num_seeds > max_seeds) max_seeds = cal->num_seeds;
          if (cal->num_seeds < min_seeds) min_seeds = cal->num_seeds;
          array_list_insert(cal, cal_list);
          // Detach from the old list so the survivor is not freed below
          array_list_set(j, NULL, list);
        }
      }
      array_list_free(list, (void *) cal_free);
      num_cals = array_list_size(cal_list);
      list = cal_list;
    }

    // From here on the CALs are owned through cal_list; clearing 'list'
    // makes the next iteration allocate a new working list.
    cal_list = list;
    list = NULL;

    // Keep only the first MAX_CALS CALs
    if (num_cals > MAX_CALS) {
      for (size_t j = num_cals - 1; j >= MAX_CALS; j--) {
        cal = (cal_t *) array_list_remove_at(j, cal_list);
        cal_free(cal);
      }
      num_cals = array_list_size(cal_list);
    }

    if (num_cals > 0 && num_cals <= MAX_CALS) {
      array_list_set_flag(2, cal_list);
      targets[new_num_targets++] = read_index;

      // we have to free the region list
      array_list_free(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free);
      mapping_batch->mapping_lists[read_index] = cal_list;
    } else {
      array_list_set_flag(0, mapping_batch->mapping_lists[read_index]);
      // we have to free the region list
      array_list_clear(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free);
      if (cal_list) array_list_free(cal_list, (void *) cal_free);
      if (list) array_list_clear(list, (void *) cal_free);
    }
  } // end for 0 ... num_targets

  // update batch
  mapping_batch->num_targets = new_num_targets;

  // free memory
  if (list) array_list_free(list, NULL);

  if (batch->mapping_mode == RNA_MODE) {
    return RNA_STAGE;
  }

  if (batch->pair_input->pair_mng->pair_mode != SINGLE_END_MODE) {
    return PRE_PAIR_STAGE;
  } else if (batch->mapping_batch->num_targets > 0) {
    return SW_STAGE;
  }

  return DNA_POST_PAIR_STAGE;
}
コード例 #8
0
ファイル: suffix_mng.c プロジェクト: fw1121/hpg-aligner
/**
 * Chain the seeds stored in a suffix manager and turn the resulting chains
 * into CALs for a read.
 *
 * Every seed held in p->suffix_lists is converted into a fragment; the
 * fragments are sorted with cmp_slmatch_qsort and clustered, one run of
 * equal-subject fragments at a time, with bl_slClusterSop() or
 * bl_slClusterLin() depending on the chain mode. Each chain reaching the
 * minimum score and minimum fragment count becomes a seed CAL that is
 * extended and, if its read_area is at least min_area, appended to
 * cal_list (otherwise it is freed). The suffix manager is cleared before
 * returning.
 *
 * Nothing is done when p is NULL, has no suffix lists or holds no seeds.
 *
 * @param read     Read the CALs belong to; stored in each CAL.
 * @param min_area Minimum read_area for a CAL to be kept.
 * @param strand   Strand assigned to the new seeds and CALs.
 * @param sa_index Index passed to extend_seeds().
 * @param cal_list Output list receiving the accepted CALs.
 * @param p        Suffix manager providing the seeds.
 */
void suffix_mng_create_cals(fastq_read_t *read, int min_area, int strand,
                            sa_index3_t *sa_index, array_list_t *cal_list,
                            suffix_mng_t *p) {

  if (p == NULL || p->suffix_lists == NULL || p->num_seeds <= 0) {
    return;
  }

  int covered_nt, chrom_id;
  seed_t *seed;
  seed_cal_t *new_cal;
  linked_list_t *chain_seeds;

  // set up the chaining context
  claspinfo_t info;
  bl_claspinfoInit(&info);

  info.fragments = (Container *) malloc(sizeof(Container));
  bl_containerInit(info.fragments, p->num_seeds, sizeof(slmatch_t));

  // the subject container is borrowed from the manager
  info.subject = p->subject;

  // one fragment per stored seed, over all chromosomes
  slmatch_t fragment;
  for (unsigned int c = 0; c < p->num_chroms; c++) {
    linked_list_t *chrom_seeds = p->suffix_lists[c];
    if (chrom_seeds == NULL) {
      continue;
    }

    linked_list_item_t *node = chrom_seeds->first;
    while (node != NULL) {
      seed = node->item;

      bl_slmatchInit(&fragment, 0);
      fragment.i = seed->read_start;
      fragment.j = seed->read_end - seed->read_start + 1;
      fragment.p = seed->genome_start;
      fragment.q = seed->genome_end - seed->genome_start + 1;
      // the score of a fragment is its genome length
      fragment.scr = seed->genome_end - seed->genome_start + 1;
      fragment.subject = seed->chromosome_id;
      bl_containerAdd(info.fragments, &fragment);

      node = node->next;
    }
  }

  // sort fragments
  qsort(info.fragments->contspace, bl_containerSize(info.fragments),
        sizeof(slmatch_t), cmp_slmatch_qsort);

  // Walk the sorted fragments; [run_start, i) is the current run of
  // fragments sharing a subject. A run is processed when the end of the
  // container or a different subject is reached.
  int run_start = 0;
  for (int i = 1; i <= bl_containerSize(info.fragments); i++) {
    if (i != bl_containerSize(info.fragments) &&
        ((slmatch_t *) bl_containerGet(info.fragments, run_start))->subject ==
        ((slmatch_t *) bl_containerGet(info.fragments, i))->subject) {
      continue;
    }

    slmatch_t *run = (slmatch_t *) info.fragments->contspace + run_start;
    if (info.chainmode == SOP) {
      // only use chaining without clustering if no ids are specified
      bl_slClusterSop(run, i - run_start,
                      info.epsilon, info.lambda, info.maxgap);
    } else {
      bl_slClusterLin(run, i - run_start,
                      info.epsilon, info.lambda, info.maxgap);
    }

    for (int j = run_start; j < i; j++) {
      slmatch_t *match = (slmatch_t *) bl_containerGet(info.fragments, j);
      if (!match->chain) {
        continue;
      }

      slchain_t *chain = (slchain_t *) match->chain;

      if (chain->scr >= info.minscore &&
          bl_containerSize(chain->matches) >= info.minfrag) {

        // subject names are decimal strings; parse the chain's one
        chrom_id = atoi(*(char **) bl_containerGet(info.subject, chain->subject));

        covered_nt = 0;
        chain_seeds = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED);

        // one new seed per match of the chain, in chain order
        for (int k = 0; k < bl_containerSize(chain->matches); k++) {
          slmatch_t *link = *(slmatch_t **) bl_containerGet(chain->matches, k);

          seed = seed_new(link->i, link->i + link->j - 1,
                          link->p, link->p + link->q - 1);
          seed->chromosome_id = chrom_id;
          seed->strand = strand;
          covered_nt += link->j;
          cigar_append_op(link->j, '=', &seed->cigar);

          linked_list_insert_last(seed, chain_seeds);
        }

        // extend seeds
        new_cal = seed_cal_new(chrom_id, strand, chain->p, chain->p + chain->q - 1, chain_seeds);
        new_cal->read = read;
        extend_seeds(new_cal, sa_index);
        seed_cal_update_info(new_cal);

        if (new_cal->read_area >= min_area) {
          array_list_insert(new_cal, cal_list);
        } else {
          seed_cal_free(new_cal);
        }
      }

      // the chain is released whether or not it produced a CAL
      bl_slchainDestruct(chain);
      free(chain);
      match->chain = NULL;
    }

    run_start = i;
  }

  // destruct everything; the subject is detached first because it still
  // belongs to the suffix manager
  info.subject = NULL;
  bl_claspinfoDestruct(&info);

  // finally, clear suffix manager
  suffix_mng_clear(p);
}