int main( int argc, char **args ) { //create a new array list. friso_array_t array = new_array_list(); fstring keys[] = { "chenmanwen", "yangqinghua", "chenxin", "luojiangyan", "xiaoyanzi", "bibi", "zhangrenfang", "yangjian", "liuxiao", "pankai", "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo", "caizaili", "panpan", "xiaolude", "yintanwen" }; int j, idx = 2, len = sizeof( keys ) / sizeof( fstring ); for ( j = 0; j < len; j++ ) { array_list_add( array, keys[j] ); } printf("length=%d, allocations=%d\n", array->length, array->allocs ); array_list_trim( array ); printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nAfter set %dth item.\n", idx ); array_list_set( array, idx, "chenxin__" ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nAfter remove %dth item.\n", idx ); array_list_remove( array, idx ); printf("length=%d, allocations=%d\n", array->length, array->allocs ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nInsert a item at %dth\n", idx ); array_list_insert( array, idx, "*chenxin*" ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); free_array_list( array ); return 0; }
/*
 * Stores obj at position index of list by forwarding the call to
 * array_list_set(), after casting list to an ArrayList pointer.
 */
void array_set(Array * const list, int index, Object obj)
{
    ArrayList * const backing_list = (ArrayList * const) list;

    array_list_set(backing_list, index, obj);
}
/**
 * Discards incoherent CALs (cal_t entries) from `list` and caps the result at
 * 100 entries.
 *
 * The first `num_cals` entries of `list` are inspected. An entry is rejected
 * when:
 *   - its seed-region list (`sr_list`) is empty;
 *   - one of its seed regions starts, in the read, before the position that
 *     follows the previous region's `read_end`, or has
 *     `read_start >= read_end`;
 *   - a region other than the first has a `genome_start` that lies before the
 *     position following the previous region's `genome_end`, or more than
 *     `2 * read_length` past it.
 *
 * When nothing is rejected, `list` itself is kept. Otherwise the accepted
 * entries are moved into a newly created list (their `num_seeds` is refreshed
 * from `sr_list->size`) and the old list, which still holds the rejected
 * entries, is released with array_list_free(list, cal_free). Callers must
 * therefore only use the returned pointer afterwards.
 *
 * Finally, entries at index 100 and above are removed from the resulting list
 * and handed to cal_free().
 *
 * @param num_cals     number of leading entries of `list` to inspect
 * @param read_length  read length used for the genome-distance limit
 * @param list         list of cal_t pointers; may be freed by this call
 * @return the list holding the surviving CALs (`list` or a new list)
 */
array_list_t *filter_cals(size_t num_cals, size_t read_length, array_list_t *list) {
    const size_t max_cals_kept = 100;
    cal_t *cal;

    // A variable length array must have a length greater than zero, so one
    // slot is reserved even when there is nothing to inspect.
    int founds[num_cals > 0 ? num_cals : 1];
    int found = 0;

    for (size_t j = 0; j < num_cals; j++) {
        founds[j] = 0;
        cal = array_list_get(j, list);
        // Casts keep the arguments in line with the %i conversions.
        LOG_DEBUG_F("\tcal %i of %i: sr_list size = %i (cal->num_seeds = %i) %i:%lu-%lu\n",
                    (int) j, (int) num_cals, cal->sr_list->size, cal->num_seeds,
                    cal->chromosome_id, cal->start, cal->end);

        if (cal->sr_list->size > 0) {
            int start = 0;            // first read position allowed for the next region
            size_t genome_start = 0;  // first genome position allowed for the next region
            int first = 1;

            for (linked_list_item_t *list_item = cal->sr_list->first;
                 list_item != NULL;
                 list_item = list_item->next) {
                seed_region_t *s = list_item->item;

                LOG_DEBUG_F("\t\t:: star %lu > %lu s->read_start\n",
                            (unsigned long) start, s->read_start);
                LOG_DEBUG_F("\t\t:: read_star %lu > read_end %lu \n",
                            s->read_start, s->read_end);

                // Region overlaps the previous one in the read, or is empty/inverted.
                if (start > s->read_start || s->read_start >= s->read_end) {
                    LOG_DEBUG("\t\t\t:: remove\n");
                    found++;
                    founds[j] = 1;
                }

                // Region goes backwards in the genome or is too far from the previous one.
                if (!first &&
                    ((s->genome_start < genome_start) ||
                     (s->genome_start - genome_start) > 2 * read_length)) {
                    found++;
                    founds[j] = 1;
                }

                first = 0;
                start = s->read_end + 1;
                genome_start = s->genome_end + 1;
            }
        } else {
            // A CAL without seed regions is always rejected.
            found++;
            founds[j] = 1;
        }
    }

    if (found) {
        array_list_t *cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

        for (size_t j = 0; j < num_cals; j++) {
            if (!founds[j]) {
                cal = array_list_get(j, list);
                cal->num_seeds = cal->sr_list->size;
                array_list_insert(cal, cal_list);
                // Detach the survivor so that freeing the old list does not release it.
                array_list_set(j, NULL, list);
            }
        }

        array_list_free(list, (void *) cal_free);
        list = cal_list;
    }

    // Drop everything beyond the cap, starting from the tail.
    num_cals = array_list_size(list);
    for (size_t j = num_cals; j > max_cals_kept; j--) {
        cal_free(array_list_remove_at(j - 1, list));
    }

    return list;
}
/**
 * Generates and filters the CALs of every target read of a batch.
 *
 * For each target read:
 *   1. CALs are generated into the read's mapping list with
 *      bwt_generate_cals(), using the seed size from input->cal_optarg.
 *   2. If none is produced, 24-nt seeds are mapped with
 *      bwt_map_inexact_seeds_seq() and CALs are built from those regions with
 *      bwt_generate_cal_list_linked_list().
 *   3. Incoherent CALs are discarded: CALs with an empty seed-region list,
 *      with seed regions that overlap or are empty/inverted in the read, or
 *      whose regions go backwards in the genome or are more than
 *      2 * read->length apart. When at least one CAL is discarded the
 *      survivors are moved to a new list and the old one is freed.
 *   4. The list is capped at 100 CALs and stored back in
 *      mapping_batch->mapping_lists.
 *
 * Every processed read is kept as a target and mapping_batch->num_targets is
 * rewritten accordingly. mapping_batch->extra_stage_do is set to 1.
 *
 * @param input  seeker input (BWT options/index, CAL options, genome)
 * @param batch  batch whose mapping_batch is processed in place
 * @return RNA_STAGE when batch->mapping_mode is RNA_MODE; otherwise
 *         PRE_PAIR_STAGE when the pair mode is not SINGLE_END_MODE, SW_STAGE
 *         when targets remain, and DNA_POST_PAIR_STAGE in any other case
 */
int apply_caling_rna(cal_seeker_input_t* input, batch_t *batch) {
    LOG_DEBUG("========= APPLY CALING RNA =========\n");

    bwt_optarg_t *bwt_optarg = input->bwt_optarg;
    bwt_index_t *bwt_index = input->index;
    cal_optarg_t *cal_optarg = input->cal_optarg;
    mapping_batch_t *mapping_batch = batch->mapping_batch;
    genome_t *genome = input->genome;
    unsigned int num_chromosomes = genome->num_chromosomes;
    int seed_size = input->cal_optarg->seed_size;

    const size_t max_cals_kept = 100;
    size_t num_targets = mapping_batch->num_targets;
    size_t target_pos = 0;
    size_t num_cals;
    // Initialised because min_seeds is handed by address to
    // bwt_generate_cal_list_linked_list() before this function assigns it.
    int min_seeds = 0, max_seeds = 0;
    fastq_read_t *read;
    array_list_t *cal_list, *list;
    cal_t *cal;

    mapping_batch->extra_stage_do = 1;

    // Scratch list for the regions of the fallback seeding; reused for every read.
    array_list_t *region_list = array_list_new(1000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

    for (size_t i = 0; i < num_targets; i++) {
        read = array_list_get(mapping_batch->targets[i], mapping_batch->fq_batch);
        list = mapping_batch->mapping_lists[mapping_batch->targets[i]];

        num_cals = bwt_generate_cals(read->sequence, seed_size, bwt_optarg,
                                     cal_optarg, bwt_index, list, num_chromosomes);

        if (num_cals == 0) {
            // Fallback: seed again with 24-nt seeds (named so that it no
            // longer shadows the configured seed_size above).
            const int inexact_seed_size = 24;

            // First, delete old regions
            array_list_clear(region_list, (void *) region_bwt_free);

            // Second, create new regions with the fallback seed size
            bwt_map_inexact_seeds_seq(read->sequence, inexact_seed_size, inexact_seed_size / 2,
                                      bwt_optarg, bwt_index, region_list);

            max_seeds = (read->length / 15) * 2 + 10;
            num_cals = bwt_generate_cal_list_linked_list(region_list,
                                                         input->cal_optarg,
                                                         &min_seeds, &max_seeds,
                                                         genome->num_chromosomes + 1,
                                                         list, read->length,
                                                         cal_optarg->min_cal_size,
                                                         0);
        }

        array_list_clear(region_list, (void *) region_bwt_free);

        // filter-incoherent CALs
        // A variable length array must have a length greater than zero, so
        // one slot is reserved even when no CAL was generated.
        int founds[num_cals > 0 ? num_cals : 1];
        int found = 0;

        for (size_t j = 0; j < num_cals; j++) {
            founds[j] = 0;
            cal = array_list_get(j, list);
            // Casts keep the arguments in line with the %i conversions.
            LOG_DEBUG_F("\tcal %i of %i: sr_list size = %i (cal->num_seeds = %i) %i:%lu-%lu\n",
                        (int) j, (int) num_cals, cal->sr_list->size, cal->num_seeds,
                        cal->chromosome_id, cal->start, cal->end);

            if (cal->sr_list->size > 0) {
                int start = 0;            // first read position allowed for the next region
                size_t genome_start = 0;  // first genome position allowed for the next region
                int first = 1;

                for (linked_list_item_t *list_item = cal->sr_list->first;
                     list_item != NULL;
                     list_item = list_item->next) {
                    seed_region_t *s = list_item->item;

                    LOG_DEBUG_F("\t\t:: star %lu > %lu s->read_start\n",
                                (unsigned long) start, s->read_start);
                    LOG_DEBUG_F("\t\t:: read_star %lu > read_end %lu \n",
                                s->read_start, s->read_end);

                    // Region overlaps the previous one in the read, or is empty/inverted.
                    if (start > s->read_start || s->read_start >= s->read_end) {
                        LOG_DEBUG("\t\t\t:: remove\n");
                        found++;
                        founds[j] = 1;
                    }

                    // Region goes backwards in the genome or is too far from the previous one.
                    if (!first &&
                        ((s->genome_start < genome_start) ||
                         (s->genome_start - genome_start) > 2 * read->length)) {
                        found++;
                        founds[j] = 1;
                    }

                    first = 0;
                    start = s->read_end + 1;
                    genome_start = s->genome_end + 1;
                }
            } else {
                // A CAL without seed regions is always rejected.
                found++;
                founds[j] = 1;
            }
        }

        if (found) {
            // The seed limits are still recomputed here because min_seeds is
            // passed by address to the CAL generator on later iterations.
            min_seeds = 100000;
            max_seeds = 0;
            cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

            for (size_t j = 0; j < num_cals; j++) {
                if (!founds[j]) {
                    cal = array_list_get(j, list);
                    cal->num_seeds = cal->sr_list->size;
                    if (cal->num_seeds > max_seeds) max_seeds = cal->num_seeds;
                    if (cal->num_seeds < min_seeds) min_seeds = cal->num_seeds;
                    array_list_insert(cal, cal_list);
                    // Detach the survivor so that freeing the old list does not release it.
                    array_list_set(j, NULL, list);
                }
            }

            array_list_free(list, (void *) cal_free);
            list = cal_list;
        }

        mapping_batch->mapping_lists[mapping_batch->targets[i]] = list;

        // Drop everything beyond the cap, starting from the tail.
        num_cals = array_list_size(list);
        for (size_t j = num_cals; j > max_cals_kept; j--) {
            cal_free(array_list_remove_at(j - 1, list));
        }

        // Every read stays a target, whatever its number of CALs.
        mapping_batch->targets[target_pos++] = mapping_batch->targets[i];
    }

    mapping_batch->num_targets = target_pos;

    array_list_free(region_list, NULL);

    LOG_DEBUG("========= APPLY CALING RNA END =========\n");

    if (batch->mapping_mode == RNA_MODE) {
        return RNA_STAGE;
    }

    if (batch->pair_input->pair_mng->pair_mode != SINGLE_END_MODE) {
        return PRE_PAIR_STAGE;
    } else if (batch->mapping_batch->num_targets > 0) {
        return SW_STAGE;
    }

    return DNA_POST_PAIR_STAGE;
}
//==================================================================================== // apply_caling //==================================================================================== int apply_caling(cal_seeker_input_t* input, batch_t *batch) { mapping_batch_t *mapping_batch = batch->mapping_batch; array_list_t *list = NULL; size_t read_index, num_cals; int min_seeds, max_seeds; cal_t *cal; array_list_t *cal_list; fastq_read_t *read; size_t num_chromosomes = input->genome->num_chromosomes + 1; size_t num_targets = mapping_batch->num_targets; size_t *targets = mapping_batch->targets; size_t new_num_targets = 0; array_list_t *region_list; bwt_anchor_t *bwt_anchor_back, *bwt_anchor_forw; linked_list_t *linked_list; int anchor_nt, gap_nt; seed_region_t *seed_region_start, *seed_region_end; //max_seeds = input->cal_optarg->num_seeds; // size_t *new_targets = (size_t *) calloc(num_targets, sizeof(size_t)); // set to zero mapping_batch->num_to_do = 0; for (size_t i = 0; i < num_targets; i++) { read_index = targets[i]; read = array_list_get(read_index, mapping_batch->fq_batch); region_list = mapping_batch->mapping_lists[read_index]; // for debugging // LOG_DEBUG_F("%s\n", ((fastq_read_t *) array_list_get(read_index, mapping_batch->fq_batch))->id); if (!list) { list = array_list_new(1000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); } if (array_list_get_flag(region_list) == 0 || array_list_get_flag(region_list) == 2) { //We have normal and extend seeds (anchors) max_seeds = (read->length / 15)*2 + 10; num_cals = bwt_generate_cal_list_linked_list(region_list, input->cal_optarg, &min_seeds, &max_seeds, num_chromosomes, list, read->length, input->cal_optarg->min_cal_size, 0); } else { //We have double anchors with smaller distance between they //printf("Easy case... 
Two anchors and same distance between read gap and genome distance\n"); num_cals = 0; for (int a = array_list_size(region_list) - 1; a >= 0; a -= 2) { max_seeds = 2; min_seeds = 2; bwt_anchor_back = array_list_remove_at(a, region_list); bwt_anchor_forw = array_list_remove_at(a - 1, region_list); linked_list = linked_list_new(COLLECTION_MODE_ASYNCHRONIZED); //Seed for the first anchor anchor_nt = bwt_anchor_forw->end - bwt_anchor_forw->start; //printf("\t seed0[%i-%i][%lu-%lu]\n", 0, anchor_nt - 1, // bwt_anchor_forw->start, bwt_anchor_forw->end); seed_region_start = seed_region_new(0, anchor_nt - 1, bwt_anchor_forw->start, bwt_anchor_forw->end, 0, 0, 0); //Seed for the first anchor gap_nt = read->length - (anchor_nt + (bwt_anchor_back->end - bwt_anchor_back->start)); //printf("\t gap_nt = %i, anchor_nt = %i\n", gap_nt, anchor_nt); //printf("\t seed1[%i-%i][%lu-%lu]\n", anchor_nt + gap_nt, read->length - 1, // bwt_anchor_back->start + 1, bwt_anchor_back->end); seed_region_end = seed_region_new(anchor_nt + gap_nt, read->length - 1, bwt_anchor_back->start + 1, bwt_anchor_back->end, 1, 0, 0); //The reference distance is 0 and the read distance not //The read distance is 0 and the reference distance not //if (seed_region_start->genome_end > seed_region_end->genome_start || // seed_region_start->read_end > seed_region_end->read_start) { //array_list_clear(region_list, NULL); //continue; if (seed_region_end->genome_start - seed_region_start->genome_end < 5 || seed_region_end->read_start - seed_region_start->read_end < 5) { seed_region_start->genome_end -= 5; seed_region_start->read_end -= 5; seed_region_end->genome_start += 5; seed_region_end->read_start += 5; } linked_list_insert(seed_region_start, linked_list); linked_list_insert_last(seed_region_end, linked_list); cal = cal_new(bwt_anchor_forw->chromosome + 1, bwt_anchor_forw->strand, bwt_anchor_forw->start, bwt_anchor_back->end + 1, 2, linked_list, linked_list_new(COLLECTION_MODE_ASYNCHRONIZED)); 
array_list_insert(cal, list); num_cals++; } } // for debugging LOG_DEBUG_F("read %s : num. cals = %i, min. seeds = %i, max. seeds = %i\n", read->id, num_cals, min_seeds, max_seeds); /* if (num_cals == 0) { int seed_size = 24; //First, Delete old regions array_list_clear(mapping_batch->mapping_lists[read_index], region_bwt_free); //Second, Create new regions with seed_size 24 and 1 Mismatch bwt_map_inexact_seeds_seq(read->sequence, seed_size, seed_size/2, bwt_optarg, bwt_index, mapping_batch->mapping_lists[read_index]); num_cals = bwt_generate_cal_list_linked_list(mapping_batch->mapping_lists[mapping_batch->targets[i]], input->cal_optarg, &min_seeds, &max_seeds, num_chromosomes, list, read->length); }*/ /* for (size_t j = 0; j < num_cals; j++) { cal = array_list_get(j, list); LOG_DEBUG_F("\tchr: %i, strand: %i, start: %lu, end: %lu, num_seeds = %i, num. regions = %lu\n", cal->chromosome_id, cal->strand, cal->start, cal->end, cal->num_seeds, cal->sr_list->size); } */ // printf("min_seeds = %i, max_seeds = %i, min_limit = %i, num_cals = %i\n", // min_seeds, max_seeds, min_limit, array_list_size(list)); // filter incoherent CALs int founds[num_cals], found = 0; for (size_t j = 0; j < num_cals; j++) { founds[j] = 0; cal = array_list_get(j, list); LOG_DEBUG_F("\tcal %i of %i: sr_list size = %i (cal->num_seeds = %i) %i:%lu-%lu\n", j, num_cals, cal->sr_list->size, cal->num_seeds, cal->chromosome_id, cal->start, cal->end); if (cal->sr_list->size > 0) { int start = 0; for (linked_list_item_t *list_item = cal->sr_list->first; list_item != NULL; list_item = list_item->next) { seed_region_t *s = list_item->item; LOG_DEBUG_F("\t\t:: star %lu > %lu s->read_start\n", start, s->read_start); if (start > s->read_start) { LOG_DEBUG("\t\t\t:: remove\n"); found++; founds[j] = 1; } start = s->read_end + 1; } } else { found++; founds[j] = 1; } } if (found) { min_seeds = 100000; max_seeds = 0; cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); for (size_t j = 0; j < 
num_cals; j++) { if (!founds[j]) { cal = array_list_get(j, list); cal->num_seeds = cal->sr_list->size; if (cal->num_seeds > max_seeds) max_seeds = cal->num_seeds; if (cal->num_seeds < min_seeds) min_seeds = cal->num_seeds; array_list_insert(cal, cal_list); array_list_set(j, NULL, list); } } array_list_free(list, (void *) cal_free); num_cals = array_list_size(cal_list); list = cal_list; } // LOG_FATAL_F("num. cals = %i, min. seeds = %i, max. seeds = %i\n", num_cals, min_seeds, max_seeds); // filter CALs by the number of seeds cal_list = list; list = NULL; /* int min_limit = input->cal_optarg->min_num_seeds_in_cal; if (min_limit < 0) min_limit = max_seeds; // min_limit -= 3; if (min_seeds == max_seeds || min_limit <= min_seeds) { cal_list = list; list = NULL; } else { cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); for (size_t j = 0; j < num_cals; j++) { cal = array_list_get(j, list); if (cal->num_seeds >= min_limit) { array_list_insert(cal, cal_list); array_list_set(j, NULL, list); } } array_list_clear(list, (void *) cal_free); num_cals = array_list_size(cal_list); } */ if (num_cals > MAX_CALS) { for (size_t j = num_cals - 1; j >= MAX_CALS; j--) { cal = (cal_t *) array_list_remove_at(j, cal_list); cal_free(cal); } num_cals = array_list_size(cal_list); } // LOG_DEBUG_F("num. 
cals = %i, MAX_CALS = %i\n", num_cals, MAX_CALS); if (num_cals > 0 && num_cals <= MAX_CALS) { array_list_set_flag(2, cal_list); targets[new_num_targets++] = read_index; /* int count1 = 0, count2 = 0; // count number of sw to do // method #1 // printf("method #1\n"); seed_region_t *s, *prev_s; linked_list_iterator_t* itr; for (size_t j = 0; j < num_cals; j++) { prev_s = NULL; cal = array_list_get(j, cal_list); itr = linked_list_iterator_new(cal->sr_list); s = (seed_region_t *) linked_list_iterator_curr(itr); while (s != NULL) { if ((prev_s == NULL && s->read_start != 0) || (prev_s != NULL)) { // printf("\t\t\tcase 1\n"); count1++; } prev_s = s; linked_list_iterator_next(itr); s = linked_list_iterator_curr(itr); } if (prev_s != NULL && prev_s->read_end < read->length - 1) { count1++; // printf("\t\t\tcase 2 (%i < %i)\n", prev_s->read_end, read->length - 1); } linked_list_iterator_free(itr); } // method #2 printf("method #2\n"); for (size_t j = 0; j < num_cals; j++) { cal = array_list_get(j, cal_list); printf("\t: %i\n", j); if (cal->sr_list->size > 0) { int start = 0; for (linked_list_item_t *list_item = cal->sr_list->first; list_item != NULL; list_item = list_item->next) { seed_region_t *s = list_item->item; printf("\t\t[%i|%i - %i|%i]\n", s->genome_start, s->read_start, s->read_end, s->genome_end); if (s->read_start != start) { count2++; } start = s->read_end + 1; } if (start < read->length) { count2++; } } } printf("count #1 = %i, count #2 = %i\n", count1, count2); assert(count1 == count2); mapping_batch->num_to_do += count1; */ // we have to free the region list array_list_free(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free); mapping_batch->mapping_lists[read_index] = cal_list; } else { array_list_set_flag(0, mapping_batch->mapping_lists[read_index]); // we have to free the region list array_list_clear(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free); if (cal_list) array_list_free(cal_list, (void *) cal_free); if (list) 
array_list_clear(list, (void *) cal_free); } /* cal_list = list; list = NULL; array_list_set_flag(2, cal_list); // mapping_batch->num_to_do += num_cals; targets[new_num_targets++] = read_index; // we have to free the region list array_list_free(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free); mapping_batch->mapping_lists[read_index] = cal_list; */ /* // filter CALs by the number of seeds int min_limit = input->cal_optarg->min_num_seeds_in_cal; if (min_limit < 0) min_limit = max_seeds; printf("min_seeds = %i, max_seeds = %i, min_limit = %i, num_cals = %i\n", min_seeds, max_seeds, min_limit, array_list_size(list)); if (min_seeds == max_seeds || min_limit <= min_seeds) { cal_list = list; list = NULL; } else { cal_list = array_list_new(MAX_CALS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); for (size_t j = 0; j < num_cals; j++) { cal = array_list_get(j, list); if (cal->num_seeds >= min_limit) { array_list_insert(cal, cal_list); array_list_set(j, NULL, list); } } array_list_clear(list, (void *) cal_free); num_cals = array_list_size(cal_list); printf("************, num_cals = %i\n", num_cals); } if (num_cals > MAX_CALS) { for (size_t j = num_cals - 1; j >= MAX_CALS; j--) { cal = (cal_t *) array_list_remove_at(j, cal_list); cal_free(cal); } num_cals = array_list_size(cal_list); } if (num_cals > 0 && num_cals <= MAX_CALS) { array_list_set_flag(2, cal_list); mapping_batch->num_to_do += num_cals; targets[new_num_targets++] = read_index; // we have to free the region list array_list_free(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free); mapping_batch->mapping_lists[read_index] = cal_list; } else { array_list_set_flag(0, mapping_batch->mapping_lists[read_index]); // we have to free the region list array_list_clear(mapping_batch->mapping_lists[read_index], (void *) region_bwt_free); if (cal_list) array_list_free(cal_list, (void *) cal_free); if (list) array_list_clear(list, (void *) cal_free); } */ } // end for 0 ... 
num_targets // update batch mapping_batch->num_targets = new_num_targets; // LOG_DEBUG_F("num. SW to do: %i\n", mapping_batch->num_to_do); // exit(-1); // free memory if (list) array_list_free(list, NULL); if (batch->mapping_mode == RNA_MODE) { return RNA_STAGE; } if (batch->pair_input->pair_mng->pair_mode != SINGLE_END_MODE) { return PRE_PAIR_STAGE; } else if (batch->mapping_batch->num_targets > 0) { return SW_STAGE; } return DNA_POST_PAIR_STAGE; }