Ejemplo n.º 1
0
/**
 * Writes one FASTA file per query containing the query itself (under the
 * header ">QUERY") followed by every selected alignment string of that query.
 *
 * The file name is produced by createFileName() from the query name, out_path
 * and the ".aligned.fasta" extension. Sequences are wrapped at 60 characters
 * per line. Every alignment string is printed with the length of its query.
 *
 * @param alignment_strings alignment_strings[i] holds the chains written for
 *                          queries[i]
 * @param queries           array of query chains
 * @param queries_length    number of entries in queries; entries of
 *                          alignment_strings beyond this count are ignored
 * @param out_path          passed through to createFileName()
 */
void outputSelectedAlignments(std::vector<std::vector<Chain*>>& alignment_strings,
    Chain** queries, int32_t queries_length, const std::string& out_path) {

    std::string out_extension = ".aligned.fasta";

    const int line_width = 60;

    // Writes `length` characters of `chain`, wrapped at line_width. The final
    // newline is emitted only when the last line is partial, so a sequence
    // whose length is a multiple of line_width is not followed by a blank line.
    auto write_sequence = [&](std::ofstream& out, Chain* chain, int length) {
        for (int pos = 0; pos < length; ++pos) {
            out << chainGetChar(chain, pos);
            if ((pos + 1) % line_width == 0) out << '\n';
        }
        if (length % line_width != 0) out << '\n';
    };

    // Never index queries[] past queries_length, even if alignment_strings
    // happens to be longer.
    uint32_t file_count = alignment_strings.size();
    if (queries_length <= 0) {
        file_count = 0;
    } else if ((uint32_t) queries_length < file_count) {
        file_count = (uint32_t) queries_length;
    }

    for (uint32_t i = 0; i < file_count; ++i) {

        char* out_file_name = createFileName(chainGetName(queries[i]), out_path, out_extension);

        std::ofstream out_file;
        out_file.open(out_file_name);

        // Nothing can be written if the file could not be opened; skip the
        // formatting work instead of streaming into a failed stream.
        if (out_file.is_open()) {

            int query_len = chainGetLength(queries[i]);

            // '\n' instead of std::endl: no flush per line, close() flushes once.
            out_file << ">QUERY" << '\n';
            write_sequence(out_file, queries[i], query_len);

            for (uint32_t j = 0; j < alignment_strings[i].size(); ++j) {
                out_file << ">" << chainGetName(alignment_strings[i][j]) << '\n';
                write_sequence(out_file, alignment_strings[i][j], query_len);
            }

            out_file.close();
        }

        delete[] out_file_name;
    }
}
Ejemplo n.º 2
0
/**
 * Determines how many leading alignment strings are needed until the median
 * per-position conservation score drops to `threshold` or below.
 *
 * Alignment strings are added one at a time, in order. After each addition,
 * every query position j gets the score
 *     kLog_2_20 + sum_a p_a * log2(p_a)
 * where p_a is the fraction of residue a among the counted characters seen at
 * position j so far. 'X' is not counted. getMedian() of those scores is then
 * compared against `threshold`.
 *
 * @param alignment_strings chains read at positions 0 .. chainGetLength(query) - 1
 * @param query             query chain; only its length is used
 * @param threshold         selection stops once the median is <= threshold
 * @return number of alignment strings consumed (0 if kLog_2_20 <= threshold)
 */
int alignmentsSelect(std::vector<Chain*>& alignment_strings, Chain* query, float threshold) {

    const int amino_acid_num = 26;

    const int query_len = chainGetLength(query);
    const int alignments_num = (int) alignment_strings.size();

    // Running per-position residue counts (query_len rows of amino_acid_num
    // columns). Keeping them across iterations avoids recounting all previous
    // alignment strings each time one more is added; the counts, and therefore
    // the scores, are the same as with a full recount.
    std::vector<int> counts((size_t) query_len * amino_acid_num, 0);
    std::vector<int> valid(query_len, 0);
    std::vector<float> pos_freq(query_len, 0.0f);

    float median = kLog_2_20;
    int used = 0;

    while (median > threshold && used < alignments_num) {

        Chain* added = alignment_strings[used];
        ++used;

        for (int j = 0; j < query_len; ++j) {

            int* column = &counts[(size_t) j * amino_acid_num];

            const char c = chainGetChar(added, j);
            const int idx = c - 'A';

            // 'X' is skipped on purpose; any character outside 'A'..'Z' is
            // skipped as well so it can never index outside the count table.
            if (c != 'X' && idx >= 0 && idx < amino_acid_num) {
                ++valid[j];
                ++column[idx];
            }

            float entropy = 0.0f;
            for (int k = 0; k < amino_acid_num; ++k) {
                // a non-zero count implies valid[j] > 0, so no division by zero
                if (column[k] != 0) {
                    const float p = column[k] / (float) valid[j];
                    entropy += p * log2f(p);
                }
            }

            entropy += kLog_2_20;
            pos_freq[j] = entropy;
        }

        // pos_freq is rewritten completely on the next pass, so it does not
        // matter whether getMedian() reorders the buffer.
        median = getMedian(pos_freq.data(), query_len);
    }

    return used;
}
Ejemplo n.º 3
0
/*
 * Builds a ChainDatabase describing databaseLen chains starting at
 * database[databaseStart].
 *
 * The returned object is malloc'ed. It stores the offset pointer, the start
 * index, the chain count, the sum of all chain lengths and the object returned
 * by chainDatabaseGpuCreate() for the given cards.
 */
extern ChainDatabase* chainDatabaseCreate(Chain** database, int databaseStart, 
    int databaseLen, int* cards, int cardsLen) {

    ChainDatabase* chainDatabase = 
        (ChainDatabase*) malloc(sizeof(*chainDatabase));

    TIMER_START("Creating database");

    // first chain that belongs to this database
    Chain** chains = database + databaseStart;

    // total number of residues over all chains
    long totalLength = 0;
    int chainIdx = 0;
    while (chainIdx < databaseLen) {
        totalLength += chainGetLength(chains[chainIdx]);
        ++chainIdx;
    }

    chainDatabase->database = chains;
    chainDatabase->databaseStart = databaseStart;
    chainDatabase->databaseLen = databaseLen;
    chainDatabase->databaseElems = totalLength;

    chainDatabase->chainDatabaseGpu = 
        chainDatabaseGpuCreate(chains, databaseLen, cards, cardsLen);

    TIMER_STOP;

    return chainDatabase;
}
Ejemplo n.º 4
0
/**
 * Projects every database alignment onto the coordinates of the query and
 * appends the result to dst as a new chain named after the alignment target.
 *
 * Each produced chain has exactly chainGetLength(query) characters:
 *  - positions before the alignment's query start, and positions left over
 *    after the alignment path, are 'X';
 *  - for every path column where the query is not a gap, the target character
 *    is copied, or 'X' is written if the target has a gap there;
 *  - path columns where the query has a gap are dropped.
 *
 * @param dst               receives one chain per alignment (chainCreate)
 * @param query             query chain; only its length is used
 * @param alignments        alignments to project
 * @param alignments_length number of entries in alignments
 */
void alignmentsExtract(std::vector<Chain*>& dst, Chain* query, DbAlignment** alignments,
    int alignments_length) {

    const char gap_item = '-';

    int query_len = chainGetLength(query);

    // scratch buffer reused for every alignment
    char* projected = new char[query_len];

    // filled by aligmentStr() and released with delete[] after each alignment
    char* query_str = nullptr;
    char* target_str = nullptr;

    for (int idx = 0; idx < alignments_length; ++idx) {

        DbAlignment* db_alignment = alignments[idx];

        Chain* target = dbAlignmentGetTarget(db_alignment);

        int query_start = dbAlignmentGetQueryStart(db_alignment);
        int path_len = dbAlignmentGetPathLen(db_alignment);

        Alignment* alignment = dbAlignmentToAlignment(db_alignment);
        aligmentStr(&query_str, &target_str, alignment, gap_item);
        alignmentDelete(alignment);

        int pos = 0;

        // unaligned query prefix
        while (pos < query_start) {
            projected[pos++] = 'X';
        }

        // aligned part, one output character per non-gap query column
        for (int col = 0; col < path_len; ++col) {
            if (query_str[col] == gap_item) {
                continue;
            }
            projected[pos++] = (target_str[col] == gap_item) ? 'X' : target_str[col];
        }

        // unaligned query suffix
        while (pos < query_len) {
            projected[pos++] = 'X';
        }

        auto target_name = chainGetName(target);
        dst.push_back(chainCreate((char*) target_name, strlen(target_name),
            projected, query_len));

        delete[] query_str;
        delete[] target_str;
    }

    delete[] projected;
}
Ejemplo n.º 5
0
/*
 * Runs a generational search over the file-level population.
 *
 * The score of result 0 of swSolveGPU() for the given chains and preferences
 * is stored as expectedResult, and the inputs are published through the
 * file-level variables rowChain, columnChain, swPrefs and columns. After
 * seeding rand() and initialising/evaluating the population, each of the
 * GENERATION_NMR generations sorts the population, creates CHILD_NMR children
 * (selection -> crossover -> mutation), evaluates them and writes them over
 * the last CHILD_NMR population slots.
 */
extern void swGPUParamSearch(Chain* rowChainL, Chain* columnChainL, 
    SWPrefs* swPrefsL) {

    // reference solve, performed before the file-level state is touched
    SWResult* reference = swDataGetResult(
        swSolveGPU(rowChainL, columnChainL, swPrefsL), 0);

    rowChain = rowChainL;
    columnChain = columnChainL;
    swPrefs = swPrefsL;
    columns = chainGetLength(columnChainL);
    expectedResult = swResultGetScore(reference);

    srand(time(NULL));

    initPopulation();
    evaluatePopulation();

    int generation = 0;
    while (generation < GENERATION_NMR) {

        sortCromosomes(population, 0, POPULATION_NMR - 1);

        printf(".............................................\n");
        printf("Generation: %d\n", generation);

        int slot;

        // breed the next batch of children
        for (slot = 0; slot < CHILD_NMR; ++slot) {
            Cromosome offspring = crossover(selection());
            mutation(&offspring);
            children[slot] = offspring;
        }

        evaluateChildren();

        // children replace the tail of the (sorted) population
        const int lastIdx = POPULATION_NMR - 1;
        for (slot = 0; slot < CHILD_NMR; ++slot) {
            population[lastIdx - slot] = children[slot];
        }

        ++generation;
    }
}
Ejemplo n.º 6
0
uint64_t searchDatabase(std::vector<std::vector<uint32_t>>& dst,
    const std::string& database_path, Chain** queries, int32_t queries_length,
    uint32_t kmer_length, uint32_t max_candidates, uint32_t num_threads) {

    fprintf(stderr, "** Searching database for candidate sequences **\n");

    std::shared_ptr<Hash> query_hash = createHash(queries, queries_length, 0,
        queries_length, kmer_length);

    Chain** database = nullptr;
    int database_length = 0;
    int database_start = 0;

    FILE* handle = nullptr;
    int serialized = 0;
    readFastaChainsPartInit(&database, &database_length, &handle, &serialized,
        database_path.c_str());

    uint64_t database_cells = 0;

    std::vector<float> min_scores(queries_length, 1000000.0);
    std::vector<std::vector<std::vector<Candidate>>> candidates(num_threads);

    uint32_t part = 1;
    float part_size = database_chunk / (float) 1000000000;

    while (true) {

        int status = 1;

        status &= readFastaChainsPart(&database, &database_length, handle,
            serialized, database_chunk);

        databaseLog(part, part_size, 0);

        uint32_t database_split_size = (database_length - database_start) / num_threads;
        std::vector<uint32_t> database_splits(num_threads + 1, database_start);
        for (uint32_t i = 1; i < num_threads; ++i) {
            database_splits[i] += i * database_split_size;
        }
        database_splits[num_threads] = database_length;

        std::vector<ThreadPoolTask*> thread_tasks(num_threads, nullptr);

        for (uint32_t i = 0; i < num_threads; ++i) {

            auto thread_data = new ThreadSearchData(query_hash, queries_length, min_scores,
                database, database_splits[i], database_splits[i + 1], kmer_length,
                max_candidates, candidates[i], i == num_threads - 1, part,
                part_size);

            thread_tasks[i] = threadPoolSubmit(threadSearchDatabase, (void*) thread_data);
        }

        for (uint32_t i = 0; i < num_threads; ++i) {
            threadPoolTaskWait(thread_tasks[i]);
            threadPoolTaskDelete(thread_tasks[i]);
        }

        for (int i = database_start; i < database_length; ++i) {
            database_cells += chainGetLength(database[i]);
            chainDelete(database[i]);
            database[i] = nullptr;
        }

        // merge candidates from all threads
        for (int32_t i = 0; i < queries_length; ++i) {
            for (uint32_t j = 1; j < num_threads; ++j) {
                if (candidates[j][i].empty()) {
                    continue;
                }
                candidates[0][i].insert(candidates[0][i].end(),
                    candidates[j][i].begin(), candidates[j][i].end());
                std::vector<Candidate>().swap(candidates[j][i]);
            }

            if (num_threads > 1) {
                std::sort(candidates[0][i].begin(), candidates[0][i].end());
                if (candidates[0][i].size() > max_candidates) {
                    std::vector<Candidate> tmp(candidates[0][i].begin(),
                        candidates[0][i].begin() + max_candidates);
                    candidates[0][i].swap(tmp);
                }
            }

            if (!candidates[0][i].empty()) {
                min_scores[i] = candidates[0][i].back().score;
            }
        }

        databaseLog(part, part_size, 100);
        ++part;

        if (status == 0) {
            break;
        }

        database_start = database_length;
    }
    fprintf(stderr, "\n\n");

    fclose(handle);
    deleteFastaChains(database, database_length);

    dst.clear();
    dst.resize(queries_length);

    for (int32_t i = 0; i < queries_length; ++i) {
        dst[i].reserve(candidates[0][i].size());
        for (uint32_t j = 0; j < candidates[0][i].size(); ++j) {
            dst[i].emplace_back(candidates[0][i][j].id);
        }
        std::vector<Candidate>().swap(candidates[0][i]);
        std::sort(dst[i].begin(), dst[i].end());
    }

    return database_cells;
}
Ejemplo n.º 7
0
void* threadSearchDatabase(void* params) {

    auto thread_data = (ThreadSearchData*) params;

    thread_data->candidates.resize(thread_data->queries_length);

    std::vector<uint32_t> kmer_vector;
    std::vector<std::vector<int32_t>> hits(thread_data->queries_length);
    std::vector<float> min_scores(thread_data->min_scores);

    uint32_t log_counter = 0;
    uint32_t log_size = (thread_data->database_end - thread_data->database_begin) / (100. / log_step_percentage);
    float log_percentage = log_step_percentage;

    for (uint32_t i = thread_data->database_begin; i < thread_data->database_end; ++i) {

        if (thread_data->log && log_percentage < 100.0) {
            ++log_counter;
            if (log_size != 0 && log_counter % log_size == 0) {
                databaseLog(thread_data->part, thread_data->part_size, log_percentage);
                log_percentage += log_step_percentage;
            }
        }

        createKmerVector(kmer_vector, thread_data->database[i], thread_data->kmer_length);

        for (uint32_t j = 0; j < kmer_vector.size(); ++j) {
            if (j != 0 && kmer_vector[j] == kmer_vector[j - 1]) {
                continue;
            }

            Hash::Iterator begin, end;
            thread_data->query_hash->hits(begin, end, kmer_vector[j]);
            for (; begin != end; ++begin) {
                hits[begin->id].emplace_back(begin->position);
            }
        }

        for (uint32_t j = 0; j < thread_data->queries_length; ++j) {
            if (hits[j].empty()) {
                continue;
            }

            float similartiy_score = longestIncreasingSubsequence(hits[j]) /
                (float) chainGetLength(thread_data->database[i]);

            if (thread_data->candidates[j].size() < thread_data->max_candidates || similartiy_score > min_scores[j]) {
                thread_data->candidates[j].emplace_back(similartiy_score, i);
                min_scores[j] = std::min(min_scores[j], similartiy_score);
            }

            std::vector<int32_t>().swap(hits[j]);
        }
    }

    for (uint32_t i = 0; i < thread_data->queries_length; ++i) {
        std::sort(thread_data->candidates[i].begin(), thread_data->candidates[i].end());

        if (thread_data->candidates[i].size() > thread_data->max_candidates) {
            std::vector<Candidate> tmp(thread_data->candidates[i].begin(),
                thread_data->candidates[i].begin() + thread_data->max_candidates);
            thread_data->candidates[i].swap(tmp);
        }
    }

    delete thread_data;

    return nullptr;
}
Ejemplo n.º 8
0
/*
 * Performs one database search step for a batch of queries: scores every
 * query against the database, extracts the best targets per query and aligns
 * each query with its extracted targets.
 *
 * dbAlignments     - output; dbAlignments[i] is malloc'ed here with
 *                    dbAlignmentsLen[i] slots, the slots are handed to the
 *                    alignment threads
 * dbAlignmentsLen  - output; &dbAlignmentsLen[i] is handed to the extract
 *                    step (presumably written there) and read back afterwards
 * type             - alignment type, forwarded to scoring and alignment
 * queries          - query chains, queriesLen of them; queriesStart is not
 *                    used in this function
 * chainDatabase    - database to search
 * scorer           - forwarded to scoring and alignment
 * maxAlignments, valueFunction, valueFunctionParam, valueThreshold
 *                  - forwarded to the extract contexts
 * indexes          - optional (may be NULL) database indexes; when given, the
 *                    cell estimate is computed over these chains only and the
 *                    array is forwarded to the scoring functions
 * cards, cardsLen  - CUDA cards; cardsLen == 0 selects the CPU-only paths
 *
 * Nothing is done when queriesLen <= 0.
 */
static void databaseSearchStep(DbAlignment*** dbAlignments, 
    int* dbAlignmentsLen, int type, Chain** queries, int queriesStart, 
    int queriesLen, ChainDatabase* chainDatabase, Scorer* scorer, 
    int maxAlignments, ValueFunction valueFunction, void* valueFunctionParam, 
    double valueThreshold, int* indexes, int indexesLen, int* cards, 
    int cardsLen) {
    
    // An empty batch has nothing to score or align. Returning early also keeps
    // MIN(queriesLen, cardsLen) below from becoming 0 and being used as a
    // divisor.
    if (queriesLen <= 0) {
        return;
    }
    
    Chain** database = chainDatabase->database;
    int databaseStart = chainDatabase->databaseStart;
    int databaseLen = chainDatabase->databaseLen;
    long databaseElems = chainDatabase->databaseElems;
    ChainDatabaseGpu* chainDatabaseGpu = chainDatabase->chainDatabaseGpu;
    
    int i, j;
    
    //**************************************************************************
    // CALCULATE CELL NUMBER
    
    long queriesElems = 0;
    for (i = 0; i < queriesLen; ++i) {
        queriesElems += chainGetLength(queries[i]);
    }
    
    // with an index subset only the selected chains contribute
    if (indexes != NULL) {
    
        databaseElems = 0;
        
        for (i = 0; i < indexesLen; ++i) {
            databaseElems += chainGetLength(database[indexes[i]]);
        }
    }
    
    long long cells = (long long) queriesElems * databaseElems;
    
    //**************************************************************************
    
    //**************************************************************************
    // CALCULATE SCORES
    
    // filled by the scoring call; read below as queriesLen rows of
    // databaseLen scores
    int* scores;
    
    // small problems and card-less runs are scored on the CPU
    if (cells < GPU_DB_MIN_CELLS || cardsLen == 0) {
        scoreCpu(&scores, type, queries, queriesLen, database, 
            databaseLen, scorer, indexes, indexesLen);
    } else {
        scoreDatabasesGpu(&scores, type, queries, queriesLen, chainDatabaseGpu, 
            scorer, indexes, indexesLen, cards, cardsLen, NULL);
    }
    
    //**************************************************************************
    
    //**************************************************************************
    // EXTRACT BEST CHAINS AND SAVE THEIR DATA MULTITHREADED
    
    TIMER_START("Extract best");
    
    DbAlignmentData** dbAlignmentsData = 
        (DbAlignmentData**) malloc(queriesLen * sizeof(DbAlignmentData*));

    ExtractContext* eContexts = 
        (ExtractContext*) malloc(queriesLen * sizeof(ExtractContext));
    
    // one extract context per query
    for (i = 0; i < queriesLen; ++i) {
        eContexts[i].dbAlignmentData = &(dbAlignmentsData[i]);
        eContexts[i].dbAlignmentLen = &(dbAlignmentsLen[i]);
        eContexts[i].query = queries[i];
        eContexts[i].database = database;
        eContexts[i].databaseLen = databaseLen;
        eContexts[i].scores = scores + i * databaseLen;
        eContexts[i].maxAlignments = maxAlignments;
        eContexts[i].valueFunction = valueFunction;
        eContexts[i].valueFunctionParam = valueFunctionParam;
        eContexts[i].valueThreshold = valueThreshold;
        eContexts[i].cards = cards;
        eContexts[i].cardsLen = cardsLen;
    }

    if (cardsLen == 0) {

        // no cards: one thread pool task per query
        size_t tasksSize = queriesLen * sizeof(ThreadPoolTask*);
        ThreadPoolTask** tasks = (ThreadPoolTask**) malloc(tasksSize);

        for (i = 0; i < queriesLen; ++i) {
            tasks[i] = threadPoolSubmit(extractThread, (void*) &(eContexts[i]));
        }
        
        for (i = 0; i < queriesLen; ++i) {
            threadPoolTaskWait(tasks[i]);
            threadPoolTaskDelete(tasks[i]);
        }

        free(tasks);

    } else {

        // cards available: split both the cards and the extract contexts into
        // `chunks` consecutive groups, the first (len % chunks) groups get one
        // extra element; every group runs in its own thread
        int chunks = MIN(queriesLen, cardsLen);

        int cardsChunk = cardsLen / chunks;
        int cardsAdd = cardsLen % chunks;
        int cardsOff = 0;

        int contextsChunk = queriesLen / chunks;
        int contextsAdd = queriesLen % chunks;
        int contextsOff = 0;

        size_t contextsSize = chunks * sizeof(ExtractContexts);
        ExtractContexts* contexts = (ExtractContexts*) malloc(contextsSize);

        size_t tasksSize = chunks * sizeof(Thread);
        Thread* tasks = (Thread*) malloc(tasksSize);

        for (i = 0; i < chunks; ++i) {

            int* cards_ = cards + cardsOff;
            int cardsLen_ = cardsChunk + (i < cardsAdd);
            cardsOff += cardsLen_;

            ExtractContext* contexts_ = eContexts + contextsOff;
            int contextsLen_ = contextsChunk + (i < contextsAdd);
            contextsOff += contextsLen_;

            // restrict every context of the group to the group's cards
            for (j = 0; j < contextsLen_; ++j) {
                contexts_[j].cards = cards_;
                contexts_[j].cardsLen = cardsLen_;
            }

            contexts[i].contexts = contexts_;
            contexts[i].contextsLen = contextsLen_;

            threadCreate(&(tasks[i]), extractsThread, &(contexts[i]));
        }

        for (i = 0; i < chunks; ++i) {
            threadJoin(tasks[i]);
        }

        free(tasks);
        free(contexts);
    }

    free(eContexts);
    free(scores); // this is big, release immediately

    TIMER_STOP;

    //**************************************************************************
    
    //**************************************************************************
    // ALIGN BEST TARGETS MULTITHREADED
    
    TIMER_START("Database aligning");
    
    // create structure
    for (i = 0; i < queriesLen; ++i) {
        size_t dbAlignmentsSize = dbAlignmentsLen[i] * sizeof(DbAlignment*);
        dbAlignments[i] = (DbAlignment**) malloc(dbAlignmentsSize);
    }
    
    // count tasks
    int aTasksLen = 0;
    for (i = 0; i < queriesLen; ++i) {
        aTasksLen += dbAlignmentsLen[i];
    }
    
    size_t aTasksSize = aTasksLen * sizeof(ThreadPoolTask*);
    ThreadPoolTask** aTasks = (ThreadPoolTask**) malloc(aTasksSize);

    // both arrays are sized for the worst case of all tasks landing in one
    size_t aContextsSize = aTasksLen * sizeof(AlignContext);
    AlignContext* aContextsCpu = (AlignContext*) malloc(aContextsSize);
    AlignContext* aContextsGpu = (AlignContext*) malloc(aContextsSize);
    int aContextsCpuLen = 0;
    int aContextsGpuLen = 0;
    
    for (i = 0; i < queriesLen; ++i) {
    
        Chain* query = queries[i];
        int rows = chainGetLength(query);

        for (j = 0; j < dbAlignmentsLen[i]; ++j) {
            
            DbAlignmentData data = dbAlignmentsData[i][j];
            Chain* target = database[data.idx];

            int cols = chainGetLength(target);
            long long cells = (long long) rows * cols;

            // short targets, small problems and card-less runs go to the cpu
            AlignContext* context;
            if (cols < GPU_MIN_LEN || cells < GPU_MIN_CELLS || cardsLen == 0) {
                context = &(aContextsCpu[aContextsCpuLen++]);
                context->cards = NULL;
                context->cardsLen = 0;
            } else {
                // cards are assigned later, after balancing
                context = &(aContextsGpu[aContextsGpuLen++]);
            }

            context->dbAlignment = &(dbAlignments[i][j]);
            context->type = type;
            context->query = query;
            context->queryIdx = i;
            context->target = target;
            context->targetIdx = data.idx + databaseStart;
            context->value = data.value;
            context->score = data.score;
            context->scorer = scorer;
            context->cells = cells;
        }
    }
    
    LOG("Aligning %d cpu, %d gpu", aContextsCpuLen, aContextsGpuLen);

    // run cpu tasks
    int aCpuTasksLen;
    AlignContextsPacked* aContextsCpuPacked;

    if (aContextsCpuLen < 10000) {

        // few alignments: one thread pool task each
        aCpuTasksLen = aContextsCpuLen;
        aContextsCpuPacked = NULL;

        for (i = 0; i < aCpuTasksLen; ++i) {
            aTasks[i] = threadPoolSubmit(alignThread, &(aContextsCpu[i]));
        }

    } else {

        // many alignments: pack CPU_PACKED_CHUNK contexts into one task
        aCpuTasksLen = aContextsCpuLen / CPU_PACKED_CHUNK;
        aCpuTasksLen += (aContextsCpuLen % CPU_PACKED_CHUNK) != 0;

        size_t contextsSize = aCpuTasksLen * sizeof(AlignContextsPacked);
        AlignContextsPacked* contexts = (AlignContextsPacked*) malloc(contextsSize);

        for (i = 0; i < aCpuTasksLen; ++i) {

            int length = MIN(CPU_PACKED_CHUNK, aContextsCpuLen - i * CPU_PACKED_CHUNK);

            contexts[i].contexts = aContextsCpu + i * CPU_PACKED_CHUNK;
            contexts[i].contextsLen = length;
        }

        for (i = 0; i < aCpuTasksLen; ++i) {
            aTasks[i] = threadPoolSubmit(alignsPackedThread, &(contexts[i]));
        }

        // must stay alive until the cpu tasks have finished, freed below
        aContextsCpuPacked = contexts;
    }

    if (aContextsGpuLen) {

        int chunks = MIN(aContextsGpuLen, cardsLen);
        
        size_t contextsSize = chunks * sizeof(AlignContexts);
        AlignContexts* contexts = (AlignContexts*) malloc(contextsSize);

        // every chunk gets room for all gpu contexts
        size_t balancedSize = chunks * aContextsGpuLen * sizeof(AlignContext*);
        AlignContext** balanced = (AlignContext**) malloc(balancedSize);

        // set phony contexts, init data
        for (i = 0; i < chunks; ++i) {
            contexts[i].contexts = balanced + i * aContextsGpuLen;
            contexts[i].contextsLen = 0;
            contexts[i].cells = 0;
        }
        
        // balance tasks greedily: each context goes to the chunk with the
        // fewest cells so far, chunks are pretty small (CUDA cards)
        for (i = 0; i < aContextsGpuLen; ++i) {
        
            int minIdx = 0;
            long long min = contexts[0].cells;
            for (j = 1; j < chunks; ++j) {
                if (contexts[j].cells < min) {
                    min = contexts[j].cells;
                    minIdx = j;
                }
            }
            
            AlignContext* context = &(aContextsGpu[i]);
            contexts[minIdx].contexts[contexts[minIdx].contextsLen++] = context;
            contexts[minIdx].cells += context->cells;
        }

        // set context cards
        int cardsChunk = cardsLen / chunks;
        int cardsAdd = cardsLen % chunks;
        int cardsOff = 0;

        for (i = 0; i < chunks; ++i) {
        
            int cCardsLen = cardsChunk + (i < cardsAdd);
            int* cCards = cards + cardsOff;
            cardsOff += cCardsLen;

            for (j = 0; j < contexts[i].contextsLen; ++j) {
                contexts[i].contexts[j]->cards = cCards;
                contexts[i].contexts[j]->cardsLen = cCardsLen;
            }
        }

        size_t tasksSize = chunks * sizeof(Thread);
        Thread* tasks = (Thread*) malloc(tasksSize);

        // run gpu tasks first
        for (i = 0; i < chunks; ++i) {
            threadCreate(&(tasks[i]), alignsThread, &(contexts[i]));
        }
        
        // wait for gpu tasks to finish
        for (i = 0; i < chunks; ++i) {
            threadJoin(tasks[i]);
        }

        free(tasks); // all threads are joined, the handles are not needed
        free(balanced);
        free(contexts);
    }

    // wait for cpu tasks
    for (i = 0; i < aCpuTasksLen; ++i) {
        threadPoolTaskWait(aTasks[i]);
        threadPoolTaskDelete(aTasks[i]);
    }

    free(aContextsCpuPacked);
    free(aContextsCpu);
    free(aContextsGpu);
    free(aTasks);
    
    TIMER_STOP;
    
    //**************************************************************************
    
    //**************************************************************************
    // CLEAN MEMORY

    for (i = 0; i < queriesLen; ++i) {
        free(dbAlignmentsData[i]);
    }
    free(dbAlignmentsData);

    //**************************************************************************
}