static void loadAndAlignAll(struct bzp *bzp, char *target, char *query, char *output) /* blatz - Align genomic dna across species. */ { struct dnaLoad *queryDl = dnaLoadOpen(query); struct dnaLoad *targetDl = dnaLoadOpen(target); struct blatzIndex *indexList = blatzIndexDl(targetDl, bzp->weight, bzp->unmask); bzpTime("loaded and indexed target DNA"); // LX BEG if (bzp->dynaWordCoverage > 0) { dynaNumWords = (pow(4,bzp->weight)); // ?? check with Jim if this is correct AllocArray(dynaWordCount,dynaNumWords); printf("Allocated word count table of size %d\n",dynaNumWords); dynaWordLimit = bzp->dynaWordCoverage; // cheating, should be more like: //dynaWordLimit = bzp->dynaWordCoverage*dynaSequenceSize/dynaNumWords; printf("Set word limit to %d\n",dynaWordLimit); } // LX END verbose(2, "Loaded %d in %s, opened %s\n", slCount(indexList), target, query); alignAll(bzp, indexList, queryDl, output); }
/**
 * @brief Align every template and report the one with the lowest fitness score.
 *
 * Runs alignAll() to obtain one Result per template, then scans them for the
 * smallest fitness_score (lower is better). Ties keep the earliest template.
 *
 * @param[out] result  Receives a copy of the best Result. Left untouched when
 *                     alignAll() produced no results.
 * @return Index of the best template within the results produced by
 *         alignAll(), or -1 when there were no results to choose from.
 */
int TemplateAlignment::findBestAlignment(TemplateAlignment::Result& result)
{
  // Align all of the templates to the target cloud
  std::vector<Result, Eigen::aligned_allocator<Result> > results;
  alignAll (results);

  // Nothing was aligned: indexing results[0] below would be undefined
  // behaviour, so report "no template" instead.
  if (results.empty ())
  {
    return (-1);
  }

  // Find the template with the best (lowest) fitness score. The running
  // minimum starts at +infinity so that a score which never compares lower
  // (e.g. NaN) cannot be selected over a real one.
  float lowest_score = std::numeric_limits<float>::infinity ();
  int best_template = 0;
  for (std::size_t i = 0; i < results.size (); ++i)
  {
    const Result &candidate = results[i];
    if (candidate.fitness_score < lowest_score)
    {
      lowest_score = candidate.fitness_score;
      best_template = static_cast<int> (i);
    }
  }

  // Output the best alignment
  result = results[best_template];
  return (best_template);
}
int main(int argc, char* argv[]){ int a = 0, i = 0, j = 0, k = 0, l = 0, m = 0, n = 0, nN = 0, nK = 0; int nNLen = 0, nKLen = 0; t_Params tParams; t_Data tSeqData, tRefData; char *acTest = NULL; int nTestMatch = -1, nTestLength = -1; double dTest = 0.0; int nParentD = 0; int* anChi = NULL; t_Result *atResult; /*get command line params*/ getCommandLineParams(&tParams, argc, argv); /*read sequences to chimera check*/ readData(tParams.szSeqInputFile, &tSeqData); /*read sequences to compare too*/ readData(tParams.szRefInputFile, &tRefData); /*set parameters for sequence distances*/ initLookUp(&tParams); /*number of sequences*/ nN = tSeqData.nSeq; /*number of reference sequences*/ nK = tRefData.nSeq; atResult = (t_Result *) malloc(nN*sizeof(t_Result)); if(!atResult) goto memoryError; anChi = (int *) malloc(sizeof(int)*nN); if(!anChi){ goto memoryError; } for(i = 0; i < nN; i++){ anChi[i] = FALSE; atResult[i].dX = 0.0; atResult[i].dY = 0.0; atResult[i].dZ = 0.0; atResult[i].dP = 0.0; } /*max length of sequences*/ nNLen = tSeqData.nMaxLen; /*max length of references*/ nKLen = tRefData.nMaxLen; sortByFreq(&tSeqData); for(i = 0; i < nN; i++){ t_Align atAlign[nK]; double dBest = BIG_DBL, dBestChi = 0.0, dBestTri = 0.0; int nBestJ = -1; int nBest = BIG_INT, nBestChi = BIG_INT, nBestTri = BIG_INT; int nSplit = -1, nSplit1 = -1, nSplit2 = -1, nP1 = -1, nP2 = -1, nT1 = -1, nT2 = -1, nT3 = -1; int anRestrict[nK]; int nCompare = 0; double dLoon = 0.0, dCIndex = 0.0, dP = 0.0, dR = 0.0; int nI = tSeqData.anSort[i]; int nLenI = tSeqData.anLen[nI]; int anD[nLenI], anR[nLenI], anBestD[nLenI], anBestR[nLenI]; /*do pairwise alignments and get best hit for each sequence i*/ nCompare = alignAll(nI, nLenI, &nBest, &nBestJ, anRestrict, nK, &tSeqData, &tRefData, atAlign, &tParams, anChi); if(nCompare >= 2){ nBestChi = getBestChimera(nK, &tRefData, &nP1, &nP2, &nSplit, anRestrict, nLenI, atAlign, anD, anR, anBestD, anBestR); if(nBestChi >= 3 && nCompare >= 3){ nBestTri = getBestTrimera(nK, 
&tRefData, &nT1, &nT2, &nT3, &nSplit1, &nSplit2, anRestrict, nLenI, atAlign, anD, anR, anBestD, anBestR); } dBestChi = ((double) nBestChi)/((double) nLenI); dBestTri = ((double) nBestTri)/((double) nLenI); dBest = needlemanWunschN(&tSeqData.acSequences[nI*nNLen],&tRefData.acSequences[nBestJ*nKLen] , nLenI, tRefData.anLen[nBestJ], nKLen); if(nBestChi - nBestTri >= 3){ nTestMatch = TRIMERA; acTest = getTrimera(&nTestLength, &atAlign[nT1], &atAlign[nT2], &atAlign[nT3],nSplit1, nSplit2,nLenI); /*Trimera*/ } else{ /*Chimera*/ nTestMatch = CHIMERA; acTest = getChimera(&nTestLength, &atAlign[nP1], &atAlign[nP2], nSplit, nLenI); } dTest = needlemanWunschN(&tSeqData.acSequences[nI*nNLen], acTest, nLenI, nTestLength, nKLen); dCIndex = calcCIndex(nI, nP1, nP2, acTest, nTestLength, &tRefData, &tSeqData); dLoon = calcLoonIndex(&tSeqData, &tRefData, nI, nP1, nP2, &nSplit, &nParentD, &tParams); if(dCIndex >= 0.15 || dCIndex - dBest > 0.0){ dP = 0.0; } else{ dR = tParams.dAlpha + tParams.dBeta*dLoon; dP = 1.0/(1.0 + exp(-dR)); } if(dP > 0.5){ anChi[nI] = TRUE; } atResult[nI].dX = dCIndex; atResult[nI].dY = dCIndex - dBest; atResult[nI].dZ = dLoon; atResult[nI].dP = dP; switch(nTestMatch){ case GOOD: //printf("Good\n"); break; case CHIMERA: //printf("Chimera\n"); break; case TRIMERA: //printf("Trimera\n"); break; case QUAMERA: //printf("Quamera\n"); break; } free(acTest); } else{ //printf("0 0 Null 0 0 0 Null Null 0.0 0.0 0.0 0.0 0 0 0 Null\n"); } for(j = 0; j < nK; j++){ if(anRestrict[j] == FALSE){ free(atAlign[j].acA); free(atAlign[j].acB); free(atAlign[j].anD); free(atAlign[j].anR); free(atAlign[j].anMapD); free(atAlign[j].anMapR); } } } for(i = 0; i < nN; i++){ printf("%s %f %f %f %f\n",tSeqData.aszID[i],atResult[i].dX, atResult[i].dY, atResult[i].dZ, atResult[i].dP); } /*free allocated memory*/ free(adLookUp); free(anChi); free(atResult); destroyData(&tSeqData); destroyData(&tRefData); exit(EXIT_SUCCESS); memoryError: fprintf(stderr, "Failed allocating memory in main\n"); 
fflush(stderr); exit(EXIT_FAILURE); }