int BaseSet::getScore() const { uint8_t cons = getConsensus(); int score = 0; for (int j = 1; j < Base::NBASES; ++j) { if (j == cons) continue; score += count[j]; } return score; }
void updateICMRecord(IntConConMesg *icm_mesg, char * inFile, char *seqAn, char *prefix, char * wrkDir) { // update the consensus icm_mesg->consensus = getConsensus(inFile, seqAn, prefix, wrkDir); icm_mesg->length = strlen(icm_mesg->consensus); // update quality icm_mesg->quality = (char *) safe_malloc(sizeof(char) * icm_mesg->length+1); memset(icm_mesg->quality, '1', icm_mesg->length); icm_mesg->quality[icm_mesg->length] = '\0'; //update read data int32 currRead = 0; char line[AS_SEQAN_MAX_RESULT_LENGTH]; char resultFile[AS_SEQAN_MAX_BUFFER_LENGTH]; getFileName(prefix, wrkDir, AS_SEQAN_RESULT, resultFile); FILE *tempOut; tempOut = fopen(resultFile,"r"); // skip header of output while (!feof(tempOut)) { fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); if (strncasecmp(line, "typ", 3) == 0) { break; } } // now read alignments of each read for (currRead = 0; currRead < icm_mesg->num_pieces; currRead++) { if (currRead > 0) { // read the typ: line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); } // read the seq: line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); // read the Pos line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); chomp(line); int32 begin, end; sscanf(line,"Pos:"F_S32","F_S32,&begin,&end); icm_mesg->pieces[currRead].position.bgn = begin; icm_mesg->pieces[currRead].position.end = end; // read the dln line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); chomp(line); sscanf(line,"dln:"F_S32, &icm_mesg->pieces[currRead].delta_length); // read the del line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); chomp(line); if (icm_mesg->pieces[currRead].delta_length > 0) { char *dlnStr = line+AS_SEQAN_MAX_HEADER_LENGTH; icm_mesg->pieces[currRead].delta = (int32 *)safe_malloc(sizeof(int32) * icm_mesg->pieces[currRead].delta_length); int32 i = 0; while (i < icm_mesg->pieces[currRead].delta_length) { icm_mesg->pieces[currRead].delta[i] = (int32) strtol(dlnStr,&dlnStr,10); i++; } } // read blank line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); } // 
now read the alignments of each unitig for (currRead = 0; currRead < icm_mesg->num_unitigs; currRead++) { // read the seq: line while (strncmp(line, "Pos:", 4) != 0) { fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); } // we read the Pos line above, process it now chomp(line); int32 begin, end; sscanf(line,"Pos:"F_S32","F_S32,&begin,&end); // read the dln line fgets(line, AS_SEQAN_MAX_RESULT_LENGTH, tempOut); chomp(line); sscanf(line,"dln:"F_S32, &icm_mesg->unitigs[currRead].delta_length); // read the del line char * del = readMultiLine(tempOut); if (currRead >= 0) { icm_mesg->unitigs[currRead].position.bgn = begin; icm_mesg->unitigs[currRead].position.end = end; if (icm_mesg->unitigs[currRead].delta_length > 0) { char *dlnStr = del+AS_SEQAN_MAX_HEADER_LENGTH; icm_mesg->unitigs[currRead].delta = (int32 *)safe_malloc(sizeof(int32) * icm_mesg->unitigs[currRead].delta_length); int32 i = 0; while (i < icm_mesg->unitigs[currRead].delta_length) { icm_mesg->unitigs[currRead].delta[i] = (int32) strtol(dlnStr,&dlnStr,10); i++; } } } safe_free(del); } fclose(tempOut); }
/**
 * Runs a generative test: compares correlations sampled by Monte Carlo from a
 * set of couplings against the correlations measured on an alignment.
 *
 * Steps, in order:
 *   1. select the error functions (GI or default variants) through the global
 *      function pointers epsilonP2_ptr / epsilonC_ptr / getMaxError_ptr;
 *   2. read the reference sequence, the couplings and the alignment (with an
 *      optional weights file, which may legitimately be absent);
 *   3. optionally read a starting configuration, otherwise use p[i].size();
 *   4. run getErrorGenTest() to obtain the sampled statistics;
 *   5. compute connected 2-point (and optionally 3-point) correlations and the
 *      RMS absolute errors on 1- and 2-point statistics;
 *   6. print the relative/absolute errors and write the comparison files.
 *
 * @param r  run parameters (file names, flags and sampling settings).
 *
 * A required input file that cannot be opened or read is reported on stdout
 * and terminates the program with exit(1).
 */
void runGenTest(RunParameters &r) {

    // One table of values per site triple (i,j,k), used for all 3-point statistics.
    typedef std::vector<std::vector<std::vector<std::vector<double> > > > ThreePointTable;

    // Define variables

    Vector J, expJ;
    std::vector<int> cons;

    if (r.useGI) {
        epsilonP2_ptr=&epsilonP2_GI;
        epsilonC_ptr=&epsilonC_GI;
        getMaxError_ptr=&getMaxError_GI;
    }
    else {
        epsilonP2_ptr=&epsilonP2;
        epsilonC_ptr=&epsilonC;
        getMaxError_ptr=&getMaxError;
    }

    // Get reference sequence from file

    FILE *consIn = fopen(r.getConsensusInfile().c_str(),"r");
    if (consIn!=NULL) getConsensus(consIn,cons);
    else {
        printf("Error reading input from file %s\n\n",r.getConsensusInfile().c_str());
        exit(1);
    }
    fclose(consIn);

    if (r.useVerbose) {
        printf("Reference sequence: ");
        for (int i=0;i<cons.size();i++) printf(" %d",cons[i]);
        printf("\n\n");
    }

    // Retrieve couplings from file

    FILE *dataIn=fopen(r.getInfile().c_str(),"r");
    if (dataIn!=NULL) getCouplings(dataIn,J);
    else {
        printf("Error reading input from file %s",r.getInfile().c_str());
        exit(1);
    }
    fclose(dataIn);

    // Resize expJ to the shape of J and fill it with exp(J)

    for (int i=0;i<J.size();i++) expJ.push_back(std::vector<double>(J[i].size(),0));
    for (int i=0;i<J.size();i++) { for (int j=0;j<J[i].size();j++) expJ[i][j] = exp(J[i][j]); }

    // Declare 2-point correlations, 3-point correlations, P(k) and magnetisations

    bool ThreePoints = (r.p3red || r.p3);

    int    N     = sizetolength(J.size()); // System size
    double alpha = 0.01;                   // Field regularization multiplier
    double gamma = 0;                      // Regularization strength (L2, set below)

    if (r.useGamma) {
        // 1.0 forces floating-point division, so the default cannot be
        // truncated to 0 should sampleB be of an integral type.
        if (r.gamma==0) gamma=1.0/(r.sampleB);
        else            gamma=r.gamma;
    }

    Vector p(J.size(),std::vector<double>());    // MC magnetisations and 2-point correlations
    Vector cc(J.size(),std::vector<double>());   // MC connected 2-point correlations
    Vector q(J.size(),std::vector<double>());    // MSA magnetisations and 2-point correlations
    Vector qcc(J.size(),std::vector<double>());  // MSA connected 2-point correlations

    ThreePointTable p3(N);   // MC 3-point correlations
    ThreePointTable c3(N);   // MC connected 3-point correlations
    ThreePointTable q3(N);   // MSA 3-point correlations
    ThreePointTable qc3(N);  // MSA connected 3-point correlations

    std::vector<double> pk(N+1,0);     // MC mutation probability
    std::vector<double> qk(N+1,0);     // MSA mutation probability
    std::vector<double> absErr(2,0);   // Absolute errors on magnetisation and 2-point correlations

    for (int i=0;i<J.size();i++) {
        cc[i].resize(J[i].size(),0);
        p[i].resize(J[i].size(),0);
        qcc[i].resize(J[i].size(),0);
        q[i].resize(J[i].size(),0);
    }

    // The 3-point tables are only allocated when they are actually requested.
    if (ThreePoints) {
        for (int i=0;i<N;i++) {
            p3[i].resize(N);
            c3[i].resize(N);
            q3[i].resize(N);
            qc3[i].resize(N);
            for (int j=0;j<N;j++) {
                p3[i][j].resize(N);
                c3[i][j].resize(N);
                q3[i][j].resize(N);
                qc3[i][j].resize(N);
                for (int k=0;k<N;k++) {
                    p3[i][j][k].resize(p[i].size()*p[j].size()*p[k].size(),0);
                    c3[i][j][k].resize(p[i].size()*p[j].size()*p[k].size(),0);
                    q3[i][j][k].resize(p[i].size()*p[j].size()*p[k].size(),0);
                    qc3[i][j][k].resize(p[i].size()*p[j].size()*p[k].size(),0);
                }
            }
        }
    }

    // Get sequences from MSA file and compute correlations

    FILE *alIn=fopen(r.getInfileAl().c_str(),"r");
    FILE *weightIn=fopen(r.getWeights().c_str(),"r");   // may be NULL; passed on as-is
    if (alIn!=NULL){
        if (ThreePoints) getAlignment(alIn,weightIn,J,q,q3,qk,cons);
        else             getAlignment(alIn,weightIn,J,q,qk,cons);
    }
    else {
        printf("Error reading input from file %s\n\n",r.getInfileAl().c_str());
        exit(1);
    }
    fclose(alIn);
    if (weightIn!=NULL) fclose(weightIn);

    if (r.useVerbose) printf("Got N=%d, len(h[0])=%d\n",N,(int)J[0].size());

    // Get default starting configuration, if nontrivial

    std::vector<int> lattice(N);

    if (r.useStart) {
        FILE *startIn=fopen(r.getStartInfile().c_str(),"r");
        if (startIn==NULL) {
            printf("Error reading input from file %s\n\n",r.getStartInfile().c_str());
            exit(1);
        }
        for (int i=0;i<N;i++) {
            // A short or malformed file must not silently leave sites at 0.
            if (fscanf(startIn,"%d",&lattice[i])!=1) {
                printf("Error reading input from file %s\n\n",r.getStartInfile().c_str());
                exit(1);
            }
        }
        fclose(startIn);
    }
    else {
        for (int i=0;i<N;i++) lattice[i]=(int) p[i].size();
    }

    // Prepare to simulate

    srand((unsigned)time(0));

    // Run MC and get correlations

    if (ThreePoints) getErrorGenTest(J, expJ, r.sampleB, r.b, r.runs, p, lattice, pk, p3, cons); // compute errors on P P2 and MAX
    else             getErrorGenTest(J, expJ, r.sampleB, r.b, r.runs, p, lattice, pk, cons);     // compute errors on P P2 and MAX

    // Compute connected correlations

    double Neff  = 0;   // number of (site, state) entries visited
    double NJeff = 0;   // number of (site pair, state pair) entries visited
    double meanq = 0;   // mean of q over single-site entries; used for the 3-point print threshold

    for (int i=0;i<lattice.size();i++) {
        for (int a=0;a<p[i].size();a++) {

            Neff++;
            meanq+=q[i][a];
            absErr[0] += (p[i][a] - q[i][a]) * (p[i][a] - q[i][a]);

            for (int j=i+1;j<lattice.size();j++) {
                for (int b=0;b<p[j].size();b++) {

                    NJeff++;
                    int idx = index(i,j,lattice.size());
                    int sab = sindex(a,b,J[i].size(),J[j].size());

                    absErr[1] += (p[idx][sab] - q[idx][sab]) * (p[idx][sab] - q[idx][sab]);

                    cc[idx][sab]  = p[idx][sab] - (p[i][a] * p[j][b]);
                    qcc[idx][sab] = q[idx][sab] - (q[i][a] * q[j][b]);

                    if (ThreePoints) {
                        for (int k=j+1;k<lattice.size();k++) {
                            for (int c=0;c<p[k].size();c++) {

                                int ijx  = idx;
                                int ikx  = index(i,k,lattice.size());
                                int jkx  = index(j,k,lattice.size());
                                int sac  = sindex(a,c,J[i].size(),J[k].size());
                                int sbc  = sindex(b,c,J[j].size(),J[k].size());
                                int sabc = sindex3(a,b,c,J[i].size(),J[j].size(),J[k].size());

                                // connected 3-point = p3 - sum(p1 * p2) + 2 * p1*p1*p1
                                c3[i][j][k][sabc]  = p3[i][j][k][sabc]
                                                   - (p[i][a]*p[jkx][sbc])
                                                   - (p[j][b]*p[ikx][sac])
                                                   - (p[k][c]*p[ijx][sab])
                                                   + (2*(p[i][a]*p[j][b]*p[k][c]));
                                qc3[i][j][k][sabc] = q3[i][j][k][sabc]
                                                   - (q[i][a]*q[jkx][sbc])
                                                   - (q[j][b]*q[ikx][sac])
                                                   - (q[k][c]*q[ijx][sab])
                                                   + (2*(q[i][a]*q[j][b]*q[k][c]));
                            }
                        }
                    }
                }
            }
        }
    }

    absErr[0] = sqrt(absErr[0]/Neff);
    absErr[1] = sqrt(absErr[1]/NJeff);
    meanq=meanq/Neff;

    // Print out errors

    double maxPrecision=1.0/(r.sampleB);   // floating-point division, see gamma above
    double ep1 = epsilonP(q, p, N, maxPrecision, J, gamma, alpha);
    double ep2 = (*epsilonP2_ptr)(q, p, N, maxPrecision, J, gamma);
    double em  = (*getMaxError_ptr)( q, p, maxPrecision, J, gamma, alpha);

    printf("\nRelative errors: P %f, P2 %f MAX %f gamma %f\n",ep1,ep2,em,gamma);
    printf("Absolute errors: P %f, P2 %f \n\n",absErr[0],absErr[1]);

    // Print results for comparison
    // NOTE(review): mOut, pOut, ccOut, p3Out and c3Out are handed to the print
    // helpers; whether those helpers check or close them is not visible here,
    // so they are deliberately left untouched in this function.

    FILE *mOut  = fopen(r.getMOutfile().c_str(),"w");
    FILE *pOut  = fopen(r.getP2Outfile().c_str(),"w");
    FILE *ccOut = fopen(r.getCCOutfile().c_str(),"w");
    FILE *pkOut = fopen(r.getPKOutfile().c_str(),"w");

    printMagnetisations(mOut, q, p);

    double num=0;
    printCorrelations(ccOut, qcc, cc,pOut, q, p);

    if (ThreePoints){
        FILE *p3Out = fopen(r.getP3Outfile().c_str(),"w");
        FILE *c3Out = fopen(r.getC3Outfile().c_str(),"w");
        if (r.p3red) num=meanq*meanq*meanq;
        if (r.p3)    num=0;
        print3points(c3Out, qc3, c3,p3Out, q3, p3, num);
    }

    // pkOut is written directly here, so it is checked and closed here too.
    if (pkOut==NULL) {
        printf("Error writing output to file %s\n\n",r.getPKOutfile().c_str());
        exit(1);
    }
    for (int sit=0;sit<N;sit++) fprintf(pkOut,"%d %le %le\n",sit,qk[sit],pk[sit]);
    fflush(pkOut);
    fclose(pkOut);

}