int main (int argn, char** argv) { // 1. parse cmd parse_cmd_line(argn, argv); // 2. input DNA sequence file int numSeq = 0; SequenceSet allSeqs (0, Sequence()); parse_seqs_file(allSeqs, numSeq, trainFname); vector<int> lenSeqs (numSeq, 0); for (int n = 0; n < numSeq; n ++) lenSeqs[n] = allSeqs[n].size(); // pre-info cout << "#########################################################" << endl; cout << "ScoreMatch: " << C_M; cout << ", ScoreInsertion: " << C_I; cout << ", ScoreDeletion: " << C_D; cout << ", ScoreMismatch: " << C_MM << endl; for (int n = 0; n < numSeq; n ++) sequence_dump(allSeqs, n); // 3. relaxed convex program: ADMM-based algorithm string dir_path = string(trainFname)+".trace/"; //system((string("rm -rf ")+dir_path).c_str()); //system((string("mkdir ")+dir_path).c_str()); // omp_set_num_threads(NUM_THREADS); int T2 = get_init_model_length (lenSeqs) + LENGTH_OFFSET; // model_seq_length time_t begin = time(NULL); vector<Tensor4D> W = CVX_ADMM_MSA (allSeqs, lenSeqs, T2, dir_path); time_t end = time(NULL); // 4. output the result // a. tuple view cout << ">>>>>>>>>>>>>>>>>>>>>>>TupleView<<<<<<<<<<<<<<<<<<<<<<<<" << endl; for (int n = 0; n < numSeq; n ++) { cout << "n = " << n << endl; tensor4D_dump(W[n]); } // b. 
sequence view cout << ">>>>>>>>>>>>>>>>>>>>>>>SequenceView<<<<<<<<<<<<<<<<<<<<<<<<" << endl; int T2m = T2; Tensor tensor (T2m, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0))); Matrix mat_insertion (T2m, vector<double> (NUM_DNA_TYPE, 0.0)); for (int n = 0; n < numSeq; n ++) { int T1 = W[n].size(); for (int i = 0; i < T1; i ++) { for (int j = 0; j < T2m; j ++) { for (int d = 0; d < NUM_DNA_TYPE; d ++) { for (int m = 0; m < NUM_MOVEMENT; m ++) { if (m == DELETION_A or m == MATCH_A) tensor[j][d][dna2T3idx('A')] += max(0.0, W[n][i][j][d][m]); else if (m == DELETION_T or m == MATCH_T) tensor[j][d][dna2T3idx('T')] += max(0.0, W[n][i][j][d][m]); else if (m == DELETION_C or m == MATCH_C) tensor[j][d][dna2T3idx('C')] += max(0.0, W[n][i][j][d][m]); else if (m == DELETION_G or m == MATCH_G) tensor[j][d][dna2T3idx('G')] += max(0.0, W[n][i][j][d][m]); else if (m == DELETION_START or m == MATCH_START) tensor[j][d][dna2T3idx('*')] += max(0.0, W[n][i][j][d][m]); else if (m == DELETION_END or m == MATCH_END) tensor[j][d][dna2T3idx('#')] += max(0.0, W[n][i][j][d][m]); else if (m == INSERTION) mat_insertion[j][d] += max(0.0, W[n][i][j][d][m]); } } } } } Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG refined_viterbi_algo (trace, tensor, mat_insertion); // for (int i = 0; i < trace.size(); i ++) // cout << trace[i].toString() << endl; for (int n = 0; n < numSeq; n ++) { char buffer [50]; sprintf (buffer, "Seq%5d", n); cout << buffer << ": "; for (int j = 0; j < allSeqs[n].size(); j ++) cout << allSeqs[n][j]; cout << endl; } Sequence recSeq; cout << "SeqRecov: "; for (int i = 0; i < trace.size(); i ++) if (trace[i].action != INSERTION) { cout << trace[i].acidB; recSeq.push_back(trace[i].acidB); if (trace[i].acidB == '#') break; } cout << endl; cout << ">>>>>>>>>>>>>>>>>>>>>>>MatchingView<<<<<<<<<<<<<<<<<<<<<<<<" << endl; // NOTE: rounding scheme SequenceSet allModelSeqs, allDataSeqs; for (int n = 0; n < numSeq; n ++) { Sequence model_seq = recSeq, data_seq = allSeqs[n]; 
data_seq.erase(data_seq.begin()); model_seq.erase(model_seq.begin()); data_seq.erase(data_seq.end()-1); model_seq.erase(model_seq.end()-1); // align sequences locally Plane plane (data_seq.size()+1, Trace(model_seq.size()+1, Cell(2))); Trace trace (0, Cell(2)); smith_waterman (model_seq, data_seq, plane, trace); // 4. output the result model_seq.clear(); data_seq.clear(); for (int i = 0; i < trace.size(); i ++) model_seq.push_back(trace[i].acidA); for (int i = 0; i < trace.size(); i ++) data_seq.push_back(trace[i].acidB); allModelSeqs.push_back(model_seq); allDataSeqs.push_back(data_seq); for (int i = 0; i < model_seq.size(); i ++) cout << model_seq[i]; cout << endl; for (int i = 0; i < data_seq.size(); i ++) cout << data_seq[i]; cout << endl; } cout << ">>>>>>>>>>>>>>>>>>>>>ClustalOmegaView<<<<<<<<<<<<<<<<<<<<<<" << endl; writeClusterView(string(trainFname)+".co", allModelSeqs, allDataSeqs); cout << "#########################################################" << endl; cout << "Time Spent: " << end - begin << " seconds" << endl; return 0; }
/*
 * Manual exercise of the sequence container: inserts the same 'A' element at
 * 20 random positions, then removes all 20 elements in runs of five, dumping
 * the sequence after every operation.
 *
 * argc / argv are unused.
 *
 * NOTE(review): `void main` is not a standard hosted-C signature (`int main`
 * is); the declaration is left untouched here to keep the block's interface
 * as it was.
 */
void main (int argc, char* argv[]) {
    int pos = 0;
    int length = 0;
    sequence s = sequence_create();

    (void) argc;
    (void) argv;

    srand(time(NULL));

    // while(length < 20000)
    // {
    //     char * c = malloc(sizeof(char));
    //     *c = getc(stdin);
    //     if(*c == '\n') continue;
    //     pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
    //     // system("clear");
    //     // printf("try to insert \033[01;35m%s\033[00m at pos \033[01;35m%d\033[00m...\n",c,pos);
    //     sequence_insert(s,c,pos);
    //     length++;
    //     // sequence_dump(s);
    // }
    // while(length > 0)
    // {
    //     pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
    //     // printf("try to delete at pos \033[01;35m%d\033[00m...\n",pos);
    //     sequence_delete(s,pos);
    //     length--;
    //     // sequence_dump(s);
    // }

    /******************************************/

    /* One shared payload is inserted at every position.
     * NOTE(review): c is never freed in this function; whether
     * sequence_delete / sequence_destroy release stored elements is not
     * visible from here -- confirm before adding a free(). */
    char * c = malloc(sizeof(char));
    if (c == NULL) {
        fprintf(stderr, "malloc failed\n");
        sequence_destroy(s);
        return;
    }
    *c = 'A';

    while(length < 20)
    {
        /* Uniform position in [0, length-1] (0 while the sequence is empty). */
        pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
        printf("## ");
        // for (int i = 0; i < 20; ++i)
        // {
        printf("try to insert \033[01;35mA\033[00m at pos \033[01;35m%d\033[00m...\n",pos);
        sequence_insert(s,c,pos);
        length++;
        sequence_dump(s);
        // }
    }

    while(length > 0)
    {
        pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
        printf(" ## ");
        /* Delete a run of up to five elements, walking left from pos. */
        for (int i = 0; i < 5 && length > 0; ++i)
        {
            /* Walking left can step past the front of the sequence; clamp so
             * a negative position is never handed to sequence_delete.
             * Position 0 is always valid here because length > 0. */
            if (pos < 0)
                pos = 0;
            printf("try to delete at pos \033[01;35m%d\033[00m...\n",pos);
            sequence_delete(s,pos);
            length--;
            pos--;
            sequence_dump(s);
        }
    }

    sequence_destroy(s);
}