예제 #1
0
int main (int argn, char** argv) {
    // 1. parse cmd 
    parse_cmd_line(argn, argv);

    // 2. input DNA sequence file
    int numSeq = 0;
    SequenceSet allSeqs (0, Sequence());
    parse_seqs_file(allSeqs, numSeq, trainFname);
    vector<int> lenSeqs (numSeq, 0);
    for (int n = 0; n < numSeq; n ++) 
        lenSeqs[n] = allSeqs[n].size();

    // pre-info
    cout << "#########################################################" << endl;
    cout << "ScoreMatch: " << C_M;
    cout << ", ScoreInsertion: " << C_I;
    cout << ", ScoreDeletion: " << C_D;
    cout << ", ScoreMismatch: " << C_MM << endl;
    for (int n = 0; n < numSeq; n ++) 
        sequence_dump(allSeqs, n);

    // 3. relaxed convex program: ADMM-based algorithm
    string dir_path = string(trainFname)+".trace/";
    //system((string("rm -rf ")+dir_path).c_str());
    //system((string("mkdir ")+dir_path).c_str());
    // omp_set_num_threads(NUM_THREADS);
    int T2 = get_init_model_length (lenSeqs) + LENGTH_OFFSET; // model_seq_length
    time_t begin = time(NULL);
    vector<Tensor4D> W = CVX_ADMM_MSA (allSeqs, lenSeqs, T2, dir_path);
    time_t end = time(NULL);

    // 4. output the result
    // a. tuple view
    cout << ">>>>>>>>>>>>>>>>>>>>>>>TupleView<<<<<<<<<<<<<<<<<<<<<<<<" << endl;
    for (int n = 0; n < numSeq; n ++) {
        cout << "n = " << n << endl;
        tensor4D_dump(W[n]);
    }
    // b. sequence view
    cout << ">>>>>>>>>>>>>>>>>>>>>>>SequenceView<<<<<<<<<<<<<<<<<<<<<<<<" << endl;
    int T2m = T2;
    Tensor tensor (T2m, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0)));
    Matrix mat_insertion (T2m, vector<double> (NUM_DNA_TYPE, 0.0));
    for (int n = 0; n < numSeq; n ++) {
        int T1 = W[n].size();
        for (int i = 0; i < T1; i ++) { 
            for (int j = 0; j < T2m; j ++) {
                for (int d = 0; d < NUM_DNA_TYPE; d ++) {
                    for (int m = 0; m < NUM_MOVEMENT; m ++) {
                        if (m == DELETION_A or m == MATCH_A)
                            tensor[j][d][dna2T3idx('A')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == DELETION_T or m == MATCH_T)
                            tensor[j][d][dna2T3idx('T')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == DELETION_C or m == MATCH_C)
                            tensor[j][d][dna2T3idx('C')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == DELETION_G or m == MATCH_G)
                            tensor[j][d][dna2T3idx('G')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == DELETION_START or m == MATCH_START)
                            tensor[j][d][dna2T3idx('*')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == DELETION_END or m == MATCH_END)
                            tensor[j][d][dna2T3idx('#')] += max(0.0, W[n][i][j][d][m]);
                        else if (m == INSERTION) 
                            mat_insertion[j][d] += max(0.0, W[n][i][j][d][m]);
                    }
                }
            }
        }
    }
    Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG
    refined_viterbi_algo (trace, tensor, mat_insertion);
    // for (int i = 0; i < trace.size(); i ++) 
    //    cout << trace[i].toString() << endl;
    for (int n = 0; n < numSeq; n ++) {
        char buffer [50];
        sprintf (buffer, "Seq%5d", n);
        cout << buffer << ": ";
        for (int j = 0; j < allSeqs[n].size(); j ++) 
            cout << allSeqs[n][j];
        cout << endl;
    }
    Sequence recSeq;
    cout << "SeqRecov: ";
    for (int i = 0; i < trace.size(); i ++) 
        if (trace[i].action != INSERTION) {
            cout << trace[i].acidB;
            recSeq.push_back(trace[i].acidB);
            if (trace[i].acidB == '#') break;
        }
    cout << endl;
    cout << ">>>>>>>>>>>>>>>>>>>>>>>MatchingView<<<<<<<<<<<<<<<<<<<<<<<<" << endl;
    // NOTE: rounding scheme
    SequenceSet allModelSeqs, allDataSeqs;
    for (int n = 0; n < numSeq; n ++) {
        Sequence model_seq = recSeq, data_seq = allSeqs[n];
       data_seq.erase(data_seq.begin());
       model_seq.erase(model_seq.begin());
        data_seq.erase(data_seq.end()-1);
        model_seq.erase(model_seq.end()-1);

        // align sequences locally
        Plane plane (data_seq.size()+1, Trace(model_seq.size()+1, Cell(2)));
        Trace trace (0, Cell(2));
        smith_waterman (model_seq, data_seq, plane, trace);

        // 4. output the result
        model_seq.clear(); data_seq.clear();
        for (int i = 0; i < trace.size(); i ++) 
            model_seq.push_back(trace[i].acidA);
        for (int i = 0; i < trace.size(); i ++) 
            data_seq.push_back(trace[i].acidB);
        allModelSeqs.push_back(model_seq);
        allDataSeqs.push_back(data_seq);
        for (int i = 0; i < model_seq.size(); i ++) cout << model_seq[i];
        cout << endl;
        for (int i = 0; i < data_seq.size(); i ++) cout << data_seq[i];
        cout << endl;
    }
    cout << ">>>>>>>>>>>>>>>>>>>>>ClustalOmegaView<<<<<<<<<<<<<<<<<<<<<<" << endl;
    writeClusterView(string(trainFname)+".co", allModelSeqs, allDataSeqs);
	
    
    cout << "#########################################################" << endl;
    cout << "Time Spent: " << end - begin << " seconds" << endl;
    return 0;
}
예제 #2
0
/*
 * Exerciser for the sequence container.
 *
 * Inserts the same one-character payload 20 times at random positions, then
 * removes everything again in bursts of up to five deletions that walk
 * leftwards from a random starting position. The sequence is dumped after
 * every operation so the structure can be inspected by eye.
 *
 * Command-line arguments are not used. Returns 0 on success and 1 when the
 * payload cannot be allocated.
 */
int main (int argc, char* argv[]) 
{
  int pos = 0;
  int length = 0;   /* number of elements this driver believes are stored */
  sequence s = sequence_create();
  char * c;

  (void) argc;
  (void) argv;

  srand((unsigned int) time(NULL));
   
  /* Disabled variant: fill the sequence with up to 20000 characters read
   * from stdin, then delete them all at random positions. */
  // while(length < 20000)
  // {
  //   char * c = malloc(sizeof(char));
  //   *c = getc(stdin);
  //   if(*c == '\n') continue;
  //   pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
  //   // system("clear");
  //   // printf("try to insert \033[01;35m%s\033[00m at pos \033[01;35m%d\033[00m...\n",c,pos);
  //   sequence_insert(s,c,pos);
  //   length++;
  //   // sequence_dump(s);
  // }

  // while(length > 0)
  // {
  //   pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
  //   // printf("try to delete at pos \033[01;35m%d\033[00m...\n",pos);
  //   sequence_delete(s,pos);
  //   length--;
  //   // sequence_dump(s);
  // }

  /******************************************/

  /* Single shared payload: the same pointer is stored in every slot. */
  c = malloc(sizeof(char));
  if (c == NULL)
  {
    perror("malloc");
    sequence_destroy(s);
    return 1;
  }
  *c = 'A';

  /* Phase 1: grow to 20 elements, each inserted at a random position in
   * [0, length) (always 0 while the sequence is still empty). */
  while(length < 20)
  {
    pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
    printf("## ");
    printf("try to insert \033[01;35mA\033[00m at pos \033[01;35m%d\033[00m...\n",pos);
    sequence_insert(s,c,pos);
    length++;
    sequence_dump(s);
  }

  /* Phase 2: shrink back to empty. Each burst starts at a random position in
   * [0, length) and moves one slot to the left after every deletion. */
  while(length > 0)
  {
    pos = (int) (length * (rand() / (RAND_MAX + 1.0)));
    printf("  ## ");
    for (int i = 0; i < 5 && length > 0; ++i)
    {
      printf("try to delete at pos \033[01;35m%d\033[00m...\n",pos);
      sequence_delete(s,pos);
      length--;
      /* Stop at the left edge: without this floor a burst that starts
       * below position 4 would go on to request negative positions. */
      if (pos > 0)
        pos--;
      sequence_dump(s);
    }
  }

  /* NOTE(review): c is deliberately not freed here -- whether the sequence
   * takes ownership of inserted payloads is not visible from this file;
   * confirm against sequence_delete/sequence_destroy before adding free(c). */
  sequence_destroy(s);
  return 0;
}