// entry main function
int main (int argc, char ** argv) {
    // exception control: illustrate the usage if get input of wrong format
    if (argc < 5) {
        cerr << "Usage: cvx_clustering [dataFile] [fw_max_iter] [max_iter] [lambda] " << endl;
        cerr << "Note: dataFile must be scaled to [0,1] in advance." << endl;
        exit(-1);
    }

    // parse arguments
    char * dataFile = argv[1];
    int fw_max_iter = atoi(argv[2]);
    int max_iter = atoi(argv[3]);
    double lambda_base = atof(argv[4]);
    char * dmatFile = argv[5];

    // vector<Instance*> data;
    // readFixDim (dataFile, data, FIX_DIM);

    // read in data
    int FIX_DIM;
    Parser parser;
    vector<Instance*>* pdata;
    vector<Instance*> data;
    pdata = parser.parseSVM(dataFile, FIX_DIM);
    data = *pdata;
    // vector<Instance*> data;
    // readFixDim (dataFile, data, FIX_DIM);

    // explore the data
    int dimensions = -1;
    int N = data.size(); // data size
    for (int i = 0; i < N; i++) {
        vector< pair<int,double> > * f = &(data[i]->fea);
        int last_index = f->size()-1;
        if (f->at(last_index).first > dimensions) {
            dimensions = f->at(last_index).first;
        }
    }
    assert (dimensions == FIX_DIM);

    int D = dimensions;
    cerr << "D = " << D << endl; // # features
    cerr << "N = " << N << endl; // # instances
    cerr << "lambda = " << lambda_base << endl;
    cerr << "r = " << r << endl;
    int seed = time(NULL);
    srand (seed);
    cerr << "seed = " << seed << endl;

    //create lambda with noise
    double* lambda = new double[N];
    for(int i=0; i<N; i++) {
        lambda[i] = lambda_base + noise();
    }

    // pre-compute distance matrix
    dist_func df = L2norm;
    double ** dist_mat = mat_init (N, N);
    //  double ** dist_mat = mat_read (dmatFile, N, N);
    mat_zeros (dist_mat, N, N);
    compute_dist_mat (data, dist_mat, N, D, df, true);
    ofstream dist_mat_out ("dist_mat");
    dist_mat_out << mat_toString(dist_mat, N, N);
    dist_mat_out.close();

    // Run sparse convex clustering
    double ** W = mat_init (N, N);
    mat_zeros (W, N, N);
    cvx_clustering (dist_mat, fw_max_iter, max_iter, D, N, lambda, W);
    ofstream W_OUT("w_out");
    W_OUT<< mat_toString(W, N, N);
    W_OUT.close();

    // Output cluster
    output_objective(clustering_objective (dist_mat, W, N));

    /* Output cluster centroids */
    output_model (W, N);

    /* Output assignment */
    output_assignment (W, data, N);

    /* reallocation */
    mat_free (dist_mat, N, N);
    mat_free (W, N, N);
}
////////////////////////////////////////////////////////////
// learn_errors
//
// Correct reads using a much stricter filter in order
// to count the nt->nt errors and learn the error
// probabilities.
//
// Each OpenMP thread opens its own stream on fqf, claims
// chunk indices from a shared counter, seeks to
// starts[chunk] and reads four-line records until
// counts[chunk] records have been consumed or more than
// 200000 corrections have been sampled.  Every correction
// whose observed nucleotide index is < 4 is tallied in
// ntnt_counts[quality][corrected-to][observed].  The tallies
// are finally handed to regress_probs and output_model.
//
// Parameters:
//   fqf        - path of the reads file
//   trusted    - k-mer set queried through check() and
//                passed on to Read::correct
//   starts     - stream offset of each chunk
//   counts     - number of records in each chunk
//   ntnt_prob  - passed to Read::correct and regress_probs
//   prior_prob - passed to Read::correct
//
// Thread safety: the shared chunk counter, ntnt_counts and
// samples are only touched inside critical sections.
////////////////////////////////////////////////////////////
static void learn_errors(string fqf, bithash * trusted, vector<streampos> & starts, vector<unsigned long long> & counts, double ntnt_prob[Read::max_qual][4][4], double prior_prob[4]) {
  unsigned int ntnt_counts[Read::max_qual][4][4] = {0};
  unsigned int samples = 0;

  unsigned int chunk = 0;
#pragma omp parallel //shared(trusted)
  {
    unsigned int tchunk;
    string header,ntseq,strqual,corseq;
    int trim_length;
    char* nti;
    Read *r;
    ifstream reads_in(fqf.c_str());

    while(true) {
      // Claim the next chunk under the lock and test the bound on the
      // private copy.  Testing the shared counter outside the lock lets a
      // thread pass the test and then claim an index past the last chunk.
#pragma omp critical
      tchunk = chunk++;

      if(tchunk >= threads*chunks_per_thread)
        break;

      // A previous chunk may have left the stream in a failed/EOF state,
      // in which case seekg would have no effect.
      reads_in.clear();
      reads_in.seekg(starts[tchunk]);

      unsigned long long tcount = 0;
      while(getline(reads_in, header)) {
        // get sequence
        getline(reads_in, ntseq);

        // convert ntseq to iseq (index of each character within nts)
        // NOTE(review): strchr returns NULL for a character that is not in
        // nts, which would make the stored index meaningless - confirm the
        // input is restricted to the alphabet in nts.
        vector<unsigned int> iseq;
        iseq.reserve(ntseq.size());
        for(int i = 0; i < ntseq.size(); i++) {
          nti = strchr(nts, ntseq[i]);
          iseq.push_back(nti - nts);
        }

        // get quality values: the first line read here is overwritten by
        // the second, so only the fourth line of the record is kept
        getline(reads_in,strqual);
        getline(reads_in,strqual);

        vector<int> untrusted;

        if(iseq.size() < trim_t)
          trim_length = 0;
        else {
          // NOTE(review): iseq.size()-k+1 is unsigned arithmetic; this
          // presumably relies on trim_t >= k - confirm.
          for(int i = 0; i < iseq.size()-k+1; i++) {
            if(!trusted->check(&iseq[i])) {
              untrusted.push_back(i);
            }
          }

          trim_length = quick_trim(strqual, untrusted);
        }

        // fix error reads
        if(untrusted.size() > 0) {
          // correct
          r = new Read(header, &iseq[0], strqual, untrusted, trim_length);
          corseq = r->correct(trusted, ntnt_prob, prior_prob, true);

          // if trimmed to long enough
          if(corseq.size() >= trim_t) {
            if(r->trusted_read != 0) { // else no guarantee there was a correction
              for(int c = 0; c < r->trusted_read->corrections.size(); c++) {
                correction cor = r->trusted_read->corrections[c];
                if(iseq[cor.index] < 4) {
                  // ntnt_counts and samples are shared by all threads, so
                  // the updates must be serialised.
#pragma omp critical(learn_errors_counts)
                  {
                    // P(obs=o|actual=a,a!=o) for Bayes
                    ntnt_counts[strqual[cor.index]-Read::quality_scale][cor.to][iseq[cor.index]]++;
                    samples++;
                  }
                }
              }
            }
          }
          delete r;
        }

        // Read the shared sample counter under the same lock as its writers.
        bool enough_samples;
#pragma omp critical(learn_errors_counts)
        enough_samples = (samples > 200000);

        if(++tcount == counts[tchunk] || enough_samples)
          break;
      }
    }
    reads_in.close();
  }

  regress_probs(ntnt_prob, ntnt_counts);

  output_model(ntnt_prob, ntnt_counts, fqf);
}