int 
Clusters::
reclusterNeyEssen()
{
  cerr << "Calculating MLE for prior probabilities" << endl;
  vector<double> prior(numberClasses,0.0l);
  double xxx = 1.0l/(double)numberTypes;
  for (int i = 0; i < numberTypes; i++){
    int c = classVector[i];
    prior[c] += xxx;
  }
  for (int i = 0; i < numberClasses;i++)
    cerr << i << " " << prior[i] << endl;
  if (numberStates > 0){
    cerr << "Training all the HMMs" << endl;
    for (int c = 0; c < numberClasses; c++){
      //      cerr << "Training HMM " << c << endl;
      HMM* hmmPtr = hmms[c];
      HMM* newHmmPtr = new HMM(numberStates, alphabetSize);
      for (int i = 0; i < numberTypes; i++){
	if (classVector[i] == c){
	  // then this word is in the right class
	  // so train it on word i
	  const string & word = *(corpus.wordArray[i]);
	  vector<int> v;
	  hmmPtr->convertString(word,v);
	  // FIXME 
	  double weight = 1.0l;
	  if (USE_TRUE_WEIGHT){
	    weight = corpus.countArray[i];
	  }
	  hmmPtr->emSingle(*newHmmPtr, weight, v);
	}
      }
      newHmmPtr->normalise();
      hmms[c] = newHmmPtr;
      delete hmmPtr;
    }
  }
  int something = 0;
  for (int i = 0; i < numberTypes; i++){
    //    cerr << "Word " << i;
    int w = sortedWords[i];
    //cerr << *(corpus.wordArray[w]) << endl;
    if (counts[w] > FREQ_CUTOFF){
      //cerr << "Doing " << w << endl;
      if (bestCluster(w, prior)){
	something++;
      }
    }
  }
 
  return something;
}
// Builds the initial clustering state from a corpus.
//
// Parameters:
//   numberClasses_  number of clusters; also the dimension of the square
//                   clusterBigrams table.
//   corpus_         supplies numberTypes, numberTokens and the token
//                   sequence `data` (word ids).
//   numberStates_   states per HMM; when it is <= 0 no HMMs are created.
//   alphabetSize_   forwarded to the HMM constructor.
//   randomised      when true, every word whose count exceeds FREQ_CUTOFF
//                   is given a class drawn with rand(); otherwise the
//                   (numberClasses-1) most frequent words each get their
//                   own class. All remaining words stay in the last
//                   class (numberClasses-1).
//
// After construction:
//   counts[w]       number of occurrences of word w in data.
//   sortedWords     word ids ordered by decreasing count (ties: larger
//                   word id first, as a consequence of sorting
//                   (count, id) pairs ascending and reading them back
//                   to front).
//   first[w]/next[] chain of positions of word w among positions
//                   0..numberTokens-2; first[w] is -1 if w does not occur
//                   there, and next[p] == numberTokens ends a chain.
//   clusterBigrams / clusterUnigrams  class-level counts over data.
//
// NOTE(review): values in data are only range-checked (via assert) in the
// indexing loop, after they have already been used to index counts.
Clusters::
Clusters(int numberClasses_, 
	 const SimpleCorpusOne & corpus_,
	 int numberStates_,
	 int alphabetSize_,
	 bool randomised)
  :
  numberClasses(numberClasses_), 
  numberTypes(corpus_.numberTypes), 
  numberTokens(corpus_.numberTokens),
  numberStates(numberStates_),
  alphabetSize(alphabetSize_),
  data(corpus_.data),
  corpus(corpus_),
  clusterBigrams(numberClasses_,numberClasses_)
{
  classVector.resize(numberTypes);
  counts.resize(numberTypes); 
  sortedWords.resize(numberTypes);
  first.resize(numberTypes);
  clusterUnigrams.resize(numberClasses);
  // next[p] == numberTokens marks "no later occurrence of this word".
  next = new int[numberTokens];
  for (int i = 0; i < numberTokens; i++)
    next[i] = numberTokens;
  // Every word starts in the last class with a zero count.
  for (int w = 0; w < numberTypes; w++){
    counts[w]=0;
    classVector[w] = numberClasses -1;
  }
  // counts are set
  for (int i = 0; i < numberTokens; i++)
    counts[data[i]]++;
  // now find the most frequent numberClasses -1 of them.
  vector< pair<int,int> > countsTable(numberTypes);
  for (int i = 0; i < numberTypes; i++){
    countsTable[i] = pair<int,int>(counts[i],i);
  }
  
  cerr << "Sorting words" << endl;
  sort(countsTable.begin(),countsTable.end());

  // The sort is ascending, so read it back to front to get the most
  // frequent word first.
  for (int i = 0; i < numberTypes; i++){
    first[i] = -1;
    sortedWords[i] = countsTable[numberTypes - 1 - i].second;
  }

  if (randomised)
    {
      for (int i = 0; i < numberTypes; i++){
	if (counts[i] > FREQ_CUTOFF){
	  // Scales rand() into the range [0, numberClasses).
	  int rc = (int) (1.0 * numberClasses *rand()/(RAND_MAX+1.0));
	  classVector[i] = rc;
	}
      }
    }
  else {
    // Give each of the most frequent words a class of its own. The extra
    // bound on numberTypes prevents reading past the end of sortedWords
    // when there are fewer word types than numberClasses-1.
    for (int i = 0; i < numberClasses-1 && i < numberTypes; i++){
      classVector[sortedWords[i]]= i;
    }
  }
  
  // last[w] is the most recent position at which w was seen; it is only
  // meaningful once first[w] has been set.
  vector<int> last(numberTypes,0);
  cerr << "Indexing data" << endl;
  // Walks every adjacent token pair (i, i+1); the final token is therefore
  // never the left element and is not entered into the first/next chains.
  for (int i = 0; i < numberTokens-1; i++){
    int w = data[i];
    int w2 = data[i+1];
    // Report the offending position before the assert below fires.
    if (w2 < 0 || w2 > numberTypes -1){
      cerr << i+1 << " " << w2 << endl;
    }
    assert(w >= 0 && w < numberTypes);
    assert(w2 >= 0 && w2 < numberTypes);
    if (first[w] == -1){
      first[w] = i;
      last[w] = i;
    }
    else
      {
	next[last[w]] = i;
	last[w] = i;
      }
    int c1 = classVector[w];
    int c2 = classVector[w2];
    assert(c1 >= 0 && c1 < numberClasses);
    assert(c2 >= 0 && c2 < numberClasses);
    clusterBigrams(c1,c2)++;
    clusterUnigrams[c1]++;
  }
  cerr << "Finished indexing " << endl;

  // The loop above only counted the class of the left token of each pair,
  // so the class of the final token is added here. With an empty corpus
  // there is no final token and data[numberTokens-1] must not be read.
  if (numberTokens > 0){
    clusterUnigrams[classVector[data[numberTokens-1]]]++;
  }
  cerr << "Numberstates " << numberStates << endl;
  if (numberStates > 0){
    cerr << "Starting to do the HMMs" << endl;
    // One randomly initialised, normalised HMM per class.
    hmms.resize(numberClasses);
    for (int i = 0; i < numberClasses; i++){
      HMM* hmmPtr = new HMM(numberStates, alphabetSize);
      hmmPtr->randomise();
      hmmPtr->normalise();
      hmms[i] = hmmPtr;
    }
  }
}