// Runs one complete simulation pass.
//
// The pass is organised as a sequence of sample periods, each made of
// _sample_period calls to _Step( ):
//   * warming_up - ends after _warmup_periods samples or, when
//                  _warmup_periods is 0, once the relative change between two
//                  consecutive samples falls below the warm-up threshold;
//   * running    - ends once three stable samples have been counted, or when
//                  _max_samples samples have been taken;
//   * draining   - latency mode only: steps until _PacketsOutstanding( )
//                  reports nothing left.
// Whatever the outcome, the network is finally stepped until
// _total_in_flight reaches zero.
//
// Returns true when the convergence counter is positive; the counter is
// bumped once whenever the sampling loop is left in the running state.
bool TrafficManager::_SingleSim( )
{
    // Relative-change limits used to decide "warmed up" and "converged".
    const double warmup_threshold       = 0.05;
    const double stopping_threshold     = 0.01;
    const double acc_stopping_threshold = 0.01;

    _time = 0;

    // Reset the per-source / per-class queue bookkeeping.
    if ( _use_lagging ) {
        for ( int src = 0; src < _sources; ++src ) {
            for ( int cls = 0; cls < _classes; ++cls ) {
                _qtime[src][cls]    = 0;
                _qdrained[src][cls] = false;
            }
        }
    }

    // Reset the per-source / per-destination active-VC flags.
    if ( _voqing ) {
        for ( int src = 0; src < _sources; ++src ) {
            for ( int dst = 0; dst < _dests; ++dst ) {
                _active_vc[src][dst] = false;
            }
        }
    }

    int    converged     = 0;
    int    total_phases  = 0;
    double prev_latency  = 0;
    double prev_accepted = 0;
    bool   clear_last    = false;

    _sim_state = warming_up;
    _ClearStats( );

    while ( total_phases < _max_samples ) {

        // Done as soon as we are running and have seen three stable samples.
        if ( ( _sim_state == running ) && ( converged >= 3 ) ) {
            break;
        }

        // Statistics are wiped right after warm-up completes, and on every
        // even-numbered sample while still warming up.
        const bool even_warmup_phase =
            ( _sim_state == warming_up ) && ( ( total_phases % 2 ) == 0 );
        if ( clear_last || even_warmup_phase ) {
            clear_last = false;
            _ClearStats( );
        }

        for ( int step = 0; step < _sample_period; ++step ) {
            _Step( );
        }

        cout << "%=================================" << endl;

        const double cur_latency = _latency_stats[0]->Average( );

        double min_accepted;
        double avg_accepted;
        const int min_node = _ComputeAccepted( &avg_accepted, &min_accepted );
        const double cur_accepted = avg_accepted;

        cout << "% Average latency = " << cur_latency << endl;

        if ( _reorder ) {
            cout << "% Reorder latency = " << _rob_latency->Average( ) << endl;
            cout << "% Reorder size = " << _rob_size->Average( ) << endl;
        }

        cout << "% Accepted packets = " << min_accepted << " at node " << min_node
             << " (avg = " << avg_accepted << ")" << endl;

        if ( MATLAB_OUTPUT ) {
            const int sample_index = total_phases + 1;
            cout << "lat(" << sample_index << ") = " << cur_latency << ";" << endl;
            cout << "thru(" << sample_index << ",:) = [ ";
            for ( int dst = 0; dst < _dests; ++dst ) {
                cout << _accepted_packets[dst]->Average( ) << " ";
            }
            cout << "];" << endl;
        }

        // Fail safe: abandon the run once latency exceeds the configured limit.
        if ( ( _sim_mode == latency ) && ( cur_latency > _latency_thres ) ) {
            cout << "Average latency is getting huge" << endl;
            converged  = 0;
            _sim_state = warming_up;
            break;
        }

        // Relative change of each metric with respect to the previous sample.
        const double latency_change  = fabs( ( cur_latency - prev_latency ) / cur_latency );
        const double accepted_change = fabs( ( cur_accepted - prev_accepted ) / cur_accepted );

        cout << "% latency change    = " << latency_change << endl;
        cout << "% throughput change = " << accepted_change << endl;

        if ( _sim_state == warming_up ) {

            bool warmed_up;
            if ( _warmup_periods != 0 ) {
                // Fixed-length warm-up.
                warmed_up = ( total_phases + 1 >= _warmup_periods );
            } else if ( _sim_mode == latency ) {
                warmed_up = ( latency_change < warmup_threshold ) &&
                            ( accepted_change < warmup_threshold );
            } else {
                warmed_up = ( accepted_change < warmup_threshold );
            }

            if ( warmed_up ) {
                cout << "% Warmed up ..." << endl;
                clear_last = true;
                _sim_state = running;
            }

        } else if ( _sim_state == running ) {

            if ( _sim_mode == latency ) {
                const bool stable = ( latency_change < stopping_threshold ) &&
                                    ( accepted_change < acc_stopping_threshold );
                converged = stable ? ( converged + 1 ) : 0;
            } else if ( accepted_change > acc_stopping_threshold ) {
                // In non-latency modes the counter is only ever reset here and
                // never incremented, so the sampling loop ends on _max_samples.
                converged = 0;
            }
        }

        prev_latency  = cur_latency;
        prev_accepted = cur_accepted;

        ++total_phases;
    }

    if ( _sim_state != running ) {
        cout << "Too many sample periods needed to converge" << endl;
    } else {
        ++converged;

        if ( _sim_mode == latency ) {
            cout << "% Draining all recorded packets ..." << endl;
            _sim_state  = draining;
            _drain_time = _time;

            int drain_steps = 0;
            while ( _PacketsOutstanding( ) ) {
                _Step( );
                if ( ( ++drain_steps % 1000 ) == 0 ) {
                    _DisplayRemaining( );
                }
            }
        }
    }

    // Empty any remaining packets
    cout << "% Draining remaining packets ..." << endl;
    _empty_network = true;

    int flush_steps = 0;
    while ( _total_in_flight > 0 ) {
        _Step( );
        if ( ( ++flush_steps % 1000 ) == 0 ) {
            _DisplayRemaining( );
        }
    }
    _empty_network = false;

    return ( converged > 0 );
}
// Threaded variant of the single-simulation driver.
//
// Three mutually exclusive paths are taken depending on _sim_mode/timed_mode:
//   * batch + timed_mode   : guarded by assert(false); steps until _time
//                            reaches _sample_period, printing stats every
//                            10000 cycles.
//   * batch + !timed_mode  : guarded by assert(false); steps until source 0
//                            has sent _batch_size packets, then drains.
//   * everything else      : spawns _threads worker threads running
//                            PTrafficManager::launchthread and, once per
//                            wake-up on master_restart, merges the per-thread
//                            statistics and evaluates warm-up / convergence.
//
// Returns true when the convergence counter is positive.
//
// NOTE(review): the worker threads are never joined in this function and the
// `job` array handed to them is a local of this function; confirm that
// launchthread no longer touches its job once thread_stop has been observed.
bool PTrafficManager::_SingleSim( )
{
  int  total_phases;
  int  converged;
  int  empty_steps;

  double cur_latency;
  double prev_latency;

  double cur_accepted;
  double prev_accepted;

  double warmup_threshold;
  double stopping_threshold;
  double acc_stopping_threshold;

  double min, avg;

  bool   clear_last;

  for ( int i = 0; i < _threads; i++ ) {
    thread_time[i] = 0;
  }
  _time = 0;

  // Remove any pending request left over from previous simulations.
  for ( int i = 0; i < _sources; i++ ) {
    _packets_sent[i] = 0;
    _requestsOutstanding[i] = 0;

    while ( !_repliesPending[i].empty( ) ) {
      _repliesPending[i].pop_front( );
    }
  }

  // Reset queue time for all sources.
  for ( int s = 0; s < _sources; ++s ) {
    for ( int c = 0; c < _classes; ++c ) {
      _qtime[s][c]    = 0;
      _qdrained[s][c] = false;
    }
  }

  // Relative-change limits used to decide "warmed up" and "converged".
  stopping_threshold     = 0.05;
  acc_stopping_threshold = 0.05;
  warmup_threshold       = 0.05;
  converged       = 0;
  total_phases    = 0;

  // warm-up ...
  // reset stats, all packets after warmup_time marked
  // converge
  // drain, wait until all packets finish
  _sim_state    = warming_up;
  prev_latency  = 0;
  prev_accepted = 0;

  _ClearStats( );
  clear_last    = false;

  if ( _sim_mode == batch && timed_mode ) {
    // This path is deliberately disabled by the assertion below.
    assert(false);
    while ( _time < _sample_period ) {
      _Step( );
      if ( _time % 10000 == 0 ) {
        cout << _sim_state << "%=================================" << endl;
        int dmin;
        cur_latency = _latency_stats[0]->Average( );
        dmin = _ComputeAccepted( &avg, &min );
        cur_accepted = avg;

        cout << "% Average latency = " << cur_latency << endl;
        cout << "% Accepted packets = " << min << " at node " << dmin << " (avg = " << avg << ")" << endl;
        cout << "lat(" << total_phases + 1 << ") = " << cur_latency << ";" << endl;
        _latency_stats[0]->Display( );
      }
    }
    //    cout<<"Total inflight "<<_total_in_flight<<endl;
    converged = 1;

  } else if ( _sim_mode == batch && !timed_mode ) { // batch mode
    // This path is deliberately disabled by the assertion below.
    assert(false);
    while ( _packets_sent[0] < _batch_size ) {
      _Step( );
      if ( _time % 1000 == 0 ) {
        cout << _sim_state << "%=================================" << endl;
        int dmin;
        cur_latency = _latency_stats[0]->Average( );
        dmin = _ComputeAccepted( &avg, &min );
        cur_accepted = avg;

        cout << "% Average latency = " << cur_latency << endl;
        cout << "% Accepted packets = " << min << " at node " << dmin << " (avg = " << avg << ")" << endl;
        cout << "lat(" << total_phases + 1 << ") = " << cur_latency << ";" << endl;
        _latency_stats[0]->Display( );
      }
    }
    cout << "batch size of "<<_batch_size  <<  " sent. Time used is " << _time << " cycles" <<endl;
    cout<< "Draining the Network...................\n";
    empty_steps = 0;
    while ( (_drain_measured_only ? _measured_in_flight : _total_in_flight) > 0 ) {
      _Step( );
      ++empty_steps;

      if ( empty_steps % 1000 == 0 ) {
        _DisplayRemaining( );
        cout << ".";
      }
    }
    cout << endl;
    cout << "batch size of "<<_batch_size  <<  " received. Time used is " << _time << " cycles" <<endl;
    converged = 1;
  } else {
    // Once warmed up, we require 3 converging runs to end the simulation.
    pthread_t threads[_threads];

    Thread_job job[_threads];

    // master_lock is held from here until after the sampling loop; it is only
    // released while blocked inside pthread_cond_wait below.
    pthread_mutex_lock(&master_lock);
    for ( int i = 0; i < _threads; i++ ) {
      job[i].tid = i;
      job[i].pt = this;
      pthread_create(&threads[i], NULL,PTrafficManager::launchthread,(void *)(&job[i]));
    }

    while ( ( total_phases < _max_samples ) &&
            ( ( _sim_state != running ) ||
              ( converged < 3 ) ) ) {

      // Statistics are wiped right after warm-up completes, and on every
      // even-numbered sample while still warming up.  The parentheses around
      // the bit test matter: `==` binds tighter than `&`, so the unparenthesised
      // form `total_phases & 0x1 == 0` is always zero.
      if ( clear_last || ( ( _sim_state == warming_up ) && ( ( total_phases & 0x1 ) == 0 ) ) ) {
        clear_last = false;
        _ClearStats( );
      }

      // NOTE(review): this wait is not wrapped in a predicate loop, so a
      // spurious wake-up would be treated as a completed sample - confirm
      // whether the worker side makes that impossible.
      cout<<"master waiting "<<endl;
      pthread_cond_wait(&master_restart, &master_lock);
      cout<<"master woken up "<<endl;

      // Take every per-thread restart lock before touching per-thread stats.
      for ( int i = 0; i < _threads; i++ ) {
        pthread_mutex_lock(&thread_restart_lock[i]);
      }

      // Fold each thread's statistics into the shared accumulators.
      for ( int i = 0; i < _threads; i++ ) {
        for ( int j = 0; j < _classes; j++ ) {
          _latency_stats[j]->MergeStats(thread_latency_stats[i][j]);
        }
        _hop_stats->MergeStats(thread_hop_stats[i]);
      }

      // Tell the workers to stop when the loop condition would fail after one
      // more sample and one more converged count.
      // NOTE(review): this is evaluated before the convergence update further
      // down, which may reset `converged` to 0 and keep the loop going -
      // confirm the workers cope with thread_stop being set in that case.
      if ( !( ( total_phases + 1 < _max_samples ) &&
              ( ( _sim_state != running ) ||
                ( converged + 1 < 3 ) ) ) ) {
        thread_stop = true;
        cout<<"oh f**k\n";
      }

      // Signal each worker's restart condition and release its lock.
      for ( int i = 0; i < _threads; i++ ) {
        pthread_cond_signal(&thread_restart[i]);
        pthread_mutex_unlock(&thread_restart_lock[i]);
      }

      cout << _sim_state << "%=================================" << endl;
      int dmin;
      cur_latency = _latency_stats[0]->Average( );
      dmin = _ComputeAccepted( &avg, &min );
      cur_accepted = avg;
      cout << "% Average latency = " << cur_latency << endl;
      cout << "% Accepted packets = " << min << " at node " << dmin << " (avg = " << avg << ")" << endl;
      cout << "lat(" << total_phases + 1 << ") = " << cur_latency << ";" << endl;
      _latency_stats[0]->Display( );
//       cout << "thru(" << total_phases + 1 << ",:) = [ ";
//       for ( int d = 0; d < _dests; ++d ) {
//         cout << _accepted_packets[d]->Average( ) << " ";
//       }
//       cout << "];" << endl;

      // Fail safe for latency mode; throughput mode will just continue.
      // NOTE(review): this break leaves the loop without setting thread_stop.
      if ( ( _sim_mode == latency ) && ( cur_latency >_latency_thres ) ) {
        cout << "Average latency is getting huge" << endl;
        converged = 0;
        _sim_state = warming_up;
        break;
      }

      cout << "% latency change    = " << fabs( ( cur_latency - prev_latency ) / cur_latency ) << endl;
      cout << "% throughput change = " << fabs( ( cur_accepted - prev_accepted ) / cur_accepted ) << endl;

      if ( _sim_state == warming_up ) {
        if ( _warmup_periods == 0 ) {
          // Adaptive warm-up: wait for the sampled metrics to settle.
          if ( _sim_mode == latency ) {
            if ( ( fabs( ( cur_latency - prev_latency ) / cur_latency ) < warmup_threshold ) &&
                 ( fabs( ( cur_accepted - prev_accepted ) / cur_accepted ) < warmup_threshold ) ) {
              cout << "% Warmed up ..." <<  "Time used is " << _time << " cycles" <<endl;
              clear_last = true;
              _sim_state = running;
            }
          } else {
            if ( fabs( ( cur_accepted - prev_accepted ) / cur_accepted ) < warmup_threshold ) {
              cout << "% Warmed up ..." << "Time used is " << _time << " cycles" << endl;
              clear_last = true;
              _sim_state = running;
            }
          }
        } else {
          // Fixed-length warm-up.
          if ( total_phases + 1 >= _warmup_periods ) {
            cout << "% Warmed up ..." <<  "Time used is " << _time << " cycles" <<endl;
            clear_last = true;
            _sim_state = running;
          }
        }
      } else if ( _sim_state == running ) {
        if ( _sim_mode == latency ) {
          if ( ( fabs( ( cur_latency - prev_latency ) / cur_latency ) < stopping_threshold ) &&
               ( fabs( ( cur_accepted - prev_accepted ) / cur_accepted ) < acc_stopping_threshold ) ) {
            ++converged;
          } else {
            converged = 0;
          }
        } else {
          if ( fabs( ( cur_accepted - prev_accepted ) / cur_accepted ) < acc_stopping_threshold ) {
            ++converged;
          } else {
            converged = 0;
          }
        }
      }
      prev_latency  = cur_latency;
      prev_accepted = cur_accepted;
      ++total_phases;
    }
    pthread_mutex_unlock(&master_lock);

    if ( _sim_state == running ) {
      ++converged;

      if ( _sim_mode == latency ) {
        cout << "% Draining all recorded packets ..." << endl;
        _sim_state  = draining;
        _drain_time = _time;
        empty_steps = 0;
        // The drain loop is disabled in the threaded path:
//      while( _PacketsOutstanding( ) ) {
//        _Step( );
//        ++empty_steps;
//
//        if ( empty_steps % 1000 == 0 ) {
//          _DisplayRemaining( );
//        }
//      }
      }
    } else {
      cout << "Too many sample periods needed to converge" << endl;
    }

    // Empty any remaining packets (the stepping loop is disabled in the
    // threaded path, so only the flag is toggled).
    cout << "% Draining remaining packets ..." << endl;
    _empty_network = true;
    empty_steps = 0;
//     while( (_drain_measured_only ? _measured_in_flight : _total_in_flight) > 0 ) {
//       _Step( );
//       ++empty_steps;
//
//       if ( empty_steps % 1000 == 0 ) {
//         _DisplayRemaining( );
//       }
//     }
    _empty_network = false;
  }

  return ( converged > 0 );
}