void IQRouter::_SWAlloc() { Flit *f; Credit *c; VC *cur_vc; BufferState *dest_vc; int input; int output; int vc; int expanded_input; int expanded_output; _sw_allocator->Clear(); for (input = 0; input < _inputs; ++input) { for (int s = 0; s < _input_speedup; ++s) { expanded_input = s * _inputs + input; // Arbitrate (round-robin) between multiple // requesting VCs at the same input (handles // the case when multiple VC's are requesting // the same output port) vc = _sw_rr_offset[expanded_input]; for (int v = 0; v < _vcs; ++v) { // This continue accounts for the interleaving of // VCs when input speedup is used if ((vc % _input_speedup) != s) { vc = (vc + 1) % _vcs; continue; } cur_vc = &_vc[input][vc]; if ((cur_vc->GetState() == VC::active) && (!cur_vc->Empty())) { dest_vc = &_next_vcs[cur_vc->GetOutputPort()]; if (!dest_vc->IsFullFor(cur_vc->GetOutputVC())) { // When input_speedup > 1, the virtual channel buffers // are interleaved to create multiple input ports to // the switch. Similarily, the output ports are // interleaved based on their originating input when // output_speedup > 1. assert( expanded_input == (vc%_input_speedup)*_inputs + input ); expanded_output = (input % _output_speedup) * _outputs + cur_vc->GetOutputPort(); if ((_switch_hold_in[expanded_input] == -1) && (_switch_hold_out[expanded_output] == -1)) { // We could have requested this same input-output pair in a previous // iteration, only replace the previous request if the current // request has a higher priority (this is default behavior of the // allocators). Switch allocation priorities are strictly // determined by the packet priorities. _sw_allocator->AddRequest(expanded_input, expanded_output, vc, cur_vc->GetPriority(), cur_vc->GetPriority()); } } } vc = (vc + 1) % _vcs; } } } _sw_allocator->Allocate(); // Winning flits cross the switch _crossbar_pipe->WriteAll(0); for (int input = 0; input < _inputs; ++input) { c = 0; for (int s = 0; s < _input_speedup; ++s) { expanded_input = s * _inputs + input; if (_switch_hold_in[expanded_input] != -1) { expanded_output = _switch_hold_in[expanded_input]; vc = _switch_hold_vc[expanded_input]; cur_vc = &_vc[input][vc]; if (cur_vc->Empty()) { // Cancel held match if VC is empty expanded_output = -1; } } else { expanded_output = _sw_allocator->OutputAssigned(expanded_input); } if (expanded_output >= 0) { output = expanded_output % _outputs; if (_switch_hold_in[expanded_input] == -1) { vc = _sw_allocator->ReadRequest(expanded_input, expanded_output); cur_vc = &_vc[input][vc]; } if (_hold_switch_for_packet) { _switch_hold_in[expanded_input] = expanded_output; _switch_hold_vc[expanded_input] = vc; _switch_hold_out[expanded_output] = expanded_input; } assert( ( cur_vc->GetState( ) == VC::active ) && ( !cur_vc->Empty( ) ) && ( cur_vc->GetOutputPort( ) == ( expanded_output % _outputs ) ) ); dest_vc = &_next_vcs[cur_vc->GetOutputPort()]; assert( !dest_vc->IsFullFor( cur_vc->GetOutputVC( ) ) ); // Forward flit to crossbar and send credit back f = cur_vc->RemoveFlit(); f->hops++; if (f->watch) { cout << "Forwarding flit through crossbar at " << _fullname << ":" << endl; cout << *f; } if (!c) { c = _NewCredit(_vcs); } c->vc[c->vc_cnt] = f->vc; c->vc_cnt++; f->vc = cur_vc->GetOutputVC(); dest_vc->SendingFlit(f); _crossbar_pipe->Write(f, expanded_output); if (f->tail) { cur_vc->SetState(VC::idle); _switch_hold_in[expanded_input] = -1; _switch_hold_vc[expanded_input] = -1; _switch_hold_out[expanded_output] = -1; } _sw_rr_offset[expanded_input] = (f->vc + 1) % _vcs; } } _credit_pipe->Write(c, input); } }
void ChaosRouter::_OutputAdvance( ) { Flit *f, *f2; Credit *c; bool advanced; int mq; _crossbar_pipe->WriteAll( 0 ); for ( int i = 0; i < _inputs; ++i ) { if ( ( ( _input_output_match[i] != -1 ) || ( _input_mq_match[i] != -1 ) ) && ( !_input_frame[i].empty( ) ) ) { advanced = false; f = _input_frame[i].front( ); /*if ( ! ) { } else { cout << "Input = " << i << ", input_output_match = " << _input_output_match[i] << ", input_mq_match = " << _input_mq_match[i] << endl; Error( "Input queue empty, but matched!" ); }*/ if ( _input_output_match[i] != -1 ) { if ( f->tail ) { _output_matched[_input_output_match[i]] = false; } _crossbar_pipe->Write( f, _input_output_match[i] ); if ( f->watch ) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Flit traversing crossbar from input queue " << i << " at " << FullName() << endl << *f; } advanced = true; } else if ( !_MultiQueueFull( _input_mq_match[i] ) ) { mq = _input_mq_match[i]; if ( f->head ) { _rf( this, f, i, _mq_route[mq], false ); _mq_age[mq] = 0; if ( _multi_state[mq] == empty ) { _multi_state[mq] = filling; } else if ( _multi_state[mq] == leaving ) { _multi_state[mq] = shared; } else { Error( "Multi-queue received head while not empty or leaving!" ); } } if ( f->tail ) { _mq_matched[mq] = false; if ( _multi_state[mq] == filling ) { _multi_state[mq] = full; } else if ( _multi_state[mq] == cut_through ) { _multi_state[mq] = leaving; } else { Error( "Multi-queue received tail while not filling or cutting-through!" ); } } _multi_queue[mq].push( f ); if ( f->watch ) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Flit stored in multiqueue at " << FullName() << endl << "State = " << _multi_state[mq] << endl << *f; } advanced = true; } if ( advanced ) { _input_frame[i].pop( ); if ( f->tail ) { // last in packet, update state if ( _input_state[i] == leaving ) { _input_state[i] = empty; } else if ( _input_state[i] == shared ) { _input_state[i] = filling; f2 = _input_frame[i].front( ); // update routes _rf( this, f2, i, _input_route[i], false ); } _input_output_match[i] = -1; _input_mq_match[i] = -1; } c = _NewCredit( 1 ); c->vc[0] = 0; c->vc_cnt = 1; _credit_queue[i].push( c ); } } } for ( int m = 0; m < _multi_queue_size; ++m ) { if ( _multi_match[m] != -1 ) { if ( !_multi_queue[m].empty( ) ) { f = _multi_queue[m].front( ); _multi_queue[m].pop( ); } else { cout << "State = " << _multi_state[m] << endl; Error( "Multi queue empty, but matched!" ); } _crossbar_pipe->Write( f, _multi_match[m] ); if ( f->watch ) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Flit traversing crossbar from multiqueue slot " << m << " at " << FullName() << endl << *f; } if ( f->head ) { if ( _multi_state[m] == filling ) { _multi_state[m] = cut_through; } else if ( _multi_state[m] == full ) { _multi_state[m] = leaving; } else { Error( "Multi-queue sent head while not filling or full!" ); } } if ( f->tail ) { _output_matched[_multi_match[m]] = false; _multi_match[m] = -1; if ( _multi_state[m] == shared ) { _multi_state[m] = filling; } else if ( _multi_state[m] == leaving ) { _multi_state[m] = empty; } else { cout << "State = " << _multi_state[m] << endl; cout << *f; Error( "Multi-queue sent tail while not leaving or shared!" ); } } } _mq_age[m]++; } }
void IQRouterSplit::_Alloc( ) { bool watched = false; int fast_path_vcs[_inputs]; _sw_allocator->Clear( ); for(int input = 0; input < _inputs; ++input) { fast_path_vcs[input] = -1; for(int s = 0; s < _input_speedup; ++s) { int expanded_input = s*_inputs + input; // Arbitrate (round-robin) between multiple requesting VCs at the same // input (handles the case when multiple VC's are requesting the same // output port) int vc = _sw_rr_offset[expanded_input]; for(int v = 0; v < _vcs; ++v) { // This continue acounts for the interleaving of VCs when input speedup // is used. // dub: Essentially, this skips loop iterations corresponding to those // VCs not in the current speedup set. The skipped iterations will be // handled in a different iteration of the enclosing loop over 's'. // dub: Furthermore, we skip this iteration if the current VC has only a // single, newly arrived flit. if(((vc % _input_speedup) != s) || _use_fast_path[input*_vcs+vc]) { vc = (vc + 1) % _vcs; continue; } VC * cur_vc = _vc[input][vc]; VC::eVCState vc_state = cur_vc->GetState(); if(cur_vc->FrontFlit() && cur_vc->FrontFlit()->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Saw flit " << cur_vc->FrontFlit()->id << " in slow path." << endl; if(!cur_vc->Empty()) { Flit * f = cur_vc->FrontFlit(); assert(f); if(((vc_state != VC::vc_alloc) && (vc_state != VC::active)) || (cur_vc->GetStateTime() < _sw_alloc_delay)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " is not ready for slow-path allocation (flit: " << f->id << ", state: " << VC::VCSTATE[vc_state] << ", state time: " << cur_vc->GetStateTime() << ")." << endl; vc = (vc + 1) % _vcs; continue; } if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " is requesting slow-path allocation (flit: " << f->id << ", state: " << VC::VCSTATE[vc_state] << ")." << endl; const OutputSet * route_set = cur_vc->GetRouteSet(); int output = _vc_rr_offset[input*_vcs+vc]; for(int output_index = 0; output_index < _outputs; ++output_index) { // in active state, we only care about our assigned output port if(vc_state == VC::active) { output = cur_vc->GetOutputPort(); } // When input_speedup > 1, the virtual channel buffers are // interleaved to create multiple input ports to the switch. // Similarily, the output ports are interleaved based on their // originating input when output_speedup > 1. assert(expanded_input == (vc%_input_speedup)*_inputs+input); int expanded_output = (input%_output_speedup)*_outputs + output; if((_switch_hold_in[expanded_input] == -1) && (_switch_hold_out[expanded_output] == -1)) { BufferState * dest_vc = _next_vcs[output]; bool do_request = false; int in_priority; // check if any suitable VCs are available and determine the // highest priority for this port int vc_cnt = route_set->NumVCs(output); assert(!((vc_state == VC::active) && (vc_cnt == 0))); for(int vc_index = 0; vc_index < vc_cnt; ++vc_index) { int vc_prio; int out_vc = route_set->GetVC(output, vc_index, &vc_prio); if((vc_state == VC::vc_alloc) && !dest_vc->IsAvailableFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is busy." << endl; continue; } else if((vc_state == VC::active) && (out_vc != cur_vc->GetOutputVC())) { continue; } if(dest_vc->IsFullFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " has no buffers available." << endl; continue; } if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is available." << endl; if(!do_request || (vc_prio > in_priority)) { do_request = true; in_priority = vc_prio; } } if(do_request) { if(f->watch) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " requests output " << output << " (flit: " << f->id << ", exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; watched = true; } // We could have requested this same input-output pair in a // previous iteration; only replace the previous request if the // current request has a higher priority (this is default // behavior of the allocators). Switch allocation priorities // are strictly determined by the packet priorities. _sw_allocator->AddRequest(expanded_input, expanded_output, vc, in_priority, cur_vc->GetPriority()); } } // in active state, we only care about our assigned output port if(vc_state == VC::active) { break; } output = (output + 1) % _outputs; } } vc = (vc + 1) % _vcs; } } // dub: handle fast-path flits separately so we know all switch requests // from other VCs that are on the regular path have been issued already for(int vc = 0; vc < _vcs; vc++) { if(_use_fast_path[input*_vcs+vc]) { VC * cur_vc = _vc[input][vc]; if(cur_vc->Empty()) { continue; } Flit * f = cur_vc->FrontFlit(); assert(f); if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Saw flit " << f->id << " in fast path." << endl; VC::eVCState vc_state = cur_vc->GetState(); if(((vc_state != VC::vc_alloc) && (vc_state != VC::active)) || (cur_vc->GetStateTime() < _sw_alloc_delay)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " is not ready for fast-path allocation (flit: " << f->id << ", state: " << VC::VCSTATE[vc_state] << ", state time: " << cur_vc->GetStateTime() << ")." << endl; continue; } if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " is requesting fast-path allocation (flit: " << f->id << ", state: " << VC::VCSTATE[vc_state] << ")." << endl; if(fast_path_vcs[input] >= 0) cout << "XXX" << endl << FullName() << endl << "VC: " << vc << ", input: " << input << ", flit: " << f->id << ", fast VC: " << fast_path_vcs[input] << ", fast flit: " << _vc[input][fast_path_vcs[input]]->FrontFlit()->id << endl << "XXX" << endl; assert(fast_path_vcs[input] < 0); fast_path_vcs[input] = vc; const OutputSet * route_set = cur_vc->GetRouteSet(); int expanded_input = (vc%_input_speedup)*_inputs+input; for(int output = 0; output < _outputs; ++output) { // dub: if we're done with VC allocation, we already know our output if(vc_state == VC::active) { output = cur_vc->GetOutputPort(); } BufferState * dest_vc = _next_vcs[output]; int expanded_output = (input%_output_speedup)*_outputs + output; if(_sw_allocator->ReadRequest(expanded_input, expanded_output) >= 0) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Crossbar slot is already in use by slow path " << "(exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; if(vc_state == VC::active) { break; } else { continue; } } bool do_request = false; int in_priority; int vc_cnt = route_set->NumVCs(output); assert((vc_state != VC::active) || (vc_cnt > 0)); for(int vc_index = 0; vc_index < vc_cnt; ++vc_index) { int vc_prio; int out_vc = route_set->GetVC(output, vc_index, &vc_prio); if((vc_state == VC::vc_alloc) && !dest_vc->IsAvailableFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is busy." << endl; continue; } else if((vc_state == VC::active) && (out_vc != cur_vc->GetOutputVC())) { continue; } if(dest_vc->IsFullFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " has no buffers available." << endl; continue; } if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is available." << endl; if(!do_request || (vc_prio > in_priority)) { do_request = true; in_priority = vc_prio; } } if(do_request) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " requests output " << output << " (flit: " << f->id << ", exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; // We could have requested this same input-output pair in a // previous iteration; only replace the previous request if the // current request has a higher priority (this is default // behavior of the allocators). Switch allocation priorities // are strictly determined by the packet priorities. _sw_allocator->AddRequest(expanded_input, expanded_output, vc, in_priority, cur_vc->GetPriority()); } if(vc_state == VC::active) { break; } } } } } if(watched) { *gWatchOut << GetSimTime() << " | " << _sw_allocator->FullName() << " | "; _sw_allocator->PrintRequests(gWatchOut); } _sw_allocator->Allocate(); if(watched) { *gWatchOut << GetSimTime() << " | " << _sw_allocator->FullName() << " | " << "Grants = [ "; for(int input = 0; input < _inputs; ++input) for(int s = 0; s < _input_speedup; ++s) { int expanded_input = s * _inputs + input; int expanded_output = _sw_allocator->OutputAssigned(expanded_input); if(expanded_output > -1) { int output = expanded_output % _outputs; int vc = _sw_allocator->ReadRequest(expanded_input, expanded_output); *gWatchOut << input << " -> " << output << " (vc:" << vc << ") "; } } *gWatchOut << "]." << endl; } // Winning flits cross the switch _crossbar_pipe->WriteAll(NULL); ////////////////////////////// // Switch Power Modelling // - Record Total Cycles // switchMonitor.cycle() ; for(int input = 0; input < _inputs; ++input) { Credit * c = NULL; for(int s = 0; s < _input_speedup; ++s) { int expanded_input = s*_inputs + input; int expanded_output; VC * cur_vc; int vc; int fvc = fast_path_vcs[input]; if(_switch_hold_in[expanded_input] != -1) { assert(_switch_hold_in[expanded_input] >= 0); expanded_output = _switch_hold_in[expanded_input]; vc = _switch_hold_vc[expanded_input]; cur_vc = _vc[input][vc]; if(cur_vc->Empty()) { // Cancel held match if VC is empty expanded_output = -1; } } else { expanded_output = _sw_allocator->OutputAssigned(expanded_input); if(expanded_output >= 0) { vc = _sw_allocator->ReadRequest(expanded_input, expanded_output); cur_vc = _vc[input][vc]; } else { vc = -1; cur_vc = NULL; } } if(expanded_output >= 0) { int output = expanded_output % _outputs; Flit * f = cur_vc->FrontFlit(); assert(f); if(vc == fvc) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Fast-path allocation successful for VC " << vc << " at input " << input << " (flit: " << f->id << ")." << endl; } else { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Slow-path allocation successful for VC " << vc << " at input " << input << " (flit: " << f->id << ")." << endl; if(fvc >= 0) { assert(_use_fast_path[input*_vcs+fvc]); VC * fast_vc = _vc[input][fvc]; assert(fast_vc->FrontFlit()); if(fast_vc->FrontFlit()->watch) cout << GetSimTime() << " | " << FullName() << " | " << "Disabling fast-path allocation for VC " << fvc << " at input " << input << "." << endl; _use_fast_path[input*_vcs+fvc] = false; } } BufferState * dest_vc = _next_vcs[output]; switch(cur_vc->GetState()) { case VC::vc_alloc: { const OutputSet * route_set = cur_vc->GetRouteSet(); int sel_prio = -1; int sel_vc = -1; int vc_cnt = route_set->NumVCs(output); for(int vc_index = 0; vc_index < vc_cnt; ++vc_index) { int out_prio; int out_vc = route_set->GetVC(output, vc_index, &out_prio); if(!dest_vc->IsAvailableFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is busy." << endl; continue; } if(dest_vc->IsFullFor(out_vc)) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " has no buffers available." << endl; continue; } if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << out_vc << " at output " << output << " is available." << endl; if(out_prio > sel_prio) { sel_vc = out_vc; sel_prio = out_prio; } } if(sel_vc < 0) { cout << "XXX" << endl << "Flit " << f->id << ", VC " << vc << ", input " << input << ":" << endl << "None of " << vc_cnt << " VCs at output " << output << " were suitable and available." << endl << "XXX" << endl; } // we should only get to this point if some VC requested // allocation assert(sel_vc > -1); // dub: this is taken care of later on //cur_vc->SetState(VC::active); cur_vc->SetOutput(output, sel_vc); dest_vc->TakeBuffer(sel_vc); _vc_rr_offset[input*_vcs+vc] = (output + 1) % _outputs; if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << sel_vc << " at output " << output << " granted to VC " << vc << " at input " << input << " (flit: " << f->id << ")." << endl; } // NOTE: from here, we just fall through to the code for VC::active! case VC::active: if(_hold_switch_for_packet) { _switch_hold_in[expanded_input] = expanded_output; _switch_hold_vc[expanded_input] = vc; _switch_hold_out[expanded_output] = expanded_input; } //assert(cur_vc->GetState() == VC::active); assert(!cur_vc->Empty()); assert(cur_vc->GetOutputPort() == output); dest_vc = _next_vcs[output]; assert(!dest_vc->IsFullFor(cur_vc->GetOutputVC())); // Forward flit to crossbar and send credit back f = cur_vc->RemoveFlit(); if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Output " << output << " granted to VC " << vc << " at input " << input << " (flit: " << f->id << ", exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; f->hops++; // // Switch Power Modelling // switchMonitor.traversal(input, output, f); bufferMonitor.read(input, f); if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Forwarding flit " << f->id << " through crossbar " << "(exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; if (c == NULL) { c = _NewCredit(_vcs); } assert(vc == f->vc); c->vc[c->vc_cnt] = vc; c->vc_cnt++; c->dest_router = f->from_router; f->vc = cur_vc->GetOutputVC(); dest_vc->SendingFlit(f); _crossbar_pipe->Write(f, expanded_output); if(f->tail) { if(cur_vc->Empty()) { cur_vc->SetState(VC::idle); } else { cur_vc->Route(_rf, this, cur_vc->FrontFlit(), input); cur_vc->SetState(VC::vc_alloc); } _switch_hold_in[expanded_input] = -1; _switch_hold_vc[expanded_input] = -1; _switch_hold_out[expanded_output] = -1; } else { // reset state timer for next flit cur_vc->SetState(VC::active); } if(!_use_fast_path[input*_vcs+vc]) { _sw_rr_offset[expanded_input] = (vc + 1) % _vcs; } if(cur_vc->Empty() && !_use_fast_path[input*_vcs+vc]) { if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Enabling fast-path allocation for VC " << vc << " at input " << input << "." << endl; _use_fast_path[input*_vcs+vc] = true; } } } else if((fvc >= 0) && ((fvc % _input_speedup) == s)) { assert(_use_fast_path[input*_vcs+fvc]); VC * fast_vc = _vc[input][fvc]; Flit * f = fast_vc->FrontFlit(); assert(f); if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Disabling fast-path allocation for VC " << fvc << " at input " << input << "." << endl; _use_fast_path[input*_vcs+fvc] = false; } } _credit_pipe->Write(c, input); } }
void IQRouterBaseline::_SWAlloc( ) { Flit *f; Credit *c; VC *cur_vc; BufferState *dest_vc; int input; int output; int vc; int expanded_input; int expanded_output; bool watched = false; bool any_nonspec_reqs = false; bool any_nonspec_output_reqs[_outputs*_output_speedup]; memset(any_nonspec_output_reqs, 0, _outputs*_output_speedup*sizeof(bool)); _sw_allocator->Clear( ); if ( _speculative >= 2 ) _spec_sw_allocator->Clear( ); for ( input = 0; input < _inputs; ++input ) { int vc_ready_nonspec = 0; int vc_ready_spec = 0; for ( int s = 0; s < _input_speedup; ++s ) { expanded_input = s*_inputs + input; // Arbitrate (round-robin) between multiple // requesting VCs at the same input (handles // the case when multiple VC's are requesting // the same output port) vc = _sw_rr_offset[ expanded_input ]; for ( int v = 0; v < _vcs; ++v ) { // This continue acounts for the interleaving of // VCs when input speedup is used // dub: Essentially, this skips loop iterations corresponding to those // VCs not in the current speedup set. The skipped iterations will be // handled in a different iteration of the enclosing loop over 's'. if ( ( vc % _input_speedup ) != s ) { vc = ( vc + 1 ) % _vcs; continue; } cur_vc = _vc[input][vc]; if(!cur_vc->Empty() && (cur_vc->GetStateTime() >= _sw_alloc_delay)) { switch(cur_vc->GetState()) { case VC::active: { output = cur_vc->GetOutputPort( ); dest_vc = _next_vcs[output]; if ( !dest_vc->IsFullFor( cur_vc->GetOutputVC( ) ) ) { // When input_speedup > 1, the virtual channel buffers are // interleaved to create multiple input ports to the switch. // Similarily, the output ports are interleaved based on their // originating input when output_speedup > 1. assert( expanded_input == (vc%_input_speedup)*_inputs + input ); expanded_output = (input%_output_speedup)*_outputs + output; if ( ( _switch_hold_in[expanded_input] == -1 ) && ( _switch_hold_out[expanded_output] == -1 ) ) { // We could have requested this same input-output pair in a // previous iteration; only replace the previous request if // the current request has a higher priority (this is default // behavior of the allocators). Switch allocation priorities // are strictly determined by the packet priorities. Flit * f = cur_vc->FrontFlit(); assert(f); if(f->watch) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " requested output " << output << " (non-spec., exp. input: " << expanded_input << ", exp. output: " << expanded_output << ", flit: " << f->id << ", prio: " << cur_vc->GetPriority() << ")." << endl; watched = true; } // dub: for the old-style speculation implementation, we // overload the packet priorities to prioritize // non-speculative requests over speculative ones if( _speculative == 1 ) _sw_allocator->AddRequest(expanded_input, expanded_output, vc, 1, 1); else _sw_allocator->AddRequest(expanded_input, expanded_output, vc, cur_vc->GetPriority( ), cur_vc->GetPriority( )); any_nonspec_reqs = true; any_nonspec_output_reqs[expanded_output] = true; vc_ready_nonspec++; } } } break; // // The following models the speculative VC allocation aspects // of the pipeline. An input VC with a request in for an egress // virtual channel will also speculatively bid for the switch // regardless of whether the VC allocation succeeds. These // speculative requests are handled in a separate allocator so // as to prevent them from interfering with non-speculative bids // case VC::vc_spec: case VC::vc_spec_grant: { assert( _speculative > 0 ); assert( expanded_input == (vc%_input_speedup)*_inputs + input ); const OutputSet * route_set = cur_vc->GetRouteSet( ); const list<OutputSet::sSetElement>* setlist = route_set ->GetSetList(); list<OutputSet::sSetElement>::const_iterator iset = setlist->begin( ); while(iset!=setlist->end( )) { BufferState * dest_vc = _next_vcs[iset->output_port]; bool do_request = false; // check if any suitable VCs are available for ( int out_vc = iset->vc_start; out_vc <= iset->vc_end; ++out_vc ) { int vc_prio = iset->pri; if(!do_request && ((_speculative < 3) || dest_vc->IsAvailableFor(out_vc))) { do_request = true; break; } } if(do_request) { expanded_output = (input%_output_speedup)*_outputs + iset->output_port; if ( ( _switch_hold_in[expanded_input] == -1 ) && ( _switch_hold_out[expanded_output] == -1 ) ) { Flit * f = cur_vc->FrontFlit(); assert(f); if(f->watch) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "VC " << vc << " at input " << input << " requested output " << iset->output_port << " (spec., exp. input: " << expanded_input << ", exp. output: " << expanded_output << ", flit: " << f->id << ", prio: " << cur_vc->GetPriority() << ")." << endl; watched = true; } // dub: for the old-style speculation implementation, we // overload the packet priorities to prioritize non- // speculative requests over speculative ones if( _speculative == 1 ) _sw_allocator->AddRequest(expanded_input, expanded_output, vc, 0, 0); else _spec_sw_allocator->AddRequest(expanded_input, expanded_output, vc, cur_vc->GetPriority( ), cur_vc->GetPriority( )); vc_ready_spec++; } } iset++; } } break; } } vc = ( vc + 1 ) % _vcs; } } } if(watched) { *gWatchOut << GetSimTime() << " | " << _sw_allocator->FullName() << " | "; _sw_allocator->PrintRequests( gWatchOut ); if(_speculative >= 2) { *gWatchOut << GetSimTime() << " | " << _spec_sw_allocator->FullName() << " | "; _spec_sw_allocator->PrintRequests( gWatchOut ); } } _sw_allocator->Allocate(); if(_speculative >= 2) _spec_sw_allocator->Allocate(); // Winning flits cross the switch _crossbar_pipe->WriteAll( 0 ); ////////////////////////////// // Switch Power Modelling // - Record Total Cycles // switchMonitor.cycle() ; for ( int input = 0; input < _inputs; ++input ) { c = 0; int vc_grant_nonspec = 0; int vc_grant_spec = 0; for ( int s = 0; s < _input_speedup; ++s ) { bool use_spec_grant = false; expanded_input = s*_inputs + input; if ( _switch_hold_in[expanded_input] != -1 ) { assert(_switch_hold_in[expanded_input] >= 0); expanded_output = _switch_hold_in[expanded_input]; vc = _switch_hold_vc[expanded_input]; assert(vc >= 0); cur_vc = _vc[input][vc]; if ( cur_vc->Empty( ) ) { // Cancel held match if VC is empty expanded_output = -1; } } else { expanded_output = _sw_allocator->OutputAssigned( expanded_input ); if ( ( _speculative >= 2 ) && ( expanded_output < 0 ) ) { expanded_output = _spec_sw_allocator->OutputAssigned(expanded_input); if ( expanded_output >= 0 ) { assert(_spec_sw_allocator->InputAssigned(expanded_output) >= 0); assert(_spec_sw_allocator->ReadRequest(expanded_input, expanded_output) >= 0); switch ( _filter_spec_grants ) { case 0: if ( any_nonspec_reqs ) expanded_output = -1; break; case 1: if ( any_nonspec_output_reqs[expanded_output] ) expanded_output = -1; break; case 2: if ( _sw_allocator->InputAssigned(expanded_output) >= 0 ) expanded_output = -1; break; default: assert(false); } } use_spec_grant = (expanded_output >= 0); } } if ( expanded_output >= 0 ) { output = expanded_output % _outputs; if ( _switch_hold_in[expanded_input] == -1 ) { if(use_spec_grant) { assert(_spec_sw_allocator->OutputAssigned(expanded_input) >= 0); assert(_spec_sw_allocator->InputAssigned(expanded_output) >= 0); vc = _spec_sw_allocator->ReadRequest(expanded_input, expanded_output); } else { assert(_sw_allocator->OutputAssigned(expanded_input) >= 0); assert(_sw_allocator->InputAssigned(expanded_output) >= 0); vc = _sw_allocator->ReadRequest(expanded_input, expanded_output); } assert(vc >= 0); cur_vc = _vc[input][vc]; } // Detect speculative switch requests which succeeded when VC // allocation failed and prevenet the switch from forwarding; // also, in case the routing function can return multiple outputs, // check to make sure VC allocation and speculative switch allocation // pick the same output port. if ( ( ( cur_vc->GetState() == VC::vc_spec_grant ) || ( cur_vc->GetState() == VC::active ) ) && ( cur_vc->GetOutputPort() == output ) ) { if(use_spec_grant) { vc_grant_spec++; } else { vc_grant_nonspec++; } if ( _hold_switch_for_packet ) { _switch_hold_in[expanded_input] = expanded_output; _switch_hold_vc[expanded_input] = vc; _switch_hold_out[expanded_output] = expanded_input; } assert((cur_vc->GetState() == VC::vc_spec_grant) || (cur_vc->GetState() == VC::active)); assert(!cur_vc->Empty()); assert(cur_vc->GetOutputPort() == output); dest_vc = _next_vcs[output]; if ( dest_vc->IsFullFor( cur_vc->GetOutputVC( ) ) ) continue ; // Forward flit to crossbar and send credit back f = cur_vc->RemoveFlit( ); assert(f); if(f->watch) { *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Output " << output << " granted to VC " << vc << " at input " << input; if(cur_vc->GetState() == VC::vc_spec_grant) *gWatchOut << " (spec"; else *gWatchOut << " (non-spec"; *gWatchOut << ", exp. input: " << expanded_input << ", exp. output: " << expanded_output << ", flit: " << f->id << ")." << endl; } f->hops++; // // Switch Power Modelling // switchMonitor.traversal( input, output, f) ; bufferMonitor.read(input, f) ; if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Forwarding flit " << f->id << " through crossbar " << "(exp. input: " << expanded_input << ", exp. output: " << expanded_output << ")." << endl; if ( !c ) { c = _NewCredit( _vcs ); } assert(vc == f->vc); c->vc[c->vc_cnt] = f->vc; c->vc_cnt++; c->dest_router = f->from_router; f->vc = cur_vc->GetOutputVC( ); dest_vc->SendingFlit( f ); _crossbar_pipe->Write( f, expanded_output ); if(f->tail) { if(cur_vc->Empty()) { cur_vc->SetState(VC::idle); } else if(_routing_delay > 0) { cur_vc->SetState(VC::routing); _routing_vcs.push(input*_vcs+vc); } else { cur_vc->Route(_rf, this, cur_vc->FrontFlit(), input); cur_vc->SetState(VC::vc_alloc); _vcalloc_vcs.insert(input*_vcs+vc); } _switch_hold_in[expanded_input] = -1; _switch_hold_vc[expanded_input] = -1; _switch_hold_out[expanded_output] = -1; } else { // reset state timer for next flit cur_vc->SetState(VC::active); } _sw_rr_offset[expanded_input] = ( vc + 1 ) % _vcs; } else { assert(cur_vc->GetState() == VC::vc_spec); Flit * f = cur_vc->FrontFlit(); assert(f); if(f->watch) *gWatchOut << GetSimTime() << " | " << FullName() << " | " << "Speculation failed at output " << output << "(exp. input: " << expanded_input << ", exp. output: " << expanded_output << ", flit: " << f->id << ")." << endl; } } } // Promote all other virtual channel grants marked as speculative to active. for ( int vc = 0 ; vc < _vcs ; vc++ ) { cur_vc = _vc[input][vc] ; if ( cur_vc->GetState() == VC::vc_spec_grant ) { cur_vc->SetState( VC::active ) ; } } _credit_pipe->Write( c, input ); } }