void SoilLayer::addOneFront5Top(const double & deltaz,const int & frzing){ //deltaz is the distance between the front and top interface of a layer frozen =0; Front* fnt = new Front(); fnt->set(deltaz, frzing); fronts.push_front(fnt); };
void SoilLayer::addOneFront5Bot(const double & deltaz,const int & frzing){ //add one front at the bottom of fronts frozen =0; Front* fnt = new Front(); fnt->set(deltaz, frzing); fronts.push_back(fnt); };
// True when some cell on the current frontier carries both marks
// (mark id 0 and mark id 1), i.e. the two search waves have met.
bool isEnd(Map &map) const{
    const size_t count = front->length();
    for(size_t idx = 0; idx != count; ++idx){
        const Coord &cell = front->at(idx);
        if (map.mark(0, cell) && map.mark(1, cell)){
            return true;
        }
    }
    return false;
}
// Return the first frontier cell that carries both marks (where the two
// search waves met).  Coord(0, 0) is the sentinel when no such cell exists.
Coord meetCell(const Map &map) const{
    const size_t count = front->length();
    size_t idx = 0;
    while(idx < count){
        const Coord &candidate = front->at(idx);
        if (map.mark(0, candidate) && map.mark(1, candidate)){
            return candidate;
        }
        ++idx;
    }
    return Coord(0, 0);
}
// Advance the wavefront by one BFS level: expand every cell currently on
// the frontier into its reachable, unmarked, non-wall neighbours, stamp
// them with the new step number, then swap the two frontier buffers.
void step(Map &map){
    frontNew->clear();
    nStep++;
    while(!front->empty()){
        const Coord &cur = front->pop();
        for(int d = DirectionFirst+1; d != DirectionLast; ++d){
            const Direction dir = static_cast<Direction>(d);
            if (!cur.canStepTo(dir, map)){
                continue;
            }
            Coord neighbour = cur.stepTo(dir);
            if (!map.mark(id, neighbour) && !map.isWall(neighbour)){
                map.mark(id, neighbour) = nStep;   // record the BFS level
                frontNew->push(neighbour);
            }
        }
    }
    // Reuse the buffers: the freshly built frontier becomes the current one.
    Front *tmp = front;
    front = frontNew;
    frontNew = tmp;
}
// Inactivity watchdog for the session recording: samples the traffic
// counters of `front`, pauses the capture once stop_record_time seconds
// pass without any change, and resumes it (forcing a full-screen refresh)
// as soon as traffic reappears.
// NOTE(review): the parameter `front` and the member `this->front` are used
// side by side below — presumably the same object reached two ways; confirm
// against the caller.
void check(time_t now, Front & front) {
    // Procedure which stops the recording on inactivity
    // First call: anchor the activity clock to "now".
    if (this->last_record_activity_time == 0) this->last_record_activity_time = now;
    if ((front.get_total_received() == this->last_total_received)
        && (front.get_total_sent() == this->last_total_sent)) {
        // No traffic since the last sample: pause once the window elapses.
        if (!this->stop_record_inactivity
            && (now > this->last_record_activity_time + this->stop_record_time)) {
            this->stop_record_inactivity = true;
            front.can_be_pause_capture();
        }
    }
    else {
        // Traffic observed: refresh the counters and the activity timestamp.
        this->last_record_activity_time = now;
        this->last_total_received = front.get_total_received();
        this->last_total_sent = front.get_total_sent();
        // front.trans->reset_quantum_sent();
        // Here we only reset the quantum sent
        // because Check() will already reset the
        // quantum received when checking for inactivity
        if (this->stop_record_inactivity) {
            this->stop_record_inactivity = false;
            if (front.can_be_resume_capture()) {
                // Multi-monitor clients get a suppress/allow cycle before the
                // invalidate (the bogus_refresh_rect configuration path) —
                // presumably so the refresh covers the whole desktop.
                if (this->ini.get<cfg::globals::bogus_refresh_rect>()
                    && this->ini.get<cfg::globals::allow_using_multiple_monitors>()
                    && (this->front.client_info.cs_monitor.monitorCount > 1)) {
                    this->mm.mod->rdp_suppress_display_updates();
                    this->mm.mod->rdp_allow_display_updates(0, 0,
                        this->front.client_info.width, this->front.client_info.height);
                }
                // Ask the module to repaint the entire screen.
                this->mm.mod->rdp_input_invalidate(Rect( 0, 0,
                    this->front.client_info.width, this->front.client_info.height));
            }
        }
    }
}
void check(time_t now, Front & front) { // Procedure which stops the recording on inactivity if (this->last_record_activity_time == 0) this->last_record_activity_time = now; if ((front.get_total_received() == this->last_total_received) && (front.get_total_sent() == this->last_total_sent)) { if (!this->stop_record_inactivity && (now > this->last_record_activity_time + this->stop_record_time)) { this->stop_record_inactivity = true; front.pause_capture(); } } else { this->last_record_activity_time = now; this->last_total_received = front.get_total_received(); this->last_total_sent = front.get_total_sent(); // front.trans->reset_quantum_sent(); // Here we only reset the quantum sent // because Check() will already reset the // quantum received when checking for inactivity if (this->stop_record_inactivity) { this->stop_record_inactivity = false; front.resume_capture(); // resume capture } } }
// Reset this searcher: give it id `pid`, allocate two frontier buffers
// sized proportionally to the map perimeter, and seed the search at `c`.
void init(int pid, Coord c, const Map &map){
    id = pid;
    // A single BFS ring cannot hold more cells than a bound proportional
    // to the map perimeter.
    const size_t capacity = 4*(map.numRows()+map.numCols());
    destroy();                      // release buffers from any previous run
    front = new Front(capacity);
    frontNew = new Front(capacity);
    nStep = 1;
    front->push(c);
    map.mark(id, c) = 1;            // stamp the start cell as step 1
}
// Recursive forward substitution over the elimination tree: solve the
// children first, scatter their update blocks into this node's workspace,
// solve against this front, and keep this node's slice of the solution.
// NOTE(review): F is a template scalar parameter declared on a preceding
// line outside this view; X.work is used as scratch and emptied on children.
void LowerForwardSolve
( const NodeInfo& info, const Front<F>& front, MatrixNode<F>& X )
{
    EL_DEBUG_CSE
    const Int numChildren = info.children.size();
    // Recurse on every child subtree before touching this node.
    for( Int c=0; c<numChildren; ++c )
        LowerForwardSolve
        ( *info.children[c], *front.children[c], *X.children[c] );

    // Set up a workspace
    // TODO: Only set up a workspace if there is not a parent
    // (or a duplicate's parent)
    auto& W = X.work;
    const Int numRHS = X.matrix.Width();
    W.Resize( front.Height(), numRHS );
    // WT = this node's right-hand-side rows; WB accumulates the update
    // destined for ancestors and starts at zero.
    auto WT = W( IR(0,info.size), ALL );
    auto WB = W( IR(info.size,END), ALL );
    WT = X.matrix;
    Zero( WB );

    // Update using the children (if they exist)
    for( Int c=0; c<numChildren; ++c )
    {
        auto& childW = X.children[c]->work;
        const Int childSize = info.children[c]->size;
        const Int childHeight = childW.Height();
        const Int childUSize = childHeight-childSize;
        // The trailing rows of the child's workspace form its update block.
        auto childU = childW( IR(childSize,childHeight), IR(0,numRHS) );
        for( Int iChild=0; iChild<childUSize; ++iChild )
        {
            // childRelInds maps child update rows into this front's rows.
            const Int iFront = info.childRelInds[c][iChild];
            for( Int j=0; j<numRHS; ++j )
                W(iFront,j) += childU(iChild,j);
        }
        childW.Empty();   // the child's scratch is no longer needed
    }

    // Solve against this front
    FrontLowerForwardSolve( front, W );

    // Store this node's portion of the result
    X.matrix = WT;
}
// Factorize a batch of fronts on the GPU.
//
// The user's fronts are copied into page-locked (pinned) host memory so the
// CUDA driver can stream them asynchronously, factorized on the GPU via
// GPUQREngine_Internal, and the resulting R factors are copied back into the
// user's buffers.  After the first allocation succeeds, every exit funnels
// through GPUQREngine_Cleanup so the workspaces and the internal front array
// are released on all paths.
QREngineResultCode GPUQREngine
(
    size_t gpuMemorySize,   // The total available GPU memory size in bytes
    Front *userFronts,      // The list of fronts to factorize
    Int numFronts,          // The number of fronts to factorize
    QREngineStats *stats    // An optional parameter. If present, statistics
                            // are collected and passed back to the caller
                            // via this struct
)
{
    /* Allocate workspaces */
    Front *fronts = (Front*) SuiteSparse_calloc(numFronts, sizeof(Front));
    if(!fronts)
    {
        // Nothing else allocated yet, so a plain return is safe here.
        return QRENGINE_OUTOFMEMORY;
    }

    /* Sum the F and R value counts over all fronts to size the workspaces. */
    size_t FSize, RSize;
    FSize = RSize = 0;
    for(int f=0; f<numFronts; f++)
    {
        /* Configure the front */
        Front *userFront = &(userFronts[f]);
        Int m = userFront->fm;
        Int n = userFront->fn;
        // Placement-new an internal front descriptor into the calloc'd slot.
        Front *front = new (&fronts[f]) Front(f, EMPTY, m, n);
        FSize += front->getNumFrontValues();
        RSize += front->getNumRValues();
    }

    // We have to allocate page-locked CPU-GPU space to leverage asynchronous
    // memory transfers. This has to be done in a way that the CUDA driver is
    // aware of, which unfortunately means making a copy of the user input.

    // calloc pagelocked space on CPU, and calloc space on the GPU
    // NOTE(review): the four bool flags select calloc/pagelock/GPU behaviour —
    // confirm their order against Workspace::allocate.
    Workspace *wsMongoF = Workspace::allocate(FSize,    // CPU and GPU
        sizeof(double), true, true, true, true);

    // calloc pagelocked space on the CPU. Nothing on the GPU
    Workspace *wsMongoR = Workspace::allocate(RSize,    // CPU
        sizeof(double), true, true, false, true);

    /* Cleanup and return if we ran out of memory. */
    if(!wsMongoF || !wsMongoR)
    {
        return GPUQREngine_Cleanup (QRENGINE_OUTOFMEMORY,
            userFronts, fronts, numFronts, wsMongoF, wsMongoR);
    }

    /* Prepare the fronts for GPU execution. */
    size_t FOffset, ROffset;
    FOffset = ROffset = 0;
    for(int f=0; f<numFronts; f++)
    {
        // Set the front pointers; make the copy from user data into front data.
        Front *front = &(fronts[f]);
        front->F = CPU_REFERENCE(wsMongoF, double*) + FOffset;
        front->gpuF = GPU_REFERENCE(wsMongoF, double*) + FOffset;
        front->cpuR = CPU_REFERENCE(wsMongoR, double*) + ROffset;
        FOffset += front->getNumFrontValues();
        ROffset += front->getNumRValues();

        /* COPY USER DATA (user's F to our F) */
        Front *userFront = &(userFronts[f]);
        double *userF = userFront->F;
        double *F = front->F;
        Int m = userFront->fm;
        Int n = userFront->fn;
        bool isColMajor = userFront->isColMajor;
        Int ldn = userFront->ldn;
        // Normalize the user's layout (row- or column-major with leading
        // dimension ldn) into the engine's dense m-by-n layout.
        for(Int i=0; i<m; i++)
        {
            for(Int j=0; j<n; j++)
            {
                F[i*n+j] = (isColMajor ? userF[j*ldn+i] : userF[i*ldn+j]);
            }
        }

        /* Attach either the user-specified Stair, or compute it. */
        front->Stair = userFront->Stair;
        if(!front->Stair) front->Stair = GPUQREngine_FindStaircase(front);

        /* Cleanup and return if we ran out of memory building the staircase */
        if(!front->Stair)
        {
            return GPUQREngine_Cleanup (QRENGINE_OUTOFMEMORY,
                userFronts, fronts, numFronts, wsMongoF, wsMongoR);
        }
    }

    /* Transfer the fronts to the GPU. */
    if(!wsMongoF->transfer(cudaMemcpyHostToDevice))
    {
        return GPUQREngine_Cleanup (QRENGINE_GPUERROR,
            userFronts, fronts, numFronts, wsMongoF, wsMongoR);
    }

    /* Do the factorization for this set of fronts. */
    QREngineResultCode result = GPUQREngine_Internal(gpuMemorySize,
        fronts, numFronts, NULL, NULL, NULL, stats);
    if(result != QRENGINE_SUCCESS)
    {
        return GPUQREngine_Cleanup (result,
            userFronts, fronts, numFronts, wsMongoF, wsMongoR);
    }

    /* COPY USER DATA (our R back to user's R) */
    for(int f=0; f<numFronts; f++)
    {
        Front *userFront = &(userFronts[f]);
        double *R = (&fronts[f])->cpuR;
        double *userR = userFront->cpuR;
        Int m = userFront->fm;   // read for symmetry with copy-in; unused below
        Int n = userFront->fn;
        Int rank = userFront->rank;
        bool isColMajor = userFront->isColMajor;
        Int ldn = userFront->ldn;
        // Only the first `rank` rows of R are meaningful; write them back in
        // the user's layout.
        for(Int i=0; i<rank; i++)
        {
            for(Int j=0; j<n; j++)
            {
                userR[i*ldn+j] = (isColMajor ? R[j*n+i] : R[i*n+j]);
            }
        }
    }

    /* Return that the factorization was successful. */
    return GPUQREngine_Cleanup (QRENGINE_SUCCESS,
        userFronts, fronts, numFronts, wsMongoF, wsMongoR);
}
void run_mod(mod_api & mod, Front & front, wait_obj & front_event, SocketTransport * st_mod, SocketTransport * st_front) { struct timeval time_mark = { 0, 50000 }; bool run_session = true; BackEvent_t mod_event_signal = BACK_EVENT_NONE; while (run_session) { try { unsigned max = 0; fd_set rfds; fd_set wfds; FD_ZERO(&rfds); FD_ZERO(&wfds); struct timeval timeout = time_mark; add_to_fd_set(front_event, st_front, rfds, max, timeout); add_to_fd_set(mod.get_event(), st_mod, rfds, max, timeout); if (is_set(mod.get_event(), st_mod, rfds)) { timeout.tv_sec = 0; timeout.tv_usec = 0; } int num = select(max + 1, &rfds, &wfds, 0, &timeout); if (num < 0) { if (errno == EINTR) { continue; } // Socket error break; } if (is_set(front_event, st_front, rfds)) { try { front.incoming(mod); } catch (...) { run_session = false; continue; }; } if (front.up_and_running) { if (is_set(mod.get_event(), st_mod, rfds)) { mod.get_event().reset(); mod.draw_event(time(NULL)); if (mod.get_event().signal != BACK_EVENT_NONE) { mod_event_signal = mod.get_event().signal; } if (mod_event_signal == BACK_EVENT_NEXT) { run_session = false; } } } else { } } catch (Error & e) { LOG(LOG_ERR, "Session::Session exception = %d!\n", e.id); run_session = false; }; } // while (run_session) }
// Advance the post-factorization state machine of every active front by at
// most one transition per call.  Returns true when every front is DONE.
bool Scheduler::postProcess ( void )
{
    /* Post-process all active fronts. */
    for(Int p=0; p<numActiveFronts; p++)
    {
        /* Get the front from the "active fronts" permutation. */
        Int f = afPerm[p];
        Front *front = (&frontList[f]);
        SparseMeta *meta = &(front->sparseMeta);
        bool isDense = front->isDense();
        bool isSparse = front->isSparse();   // only read by the commented-out asserts below

        FrontState state = front->state;
        FrontState nextState = state;        // default: remain in the same state

        /* The post-processing we do depends on the state: */
        switch(state)
        {
            /* There's nothing to do if you're waiting to be allocated. */
            case ALLOCATE_WAIT: break;

            /* The only time we stay in ASSEMBLE_S is if we can't get to
             * adding the task to the work queue in a particular pass.
             * This happens when we have a ton of other work to do. */
            case ASSEMBLE_S: break;

            /* If we're in CHILD_WAIT, see if all of the children are ready. */
            case CHILD_WAIT:
            {
                // assert(isSparse);

                /* If all the children are ready then we can proceed. */
                int nc = meta->nc;   // number of children not yet assembled
                if(nc == 0)
                {
                    initializeBucketList(f);
                    nextState = FACTORIZE;
                }
                break;
            }

            /* If we're in the middle of a factorization: */
            case FACTORIZE:
//              // IsRReadyEarly experimental feature : pulls R from the GPU
//              // R is computed but the contribution block is not. This
//              // method is under development and not yet available for
//              // production use.
//              if(isSparse && (&bucketLists[f])->IsRReadyEarly()) {
//                  /* If we haven't created the event yet, create it. */
//                  if(eventFrontDataReady[f] == NULL) {
//                      // Piggyback the synchronization on the next kernel
//                      // launch.
//                      cudaEventCreate(&eventFrontDataReady[f]);
//                      cudaEventRecord(eventFrontDataReady[f],
//                      kernelStreams[activeSet^1]); }
//                  /* We must have created the event on the last kernel
//                  launch so try to pull R off the GPU. */ else {
//                      pullFrontData(f); } }
                break;

            // At this point, the R factor is ready to be pulled from the GPU.
            case FACTORIZE_COMPLETE:
            {
                /* If we haven't created the event yet, create it. */
                if(eventFrontDataReady[f] == NULL)
                {
                    // Piggyback the synchronization on the next kernel launch.
                    cudaEventCreate(&eventFrontDataReady[f]);
                    cudaEventRecord(eventFrontDataReady[f],
                        kernelStreams[activeSet^1]);
                }
                /* We must have created the event already during factorize,
                   so instead try to pull R off the GPU. */
                else
                {
                    pullFrontData(f);
                }

                /* If the front is dense or staged, then we can't assemble into
                   the parent, so just cleanup. */
                if(isDense || meta->isStaged)
                {
                    nextState = CLEANUP;
                }
                /* Else we're sparse and not staged so it means we have memory
                   to assemble into the parent. */
                else
                {
                    nextState = PARENT_WAIT;
                }
                break;
            }

            /* If we're waiting on the parent to be allocated: */
            case PARENT_WAIT:
            {
                // assert(isSparse);

                /* Make sure we're trying to pull the R factor off the GPU. */
                pullFrontData(f);

                // If we have a parent, allocate it and proceed to PUSH_ASSEMBLE
                Int pids = front->pids;
                if(pids != EMPTY)
                {
                    activateFront(pids);
                    nextState = PUSH_ASSEMBLE;
                }
                /* Else the parent is the dummy, so cleanup and move to done. */
                else
                {
                    nextState = CLEANUP;
                }
                break;
            }

            /* The only time we stay in PUSH_ASSEMBLE is if we can't get to
             * adding the task to the work queue in a particular pass.
             * This happens when we have a ton of other work to do. */
            case PUSH_ASSEMBLE:
                // assert(isSparse);
                break;

            /* If we're in CLEANUP then we need to free the front. */
            case CLEANUP:
            {
                /* If we were able to get the R factor and free the front. */
                if(pullFrontData(f) && finishFront(f))
                {
                    /* Update the parent's child count. */
                    Int pid = front->pids;
                    if(pid != EMPTY) (&frontList[pid])->sparseMeta.nc--;

                    /* Move to DONE. */
                    nextState = DONE;

                    /* Keep track of the # completed. */
                    numFrontsCompleted++;

                    /* Revisit the same position again since a front was
                     * swapped to the current location. */
                    p--;
                }
                break;
            }

            /* This is the done state with nothing to do. */
            case DONE: break;
        }

#if 0
        if(front->printMe)
        {
            printf("[PostProcessing] %g : %d -> %d\n",
                (double) (front->fidg), state, nextState);
                // StateNames[state], StateNames[nextState]);
            debugDumpFront(front);
        }
#endif

        /* Save the next state back to the frontDescriptor. */
        front->state = nextState;
    }

    // printf("%2.2f completed.\n", 100 * (double) numCompleted / (double)
    // numFronts);

    /* Return whether all the fronts are DONE. */
    return (numFronts == numFrontsCompleted);
}