/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */ static bool shouldMerge(NGHolder &ha, const ue2::unordered_map<NFAVertex, u32> &a_state_ids, NGHolder &hb, const ue2::unordered_map<NFAVertex, u32> &b_state_ids, size_t cpl, const ReportManager *rm, const CompileContext &cc) { size_t combinedStateCount = countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl; if (combinedStateCount > FAST_STATE_LIMIT) { // More complex implementability check. NGHolder h_temp; cloneHolder(h_temp, ha); assert(h_temp.kind == hb.kind); mergeNfaComponent(h_temp, hb, cpl); reduceImplementableGraph(h_temp, SOM_NONE, rm, cc); u32 numStates = isImplementableNFA(h_temp, rm, cc); DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates); if (!numStates) { DEBUG_PRINTF("not implementable\n"); return false; } else if (numStates > FAST_STATE_LIMIT) { DEBUG_PRINTF("too many states to merge\n"); return false; } } return true; }
/*!
  Adds the contribution of one site to a running sum and returns the
  updated sum.

  The per-state counts of the site are obtained from countStates(). When the
  site contains no gaps and more than one counted sample, the sum of
  k*(k-1) over every state count k is formed; if that sum is positive the
  site contributes 1 - sum/(nsam*(nsam-1)), otherwise it contributes 0.

  \param _ssh a value of ssh to increment (modified in place)
  \param site an object representing the value type of PolyTable::const_site_iterator
  \param haveOutgroup true if one of the elements of \a site is an outgroup state, false otherwise
  \param outgroup the index of the outgroup sequence in \a site
  \return the value of \a _ssh after the increment
*/
inline double operator()( double & _ssh,
                          const polymorphicSite & site,
                          const bool & haveOutgroup = false,
                          const unsigned & outgroup = 0 ) const
{
    const stateCounter counts = countStates()(site.second.begin(),
                                              site.second.end(),
                                              haveOutgroup, outgroup);

    // Samples that count towards the site: everything except the outgroup
    // (when present) and the entries tallied in counts.n.
    const unsigned outgroupSlots = unsigned( haveOutgroup ? 1 : 0 );
    const unsigned nsam = site.second.length() - outgroupSlots - counts.n;

    // Accumulate k*(k-1) for each state, in a fixed order.
    double hom = 0.;
    if (counts.gap == 0 && nsam > 1)
    {
        if (counts.a > 0)
        {
            hom += double(counts.a) * double(counts.a-1);
        }
        if (counts.g > 0)
        {
            hom += double(counts.g) * double(counts.g-1);
        }
        if (counts.c > 0)
        {
            hom += double(counts.c) * double(counts.c-1);
        }
        if (counts.t > 0)
        {
            hom += double(counts.t) * double(counts.t-1);
        }
        if (counts.zero > 0)
        {
            hom += double(counts.zero) * double(counts.zero-1);
        }
        if (counts.one > 0)
        {
            hom += double(counts.one) * double(counts.one-1);
        }
    }

    double contribution = 0.;
    if (hom > 0.)
    {
        contribution = 1. - (hom/( double(nsam)*double(nsam-1) ));
    }

    _ssh += contribution;
    return _ssh;
}
void Turnmark::update() { atTurnmark = false; int analog = analogRead(pin); InputState state = analogToState(analog); if (state == INPUT_NEUTRAL && lastState[0] != INPUT_NEUTRAL) { int highCount = countStates(INPUT_HIGH); int lowCount = countStates(INPUT_LOW); if (highCount > 0 && lowCount > 0) { lastCarriageType = G_CARRIAGE; onTurnmark(); } else if (highCount > 1) { lastCarriageType = K_CARRIAGE; onTurnmark(); } else if (lowCount > 1) { lastCarriageType = L_CARRIAGE; onTurnmark(); } } pushState(state); }
/**
 * Host driver for the simulation.
 *
 * Allocates the device and host buffers, seeds the device RNG states via
 * initRands(), then for each coupling value runs maxTime time steps. Every
 * step calls advanceTimestep() and recordData(); every checkTime steps the
 * per-block totals are computed with countStates() and copied back. When
 * every block's total exceeds 0.75*N the recorded block times and the device
 * state array are reset. After the time loop the accumulated "up"/"down"
 * arrays are copied back, printed, and written to a .npy file.
 *
 * Launch layout handed to the helper functions: numBlocks blocks with an
 * Nx-by-Nx dim3 (presumably one thread per grid site and one block per
 * repetition -- confirm against the kernel wrappers).
 *
 * All device allocations, host arrays and the optional plot window are
 * released before returning.
 *
 * \return 0 on success.
 */
int main()
{
#if VISUAL
    plotGrid* pg = new plotGrid;
#endif

    // number of reps
    int numBlocks = 128;
    // length of grid
    int Nx = 8;
    int N = Nx * Nx;
    int N_ALL = N * numBlocks;

    dim3 threadGrid(Nx, Nx);

    // One RNG state per site across all blocks, seeded from the host RNG.
    curandState* devRands;
    CUDA_CALL(cudaMalloc((void**)&devRands, N_ALL * sizeof(curandState)));
    srand(time(NULL));
    initRands(threadGrid, numBlocks, devRands, rand());

    // Device buffers.
    float* d_wg;
    CUDA_CALL(cudaMalloc((void**)&d_wg, sizeof(float) * (N_ALL)));
    int* d_states;
    CUDA_CALL(cudaMalloc((void**)&d_states, sizeof(int) * N_ALL));
    int* d_states2;
    CUDA_CALL(cudaMalloc((void**)&d_states2, sizeof(int) * N_ALL));
    float* d_up;
    CUDA_CALL(cudaMalloc((void**)&d_up, sizeof(float) * (N + 1)));
    float* d_down;
    CUDA_CALL(cudaMalloc((void**)&d_down, sizeof(float) * (N + 1)));
    int* d_upcount;
    CUDA_CALL(cudaMalloc((void**)&d_upcount, sizeof(int) * (N + 1)));
    int* d_downcount;
    CUDA_CALL(cudaMalloc((void**)&d_downcount, sizeof(int) * (N + 1)));
    int* d_blockTotals;
    CUDA_CALL(cudaMalloc((void**)&d_blockTotals, sizeof(int) * numBlocks));

    // Host mirrors.
    float* h_up = new float[N + 1];
    float* h_down = new float[N + 1];
    int* h_upcount = new int[N + 1];
    float* h_wg = new float[N_ALL];
    int* h_states = new int[N_ALL];
    int* h_blockTotals = new int[numBlocks];
    int* h_blockTimes = new int[numBlocks];

    int wgCount = 1;

    // Explicit casts: brace-initialising an unsigned array from a
    // non-constant int is a narrowing conversion.
    const unsigned int shape[] = {static_cast<unsigned int>(N + 1), 2u};

    // Value-initialised, i.e. every entry starts at 0.0f.
    float* results = new float[(N + 1) * 2]();

    for (int G = 0; G < wgCount; G++)
    {
        float wg = 0.25f; //5 + 0.2 * float(G);

        // Uniform coupling on every site.
        for (int i = 0; i < N_ALL; i++)
            h_wg[i] = wg;

        CUDA_CALL(cudaMemcpy(d_wg, h_wg, (N_ALL) * sizeof(float), cudaMemcpyHostToDevice));

        for (int b = 0; b < numBlocks; b++)
            h_blockTimes[b] = -1;

        int maxTime = 100000;
        int checkTime = 100;
        float sw = 1.0f;

        // Bounded formatting: never writes past the end of fileName.
        char fileName[30];
        snprintf(fileName, sizeof(fileName), "potential%d-%d.npy", int(10*wg), int(100.0*sw));

        // Start every run from all-zero state and accumulators.
        CUDA_CALL(cudaMemset(d_states, 0, sizeof(int) * (N_ALL)));
        CUDA_CALL(cudaMemset(d_blockTotals, 0, sizeof(int) * (numBlocks)));
        CUDA_CALL(cudaMemset(d_up, 0, sizeof(float) * (N + 1)));
        CUDA_CALL(cudaMemset(d_down, 0, sizeof(float) * (N + 1)));
        CUDA_CALL(cudaMemset(d_upcount, 0, sizeof(int) * (N + 1)));
        CUDA_CALL(cudaMemset(d_downcount, 0, sizeof(int) * (N + 1)));

        for (int t = 0; t < maxTime; t++)
        {
            advanceTimestep(threadGrid, numBlocks, devRands, d_wg, d_states, Nx, sw, t);
            recordData(threadGrid, numBlocks, d_states, d_states2, Nx, d_up, d_down, d_upcount, d_downcount, t);

#if VISUAL
            CUDA_CALL(cudaMemcpy(h_states, d_states, (N_ALL) * sizeof(int), cudaMemcpyDeviceToHost));
            pg->draw(Nx, h_states);
#endif

            if (t % checkTime == 0)
            {
                countStates(N, numBlocks, d_states, d_blockTotals, N_ALL);
                cout<<t<<" check"<<endl;

                CUDA_CALL(cudaMemcpy(h_blockTotals, d_blockTotals, (numBlocks) * sizeof(int), cudaMemcpyDeviceToHost));

                // A block is "done" once its total exceeds 75% of its sites;
                // remember the first step at which that was observed.
                bool allDone = true;
                for (int b = 0; b < numBlocks; b++)
                {
                    if (h_blockTotals[b] > 0.75*N)
                    {
                        if (h_blockTimes[b] < 0)
                            h_blockTimes[b] = t;
                    }
                    else
                        allDone = false;
                }

                // Once every block is done, restart all of them from zero.
                if (allDone)
                {
                    for (int b = 0; b < numBlocks; b++)
                        h_blockTimes[b] = -1;
                    CUDA_CALL(cudaMemset(d_states, 0, sizeof(int) * (N_ALL)));
                }
            }
        }

        CUDA_CALL(cudaMemcpy(h_up, d_up, (N + 1) * sizeof(float), cudaMemcpyDeviceToHost));
        CUDA_CALL(cudaMemcpy(h_down, d_down, (N + 1) * sizeof(float), cudaMemcpyDeviceToHost));
        CUDA_CALL(cudaMemcpy(h_upcount, d_upcount, (N + 1) * sizeof(int), cudaMemcpyDeviceToHost));

        // Interleave up/down into the (N+1) x 2 result table and echo it.
        for (int i = 0; i < N + 1; i++)
        {
            results[2*i] = h_up[i];
            results[2*i+1] = h_down[i];
            cout<<i/float(N)<<" : "<<h_up[i]<<" : "<<h_down[i]<<" : "<<h_upcount[i]<<endl;
        }

        cnpy::npy_save(fileName, results, shape, 2, "w");
    }

    // Release host memory.
    delete[] results;
    delete[] h_blockTimes;
    delete[] h_blockTotals;
    delete[] h_states;
    delete[] h_wg;
    delete[] h_upcount;
    delete[] h_down;
    delete[] h_up;

    // Release device memory.
    CUDA_CALL(cudaFree(d_blockTotals));
    CUDA_CALL(cudaFree(d_downcount));
    CUDA_CALL(cudaFree(d_upcount));
    CUDA_CALL(cudaFree(d_down));
    CUDA_CALL(cudaFree(d_up));
    CUDA_CALL(cudaFree(d_states2));
    CUDA_CALL(cudaFree(d_states));
    CUDA_CALL(cudaFree(d_wg));
    CUDA_CALL(cudaFree(devRands));

#if VISUAL
    delete pg;
#endif

    return 0;
}
/**
 * Merges graph \p vic into graph \p dest.
 *
 * The first \p common_len entries of \p vicStateMap are paired one-to-one
 * with the same positions of \p destStateMap: their reports are added to the
 * paired dest vertex and no new vertices are created for them. Entries of
 * \p vicStateMap beyond \p common_len are either special vertices (reports
 * merged into dest's corresponding special vertex) or are copied into
 * \p dest and given fresh state numbers in \p dest_state_ids, starting at
 * the current state count of \p dest. Finally every edge of \p vic is
 * replayed onto \p dest unless an equivalent edge is already present, and
 * the edges and vertices of \p dest are renumbered.
 */
static never_inline
void mergeNfa(NGHolder &dest, vector<NFAVertex> &destStateMap,
              ue2::unordered_map<NFAVertex, u32> &dest_state_ids,
              NGHolder &vic, vector<NFAVertex> &vicStateMap,
              size_t common_len) {
    // Translation table from vic vertices to their dest counterparts. The
    // special vertices (and the null vertex) map onto themselves.
    map<NFAVertex, NFAVertex> vmap;
    vmap[vic.start] = dest.start;
    vmap[vic.startDs] = dest.startDs;
    vmap[vic.accept] = dest.accept;
    vmap[vic.acceptEod] = dest.acceptEod;
    vmap[nullptr] = nullptr;

    // Copies every report on vic vertex 'from' onto dest vertex 'into'.
    auto absorbReports = [&](NFAVertex from, NFAVertex into) {
        const auto &incoming = vic[from].reports;
        dest[into].reports.insert(incoming.begin(), incoming.end());
    };

    u32 nextState = countStates(dest, dest_state_ids);

    // Shared region: record the pairing and merge in the reports, if any.
    for (u32 pos = 0; pos < common_len; pos++) {
        NFAVertex vicVertex = vicStateMap[pos];
        NFAVertex destVertex = destStateMap[pos];
        vmap[vicVertex] = destVertex;
        absorbReports(vicVertex, destVertex);
    }

    // Beyond the shared region: create new dest vertices, giving them state
    // numbers starting at nextState.
    for (u32 pos = common_len; pos < vicStateMap.size(); pos++) {
        NFAVertex vicVertex = vicStateMap[pos];

        if (is_special(vicVertex, vic)) {
            // Dest already has start vertices, just merge the reports.
            u32 specialIdx = vic[vicVertex].index;
            absorbReports(vicVertex, dest.getSpecialVertex(specialIdx));
        } else {
            NFAVertex created = add_vertex(vic[vicVertex], dest);
            dest_state_ids[created] = nextState++;
            vmap[vicVertex] = created;
        }
    }

    /* add edges */
    DEBUG_PRINTF("common_len=%zu\n", common_len);
    for (const auto &e : edges_range(vic)) {
        NFAVertex u_old = source(e, vic);
        NFAVertex v_old = target(e, vic);
        NFAVertex u = vmap[u_old];
        NFAVertex v = vmap[v_old];
        const bool uspecial = is_special(u, dest);
        const bool vspecial = is_special(v, dest);

        // Skip stylised edges that are already present.
        if (uspecial && vspecial && edge(u, v, dest).second) {
            continue;
        }

        // The edge lies in the common region if v's state ID is low enough;
        // when v is a special (an accept), u's state ID is consulted as well.
        assert(contains(dest_state_ids, v));
        bool in_common_region = dest_state_ids.at(v) < common_len;
        if (vspecial && dest_state_ids.at(u) < common_len) {
            in_common_region = true;
        }

        DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n",
                     dest[u].index, dest_state_ids.at(u),
                     dest[v].index, dest_state_ids.at(v),
                     in_common_region ? " [common]" : "");

        if (in_common_region) {
            if (vspecial) {
                assert(is_any_accept(v, dest));
                // If the edge exists in both graphs, skip it.
                if (edge(u, v, dest).second) {
                    DEBUG_PRINTF("skipping common edge to accept\n");
                    continue;
                }
            } else {
                DEBUG_PRINTF("skipping common edge\n");
                assert(edge(u, v, dest).second);
                // Should never merge edges with different top values.
                assert(vic[e].top == dest[edge(u, v, dest).first].top);
                continue;
            }
        }

        assert(!edge(u, v, dest).second);
        add_edge(u, v, vic[e], dest);
    }

    dest.renumberEdges();
    dest.renumberVertices();
}