/**
 * Returns true if graphs \p ha and \p hb can (and should) be merged.
 *
 * \param ha           first graph; cloned (not merged in place) for the trial merge
 * \param a_state_ids  state numbering for \p ha, used only to count its states
 * \param hb           second graph, merged into the temporary clone of \p ha
 * \param b_state_ids  state numbering for \p hb, used only to count its states
 * \param cpl          subtracted from the summed state counts and forwarded to
 *                     mergeNfaComponent — presumably the common prefix length
 *                     shared by both graphs; confirm with the caller
 * \param rm           forwarded to the reduction / implementability helpers
 * \param cc           forwarded to the reduction / implementability helpers
 *
 * NOTE(review): the closing braces of this function are missing from this
 * excerpt.
 */
bool shouldMerge(NGHolder &ha,
                 const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
                 NGHolder &hb,
                 const ue2::unordered_map<NFAVertex, u32> &b_state_ids,
                 size_t cpl, const ReportManager *rm,
                 const CompileContext &cc) {
    // Estimate of the merged size: both state counts added, less cpl.
    size_t combinedStateCount =
        countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl;

    // Only when the estimate exceeds FAST_STATE_LIMIT do we pay for a trial
    // merge; otherwise the merge is accepted outright.
    if (combinedStateCount > FAST_STATE_LIMIT) {
        // More complex implementability check: perform the merge on a scratch
        // copy of ha and ask isImplementableNFA how many states it needs.
        NGHolder h_temp;
        cloneHolder(h_temp, ha);
        assert(h_temp.kind == hb.kind);
        mergeNfaComponent(h_temp, hb, cpl);
        reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
        u32 numStates = isImplementableNFA(h_temp, rm, cc);
        DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
        // A zero result is treated as "cannot be implemented".
        if (!numStates) {
            DEBUG_PRINTF("not implementable\n");
            return false;
        } else if (numStates > FAST_STATE_LIMIT) {
            // Implementable, but still over the limit after reduction.
            DEBUG_PRINTF("too many states to merge\n");
            return false;

    return true;
Example #2
    // Adds one site's contribution to the running total _ssh and returns the
    // updated total. _ssh is modified in place through the reference even
    // though the operator itself is const.
    // NOTE(review): the opening brace of the body and the comment delimiters
    // around the \param text that follows are missing from this excerpt.
    inline double operator()( double & _ssh,
			      const polymorphicSite & site,
			      const bool & haveOutgroup = false,
			      const unsigned & outgroup = 0 ) const
	\param _ssh a value of ssh to increment
	\param site an object representing a polymorphic site (see \c polymorphicSite)
	\param haveOutgroup true if one of the elements of \a site is an outgroup state,
	false otherwise
	\param outgroup the index of the outgroup sequence in \a site
	// Tally the character states found at this site.
	// NOTE(review): this call is cut off in the excerpt — its remaining
	// arguments and closing parenthesis are missing.
	stateCounter c = countStates()(site.second.begin(),

	// Effective sample size: the site's length, minus one when an outgroup
	// is present, minus c.n (presumably the count of missing-data
	// characters — confirm against stateCounter).
	unsigned nsam = site.second.length() - 
	  unsigned( haveOutgroup==true ? 1 : 0 ) - c.n;
	double hom=0.;
	// Accumulate k*(k-1) for each state count k; the (k > 0) guard avoids
	// evaluating k-1 on a zero unsigned count.
	// NOTE(review): there are no braces after this if, so as written only
	// the first "hom +=" is conditional; the indentation suggests all six
	// were meant to be guarded — braces were probably lost in extraction.
	if (c.gap==0 && nsam > 1)
	    hom += (c.a > 0) ? double(c.a) * double(c.a-1) : 0.;
	    hom += (c.g > 0) ? double(c.g) * double(c.g-1) : 0.;
	    hom += (c.c > 0) ? double(c.c) * double(c.c-1) : 0.;
	    hom += (c.t > 0) ? double(c.t) * double(c.t-1) : 0.;
	    hom += (c.zero > 0) ? double(c.zero) * double(c.zero-1) : 0.;
	    hom += (c.one > 0) ? double(c.one) * double(c.one-1) : 0.;
	// Add 1 - hom / (nsam * (nsam - 1)) to the running total when hom is
	// positive, otherwise add 0, and return the updated _ssh.
	return _ssh += (hom > 0.) ? 1.- (hom/( double(nsam)*double(nsam-1) )) : 0.;
Example #3
// Samples the analog input once and, when the input has just returned to
// neutral, classifies the carriage type from the counts of high and low
// states.
// NOTE(review): the excerpt ends inside the if/else chain; the closing braces
// and whatever follows (e.g. where atTurnmark is set to true or lastState is
// updated) are not visible here.
void Turnmark::update() {
  // Cleared on every call; no visible line sets it back to true.
  atTurnmark = false;

  int analog = analogRead(pin);
  InputState state = analogToState(analog);

  // Act only on the transition into neutral: the current reading is neutral
  // but the most recent recorded state (lastState[0]) was not.
  if (state == INPUT_NEUTRAL && lastState[0] != INPUT_NEUTRAL) {
    // presumably counts matching entries in the lastState history — verify
    // against countStates
    int highCount = countStates(INPUT_HIGH);
    int lowCount = countStates(INPUT_LOW);
    // Both polarities seen -> G carriage; otherwise more than one high ->
    // K carriage, more than one low -> L carriage.
    if (highCount > 0 && lowCount > 0) {
      lastCarriageType = G_CARRIAGE;
    } else if (highCount > 1) {
      lastCarriageType = K_CARRIAGE;
    } else if (lowCount > 1) {
      lastCarriageType = L_CARRIAGE;

Example #4
// Host-side driver: allocates device and host buffers for numBlocks replicas
// of an Nx x Nx grid, advances them for up to maxTime steps, and finally
// prints the accumulated up/down arrays.
// NOTE(review): this excerpt has lost its braces and several loop bodies, so
// the nesting described in the comments below is inferred from indentation
// only — confirm against the complete file. No cudaFree/delete[] calls are
// visible in this excerpt either.
int main() 

    plotGrid* pg = new plotGrid;

    // number of reps (passed as the block count to every helper below)
    int numBlocks = 128;
    // length of grid (threadGrid is Nx x Nx, so N = Nx*Nx sites per replica)
    int Nx = 8;
    int N = Nx * Nx;
    // N2 and N4 are not referenced anywhere else in the visible code.
    int N2 = 0.5 * N;
    int N4 = 0.5 * N2;
    // total number of sites across all replicas
    int N_ALL = N * numBlocks;

    dim3 threadGrid(Nx, Nx);
    // one curandState per site per replica
    curandState *devRands;
    CUDA_CALL(cudaMalloc((void **)&devRands, N_ALL * sizeof(curandState)));

    // Host RNG is seeded from the wall clock; a host random value is handed
    // to initRands (presumably as the device RNG seed — confirm).
    srand (time(NULL));
    initRands(threadGrid, numBlocks, devRands, rand());

    // Per-site buffers (N_ALL entries each).
    float* d_wg;
    CUDA_CALL(cudaMalloc((void**)&d_wg, sizeof(float) *  (N_ALL) ));
    int* d_states;
    CUDA_CALL(cudaMalloc((void**)&d_states, sizeof(int) * N_ALL));
    int* d_states2;
    CUDA_CALL(cudaMalloc((void**)&d_states2, sizeof(int) * N_ALL));

    // Accumulators with N+1 entries each, plus host mirrors.
    // presumably indexed by the number of "up" sites (0..N) — verify against
    // recordData
    float* d_up;
    CUDA_CALL(cudaMalloc((void**)&d_up, sizeof(float) *  (N + 1) ));

    float* h_up = new float [N+1];

    float* d_down;
    CUDA_CALL(cudaMalloc((void**)&d_down, sizeof(float) *  (N + 1) ));

    float* h_down = new float [N+1];

    int* d_upcount;
    CUDA_CALL(cudaMalloc((void**)&d_upcount, sizeof(int) *  (N + 1) ));

    int* h_upcount = new int [N+1];

    int* d_downcount;
    CUDA_CALL(cudaMalloc((void**)&d_downcount, sizeof(int) *  (N + 1) ));

    int* h_downcount = new int [N+1];

    // One total per replica, filled by countStates below.
    int* d_blockTotals;
    CUDA_CALL(cudaMalloc((void**)&d_blockTotals, sizeof(int) * numBlocks));

    float* h_wg = new float [N_ALL];
    int* h_states = new int[N_ALL];
    int* h_blockTotals = new int[numBlocks];
    int* h_blockTimes = new int[numBlocks];
    // Number of wg values to sweep; with 1 the outer G loop runs once.
    int wgCount = 1;

    const unsigned int shape[] = {N+1,2};

    float* results = new float[(N+1)*2];
    // NOTE(review): the body of this loop is missing from the excerpt.
    for (int i=0;i<(N+1)*2;i++)

    for (int G=0;G<wgCount;G++)
        float wg = 0.25;//5 + 0.2 * float(G);
        // NOTE(review): no visible statement writes h_wg[i] in this loop, yet
        // h_wg is copied to the device just below — the assignment was
        // probably lost from this excerpt; verify.
        for (int i=0;i<N_ALL;i++)
            float unum =rand()/double(RAND_MAX);
   //         cout<<unum<<endl;
 //           if (unum<0.2345)
 //               h_wg[i]+=0.833;
        CUDA_CALL(cudaMemcpy(d_wg, h_wg, (N_ALL) * sizeof(float), cudaMemcpyHostToDevice));

        // -1 is the "no time recorded yet" marker tested by the
        // h_blockTimes[b]<0 check further down.
        for (int b=0;b<numBlocks;b++)
            h_blockTimes[b] = -1;
        int maxTime = 100000;
        // countStates is run every checkTime steps.
        int checkTime = 100;
        float sw = 1.0f;

        // fileName is formatted here but not used in the visible code.
        char fileName[30];
        sprintf(fileName, "potential%d-%d.npy", int(10*wg),int(100.0*sw));
//        cout<<fileName<<endl;

        // Zero the state and accumulator buffers for this run (d_states2 is
        // not cleared in the visible code).
        CUDA_CALL(cudaMemset (d_states, 0, sizeof(int) * (N_ALL)));
        CUDA_CALL(cudaMemset (d_blockTotals, 0, sizeof(int) * (numBlocks)));
        CUDA_CALL(cudaMemset (d_up, 0, sizeof(float) * (N + 1)));
        CUDA_CALL(cudaMemset (d_down, 0, sizeof(float) * (N + 1)));
        CUDA_CALL(cudaMemset (d_upcount, 0, sizeof(int) * (N + 1)));
        CUDA_CALL(cudaMemset (d_downcount, 0, sizeof(int) * (N + 1)));

        for (int t=0;t<maxTime;t++)
            advanceTimestep(threadGrid, numBlocks, devRands, d_wg, d_states, Nx, sw, t);
            recordData(threadGrid, numBlocks, d_states, d_states2, Nx, d_up, d_down, d_upcount, d_downcount, t);
            // NOTE(review): the next four code lines, up to the "*/" remnant,
            // look like the tail of a commented-out block whose opening was
            // lost. As written, the full state array is copied device-to-host
            // twice per step, and the if below has no visible body.
            CUDA_CALL(cudaMemcpy(h_states, d_states, (N_ALL) * sizeof(int), cudaMemcpyDeviceToHost));
            int countUp = 0;
            for (int i=0;i<N_ALL;i++)
                if (h_states[i]>0)
//            */
            // Blocking copy of every replica's state on each step so it can
            // be drawn.
            CUDA_CALL(cudaMemcpy(h_states, d_states, (N_ALL) * sizeof(int), cudaMemcpyDeviceToHost));
            pg->draw(Nx, h_states);
            // Periodic completion check.
            if (t%checkTime == 0 ) 
                countStates(N, numBlocks, d_states, d_blockTotals, N_ALL);
                cout<<t<<" check"<<endl;

                CUDA_CALL(cudaMemcpy(h_blockTotals, d_blockTotals, (numBlocks) * sizeof(int), cudaMemcpyDeviceToHost));
                bool allDone = true;
                // NOTE(review): braces and at least one statement (probably
                // the one recording h_blockTimes[b]) are missing below, so
                // the intended nesting of these tests and of
                // "allDone = false" cannot be determined from this excerpt.
                for (int b=0;b<numBlocks;b++)
                    if (h_blockTotals[b]>0.75*N)
    //                    cout<<"block total : "<<h_blockTotals[b]<<endl;
                        if (h_blockTimes[b]<0)
                        allDone = false;
                // Once allDone holds, reset the recorded times and zero the
                // device state array.
                if (allDone)
                    for (int b=0;b<numBlocks;b++)
                        h_blockTimes[b] = -1;
                    CUDA_CALL(cudaMemset (d_states, 0, sizeof(int) * (N_ALL)));

        // Fetch the accumulators for printing. d_downcount is not copied
        // back in the visible code, so h_downcount stays unfilled.
        CUDA_CALL(cudaMemcpy(h_up, d_up, (N + 1) * sizeof(float), cudaMemcpyDeviceToHost));
        CUDA_CALL(cudaMemcpy(h_down, d_down, (N + 1) * sizeof(float), cudaMemcpyDeviceToHost));
        CUDA_CALL(cudaMemcpy(h_upcount, d_upcount, (N + 1) * sizeof(int), cudaMemcpyDeviceToHost));

        // One output row per index i: i/N, h_up[i], h_down[i], h_upcount[i].
        for (int i=0;i<N+1;i++)
            cout<<i/float(N)<<" : "<<h_up[i]<<" : "<<h_down[i]<<" : "<<h_upcount[i]<<endl;

    return 0;
/**
 * Merges graph \p vic into graph \p dest in place.
 *
 * The first \p common_len entries of \p vicStateMap are paired index-by-index
 * with those of \p destStateMap and only their reports are merged; every
 * later non-special vertex of \p vic is copied into \p dest and given a fresh
 * state number in \p dest_state_ids. Edges of \p vic are then added to
 * \p dest unless they already exist there.
 *
 * NOTE(review): several closing braces and (judging by the comments and
 * debug messages) some `continue` statements are missing from this excerpt,
 * so the exact control flow must be confirmed against the complete file.
 */
static never_inline
void mergeNfa(NGHolder &dest, vector<NFAVertex> &destStateMap,
              ue2::unordered_map<NFAVertex, u32> &dest_state_ids,
              NGHolder &vic, vector<NFAVertex> &vicStateMap,
              size_t common_len) {
    map<NFAVertex, NFAVertex> vmap; // vic -> dest

    // Special vertices map onto their counterparts; nullptr maps to itself.
    vmap[vic.start]     = dest.start;
    vmap[vic.startDs]   = dest.startDs;
    vmap[vic.accept]    = dest.accept;
    vmap[vic.acceptEod] = dest.acceptEod;
    vmap[nullptr] = nullptr;

    // First state number to hand out to vertices copied over from vic.
    u32 stateNum = countStates(dest, dest_state_ids);

    // For vertices in the common len, add to vmap and merge in the reports, if
    // any.
    for (u32 i = 0; i < common_len; i++) {
        NFAVertex v_old = vicStateMap[i], v = destStateMap[i];
        vmap[v_old] = v;

        const auto &reports = vic[v_old].reports;
        dest[v].reports.insert(reports.begin(), reports.end());

    // Add in vertices beyond the common len, giving them state numbers
    // starting at stateNum.
    for (u32 i = common_len; i < vicStateMap.size(); i++) {
        NFAVertex v_old = vicStateMap[i];

        if (is_special(v_old, vic)) {
            // Dest already has start vertices, just merge the reports.
            // NOTE(review): presumably followed by `continue` in the full
            // source, so that specials are not also copied below — confirm.
            u32 idx = vic[v_old].index;
            NFAVertex v = dest.getSpecialVertex(idx);
            const auto &reports = vic[v_old].reports;
            dest[v].reports.insert(reports.begin(), reports.end());

        // Copy the vertex (with its properties) into dest and number it.
        NFAVertex v = add_vertex(vic[v_old], dest);
        dest_state_ids[v] = stateNum++;
        vmap[v_old] = v;

    /* add edges */
    DEBUG_PRINTF("common_len=%zu\n", common_len);
    for (const auto &e : edges_range(vic)) {
        // Translate both endpoints of the vic edge into dest vertices.
        NFAVertex u_old = source(e, vic), v_old = target(e, vic);
        NFAVertex u = vmap[u_old], v = vmap[v_old];
        bool uspecial = is_special(u, dest);
        bool vspecial = is_special(v, dest);

        // Skip stylised edges that are already present.
        if (uspecial && vspecial && edge(u, v, dest).second) {

        // We're in the common region if v's state ID is low enough, unless v
        // is a special (an accept), in which case we use u's state ID.
        assert(contains(dest_state_ids, v));
        bool in_common_region = dest_state_ids.at(v) < common_len;
        if (vspecial && dest_state_ids.at(u) < common_len) {
            in_common_region = true;

        DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n",
                     dest[u].index, dest_state_ids.at(u),
                     dest[v].index, dest_state_ids.at(v),
                     in_common_region ? " [common]" : "");

        if (in_common_region) {
            if (!is_special(v, dest)) {
                // Edge between two common, non-special vertices: it must
                // already exist in dest with the same top value.
                DEBUG_PRINTF("skipping common edge\n");
                assert(edge(u, v, dest).second);
                // Should never merge edges with different top values.
                assert(vic[e].top == dest[edge(u, v, dest).first].top);
            } else {
                assert(is_any_accept(v, dest));
                // If the edge exists in both graphs, skip it.
                if (edge(u, v, dest).second) {
                    DEBUG_PRINTF("skipping common edge to accept\n");

        // Anything that reaches this point is a new edge; copy it across
        // together with its properties.
        assert(!edge(u, v, dest).second);
        add_edge(u, v, vic[e], dest);
