Esempio n. 1
0
File: layout.c Progetto: ekg/mars
/*
 * Lay out graph g and return the resulting position matrix.
 *
 * Steps, as visible in this function:
 *   1. Pick k anchors (opts.k clamped to the range [2, n], n = agnnodes(g))
 *      and let select_anchors() fill the k x n matrix dij. Anchors are
 *      coloured red when opts.color is set; clusters are only computed when
 *      opts.power != 1.
 *   2. Precompute everything inv_mul_full() needs later: u and s from
 *      singular_vectors(), the vector d from mat_mult_for_d(), and the QR
 *      factors (q, r) of  u_trans * X + (1/s[i] added at mindex(i,i)),
 *      where column i of X is inv_mul_ax(d, column i of u).
 *   3. Start from the stored positions (opts.given) or random ones, then
 *      iterate until the relative change in stress is no longer above
 *      EPSILON or opts.max_iter iterations have run.
 *
 * All temporaries allocated here are released before returning. The returned
 * matrix z is not freed by this function.
 */
mat mars(Agraph_t* g, struct marsopts opts)
{
    int n = agnnodes(g);
    int k = MIN(n, MAX(opts.k, 2));
    int iter, row, col;
    double rel_change = 1, prev_stress = -1;
    int* clusters = NULL;
    double* d;
    mat dij, u, u_trans, q, r, q_t, scaled, gram, z;
    double* s = malloc(sizeof(double) * k);
    double* ones = malloc(sizeof(double) * n);
    int* anchors = malloc(sizeof(int) * k);

    dij = mat_new(k, n);
    u = mat_new(n, k);
    scaled = mat_new(n, k);
    /* Despite its name, the vector is filled with -1 here. */
    darrset(ones, n, -1);

    /* Anchor selection (optionally highlighted) and clustering. */
    select_anchors(g, dij, anchors, k);
    if(opts.color) {
        for(col = 0; col < k; col++) {
            Agnode_t* node = get_node(anchors[col]);
            agset(node, "color", "red");
        }
    }
    if(opts.power != 1) {
        clusters = graph_cluster(g, dij, anchors);
    }

    /* Low-rank pieces: u, s (scaled by -1), u_trans and the vector d. */
    singular_vectors(g, dij, opts.power, u, s);
    vec_scalar_mult(s, k, -1);
    u_trans = mat_trans(u);
    d = mat_mult_for_d(u, s, u_trans, ones);

    /* Column by column: scaled[:, col] = inv_mul_ax(d, u[:, col]). */
    for(col = 0; col < u->c; col++) {
        double* u_col = mat_col(u, col);
        double* solved = inv_mul_ax(d, u_col, u->r);
        for(row = 0; row < u->r; row++) {
            scaled->m[mindex(row, col, scaled)] = solved[row];
        }
        free(solved);
        free(u_col);
    }

    /* gram = u_trans * scaled, with 1/s[i] added at (i, i); then factorize. */
    gram = mat_mult(u_trans, scaled);
    for(col = 0; col < k; col++) {
        gram->m[mindex(col, col, gram)] += (1.0/s[col]);
    }
    q = mat_new(gram->r, gram->c);
    r = mat_new(gram->c, gram->c);
    qr_factorize(gram, q, r);
    q_t = mat_trans(q);

    /* Initial positions, centred. */
    z = opts.given ? get_positions(g, opts.dim) : mat_rand(n, opts.dim);
    translate_by_centroid(z);

    if(opts.viewer) {
        init_viewer(g, opts.max_iter);
        append_layout(z);
    }

    prev_stress = stress(z, dij, anchors, opts.power);
    for(iter = 0; rel_change > EPSILON && iter < opts.max_iter; iter++) {
        double cur_stress;
        mat rhs;

        if(opts.power != 1) {
            rhs = barnes_hut_cluster(z, dij, clusters, opts.power);
        } else {
            rhs = barnes_hut(z);
        }

        /* Solve one coordinate at a time; the mean of the right-hand side
         * column is subtracted from every entry of the solution. */
        for(col = 0; col < opts.dim; col++) {
            double* b = mat_col(rhs, col);
            double total = 0;
            double* x;

            for(row = 0; row < rhs->r; row++) {
                total += b[row];
            }
            x = inv_mul_full(d, b, rhs->r, u, u_trans, q_t, r);
            for(row = 0; row < z->r; row++) {
                z->m[mindex(row, col, z)] = x[row] - total/rhs->r;
            }
            free(x);
            free(b);
        }

        adjust_anchors(g, anchors, k, z);
        update_anchors(z, dij, anchors, opts.power);
        translate_by_centroid(z);

        if(opts.viewer) {
            append_layout(z);
        }

        /* Convergence measure: relative change in stress. */
        cur_stress = stress(z, dij, anchors, opts.power);
        rel_change = fabs(cur_stress - prev_stress)/prev_stress;
        prev_stress = cur_stress;

        mat_free(rhs);
    }

    /* Release every temporary; only z survives. */
    mat_free(dij);
    mat_free(u);
    mat_free(u_trans);
    mat_free(q);
    mat_free(r);
    mat_free(q_t);
    mat_free(scaled);
    mat_free(gram);
    free(s);
    free(ones);
    free(d);
    free(anchors);
    free(clusters);

    return z;
}
Esempio n. 2
0
/*
 * Build this rank's local part of a random factor matrix for `mode`.
 *
 * Rank 0 generates the full rinfo->global_dims[mode] x nfactors matrix with
 * mat_rand() and distributes rows from it. Every other rank sends rank 0 its
 * layer start (tag 0), its local row count (tag 1) and the matching slice of
 * perm->iperms[mode] (tag 2), then receives its rows back (tag 3).
 *
 * Returns a newly allocated localdim x nfactors matrix, where
 * localdim = mat_end[mode] - mat_start[mode].
 */
matrix_t * mpi_mat_rand(
  idx_t const mode,
  idx_t const nfactors,
  permutation_t const * const perm,
  rank_info * const rinfo)
{
  idx_t const row_begin = rinfo->mat_start[mode];
  idx_t const localdim = rinfo->mat_end[mode] - row_begin;
  matrix_t * const mymat = mat_alloc(localdim, nfactors);
  MPI_Status status;

  /* the reduction below leaves the largest localdim of any rank on rank 0 */
  idx_t maxlocaldim = localdim;

  /* worker ranks: join the reduction, then just send/recv */
  if(rinfo->rank != 0) {
    MPI_Reduce(&maxlocaldim, NULL, 1, SPLATT_MPI_IDX, MPI_MAX, 0,
      rinfo->comm_3d);

    /* send permutation info to root */
    MPI_Send(&(rinfo->layer_starts[mode]), 1, SPLATT_MPI_IDX, 0, 0,
        rinfo->comm_3d);
    MPI_Send(&localdim, 1, SPLATT_MPI_IDX, 0, 1, rinfo->comm_3d);
    MPI_Send(perm->iperms[mode] + row_begin, localdim, SPLATT_MPI_IDX, 0, 2,
        rinfo->comm_3d);

    /* receive factor */
    MPI_Recv(mymat->vals, mymat->I * mymat->J, SPLATT_MPI_VAL, 0, 3,
        rinfo->comm_3d, &status);
    return mymat;
  }

  /* root rank: size the staging buffers for the largest rank */
  MPI_Reduce(MPI_IN_PLACE, &maxlocaldim, 1, SPLATT_MPI_IDX, MPI_MAX, 0,
    rinfo->comm_3d);

  idx_t * loc_perm = splatt_malloc(maxlocaldim * sizeof(*loc_perm));
  val_t * vbuf = splatt_malloc(maxlocaldim * nfactors * sizeof(*vbuf));

  /* the complete random factor that every rank's rows are taken from */
  matrix_t * full_factor = mat_rand(rinfo->global_dims[mode], nfactors);

  /* root copies its own rows straight into the output */
  #pragma omp parallel for schedule(static)
  for(idx_t i=0; i < localdim; ++i) {
    idx_t const gi = row_begin + perm->iperms[mode][i];
    idx_t const src_off = gi * nfactors;
    idx_t const dst_off = i * nfactors;
    for(idx_t f=0; f < nfactors; ++f) {
      mymat->vals[dst_off + f] = full_factor->vals[src_off + f];
    }
  }

  /* serve the remaining ranks one after another */
  for(int p=1; p < rinfo->npes; ++p) {
    /* layer start, row count and permutation slice of rank p */
    idx_t layerstart;
    idx_t nrows;
    MPI_Recv(&layerstart, 1, SPLATT_MPI_IDX, p, 0, rinfo->comm_3d, &status);
    MPI_Recv(&nrows, 1, SPLATT_MPI_IDX, p, 1, rinfo->comm_3d, &status);
    MPI_Recv(loc_perm, nrows, SPLATT_MPI_IDX, p, 2, rinfo->comm_3d, &status);

    /* gather rank p's rows into the staging buffer */
    #pragma omp parallel for schedule(static)
    for(idx_t i=0; i < nrows; ++i) {
      idx_t const gi = layerstart + loc_perm[i];
      idx_t const src_off = gi * nfactors;
      idx_t const dst_off = i * nfactors;
      for(idx_t f=0; f < nfactors; ++f) {
        vbuf[dst_off + f] = full_factor->vals[src_off + f];
      }
    }

    /* ship them to rank p */
    MPI_Send(vbuf, nrows * nfactors, SPLATT_MPI_VAL, p, 3, rinfo->comm_3d);
  }

  mat_free(full_factor);
  splatt_free(loc_perm);
  splatt_free(vbuf);

  return mymat;
}
Esempio n. 3
0
/*
 * Test and benchmark driver for the matrix multiplication implementations
 * listed in mat_mul_funcs.
 *
 * Usage: prog [max_runtime]
 *   max_runtime  parsed with strtod(); defaults to 1.0. An implementation is
 *                dropped from later benchmark rounds once bench() returns a
 *                value greater than this for it.
 *
 * Flow:
 *   1. Run every implementation on fixed 2x2 and 4x4 inputs and compare the
 *      result against a hard-coded reference with mat_assert_eq().
 *   2. Benchmark with random n x n matrices, doubling n (starting at 2) until
 *      every implementation has been dropped or memory allocation fails.
 *
 * Always returns EXIT_SUCCESS.
 */
int main(int argc, char **argv) {
    srand((unsigned)time(NULL));
    Cache cache;
    double max_runtime;
    /* Overly slow ones commented out by default. */
    MatMul mat_mul_funcs[] = {
        /*mat_mul_cpu,*/
        mat_mul_cpu_trans,
        mat_mul_cpu_trans_vec,
        mat_mul_cpu_block,
        mat_mul_cpu_cblas,
        /*mat_mul_cl,*/
        mat_mul_cl_row_priv,
        mat_mul_cl_row_local,
        mat_mul_cl_row_priv_col_local,
        mat_mul_cl_row_priv_cols_local,
        /* TODO broken for larger matrices, some cells contain trash.
         * Likely some memory overflow problem. */
        /*mat_mul_cl_block,*/
        mat_mul_cl_clblas,
    };
    /* func_done[f] != 0: implementation f is skipped in further rounds. */
    int first, func_done[NELEMS(mat_mul_funcs)] = {0};
    size_t f, i;
    size_t mat_sizeof;

    /* CLI args. */
    if (argc > 1) {
        max_runtime = strtod(argv[1], NULL);
    } else {
        max_runtime = 1.0;
    }

    common_init(&(cache.common), NULL);

    /* Unit test 2x2. */
    {
        const F A[] = {
            1.0, 2.0,
            3.0, 4.0
        };
        const F B[] = {
            5.0, 6.0,
            7.0, 8.0
        };
        enum N { n = 2 };
        F C[n*n];
        const F C_ref[] = {
            19.0, 22.0,
            43.0, 50.0
        };
        cl_buf_init(&cache, n * n * sizeof(F));
        for (f = 0; f < NELEMS(mat_mul_funcs); ++f) {
            mat_zero(C, n);
            mat_mul_funcs[f](A, B, C, n, &cache);
            mat_assert_eq(C, C_ref, n);
        }
        cl_buf_deinit(&cache);
    }

    /* Unit test 4x4. */
    {
        const F A[] = {
             1.0,  2.0,  3.0,  4.0,
             5.0,  6.0,  7.0,  8.0,
             9.0, 10.0, 11.0, 12.0,
            13.0, 14.0, 15.0, 16.0,
        };
        const F B[] = {
            17.0, 18.0, 19.0, 20.0,
            21.0, 22.0, 23.0, 24.0,
            25.0, 26.0, 27.0, 28.0,
            29.0, 30.0, 31.0, 32.0,
        };
        const F C_ref[] = {
             250.0,  260.0,  270.0,  280.0,
             618.0,  644.0,  670.0,  696.0,
             986.0, 1028.0, 1070.0, 1112.0,
            1354.0, 1412.0, 1470.0, 1528.0,
        };
        enum N { n = 4 };
        F C[n*n];
        cl_buf_init(&cache, n * n * sizeof(F));
        for (f = 0; f < NELEMS(mat_mul_funcs); ++f) {
            mat_zero(C, n);
            mat_mul_funcs[f](A, B, C, n, &cache);
            mat_assert_eq(C, C_ref, n);
        }
        cl_buf_deinit(&cache);
    }

    /* Benchmarks. */
    {
        double dt;
        F *A = NULL, *B = NULL, *C = NULL, *C_ref = NULL, *dst = NULL, *ref = NULL;
        int done;
        size_t n = 2;

        puts("#matmul");
        done = 0;
        while(1) {
            /* Each output row starts with log2 of the matrix side length. */
            printf("%zu ", (size_t)log2(n));
            mat_sizeof = n * n * sizeof(F);

            /* CPU setup. */
            A = aligned_alloc(VECTOR_SIZEOF, mat_sizeof);
            B = aligned_alloc(VECTOR_SIZEOF, mat_sizeof);
            C = aligned_alloc(VECTOR_SIZEOF, mat_sizeof);
            C_ref = aligned_alloc(VECTOR_SIZEOF, mat_sizeof);
            /* C_ref must be checked too: it is the destination of the first
             * benchmarked function below. */
            if (NULL == A || NULL == B || NULL == C || NULL == C_ref) {
                printf("error: could not allocate memory for n = %zu", n);
                /* Release whichever allocations did succeed;
                 * free(NULL) is a no-op. */
                free(A);
                free(B);
                free(C);
                free(C_ref);
                break;
            }
            mat_rand(A, n);
            mat_rand(B, n);

            cl_buf_init(&cache, mat_sizeof);
            first = 1;
            for (f = 0; f < NELEMS(mat_mul_funcs); ++f) {
                if (func_done[f]) {
                    /* Keep the output columns aligned for dropped functions. */
                    printf("%*s", 10, "");
                } else {
                    /* The first function still running in this round writes
                     * into C_ref and gets no reference; every later one
                     * writes into C and is handed C_ref as `ref`
                     * (presumably for comparison inside bench() -- confirm). */
                    if (first) {
                        dst = C_ref;
                        ref = NULL;
                        first = 0;
                    } else {
                        dst = C;
                        ref = C_ref;
                    }
                    dt = bench(mat_mul_funcs[f], A, B, dst, ref, n, &cache);
                    if (dt > max_runtime) {
                        func_done[f] = 1;
                    }
                }
            }
            puts("");

            /* Per-size cleanup happens before the termination check so the
             * final round releases its buffers as well. */
            free(A);
            free(B);
            free(C);
            free(C_ref);
            cl_buf_deinit(&cache);

            /* Stop once every implementation has been dropped. */
            done = 1;
            for (i = 0; i < NELEMS(mat_mul_funcs); ++i) {
                if (!func_done[i]) {
                    done = 0;
                    break;
                }
            }
            if (done) {
                break;
            }
            n *= 2;
        }
        common_deinit(&cache.common);
    }

    return EXIT_SUCCESS;
}