/*
 * Calculate a self-organizing map, applied to genes.
 *
 * Runs somcluster() on the nrows x ncols matrix `data` (with its `mask`) using
 * a 2x2 rectangular grid, unit weights for every column and the Pearson
 * correlation ('c') as distance measure, then prints to stdout the grid cell
 * assigned to each row followed by the centroid of every grid cell.
 *
 *   nrows, ncols  dimensions of data and mask
 *   data, mask    handed unchanged to somcluster()
 *
 * If the scratch buffers cannot be allocated, a message is written to stderr
 * and the function returns without clustering or printing any results.
 */
void example_som(int nrows, int ncols, double** data, int** mask)
{
    int i, j, k;
    const int nxgrid = 2;
    const int nygrid = 2;        /* Rectangular grid 2x2 */
    const double inittau = 0.02; /* Initial value of the neighborhood function */
    const int niter = 1000;      /* Number of iterations */
    const char dist = 'c';       /* Pearson correlation */
    int ok = 1;
    double* weight = malloc(ncols*sizeof(double));
    int (*clusterid)[2] = malloc(nrows*sizeof(int[2]));
    double*** celldata = malloc(nxgrid*sizeof(double**));

    /* malloc may legitimately return NULL for a zero-size request, so a NULL
     * result only counts as a failure when something was actually asked for. */
    if ((!weight && ncols != 0) || (!clusterid && nrows != 0) || !celldata)
        ok = 0;

    if (celldata) {
        /* Clear the pointer table first so that the cleanup code below can
         * free the grid no matter how far the allocation got. */
        for (i = 0; i < nxgrid; i++)
            celldata[i] = NULL;
        for (i = 0; ok && i < nxgrid; i++) {
            celldata[i] = malloc(nygrid*sizeof(double*));
            if (!celldata[i]) {
                ok = 0;
                break;
            }
            for (j = 0; j < nygrid; j++)
                celldata[i][j] = NULL;
            for (j = 0; j < nygrid; j++) {
                celldata[i][j] = malloc(ncols*sizeof(double));
                if (!celldata[i][j] && ncols != 0) {
                    ok = 0;
                    break;
                }
            }
        }
    }

    if (ok) {
        for (i = 0; i < ncols; i++)
            weight[i] = 1.0;
        printf("======================= Self-Organizing Map ===================\n");
        printf("(results may change on every run)\n");
        somcluster(nrows, ncols, data, mask, weight, 0, nxgrid, nygrid, inittau,
                   niter, dist, celldata, clusterid);
        printf("Cluster assignments:\n");
        for (i = 0; i < nrows; i++)
            printf("Gene %2d: %2d %2d\n", i, clusterid[i][0], clusterid[i][1]);
        printf("Cluster centroids:\n");
        printf("\t");
        for (j = 0; j < ncols; j++)
            printf("\tCol %d", j);
        printf ("\n");
        for (i = 0; i < nxgrid; i++) {
            for (j = 0; j < nygrid; j++) {
                printf("Cell (%d,%d):", i, j);
                for (k = 0; k < ncols; k++)
                    printf("\t%5.2g", celldata[i][j][k]);
                printf("\n");
            }
        }
        printf("\n");
    } else {
        fprintf(stderr, "example_som: memory allocation failed\n");
    }

    /* Deallocate memory */
    if (celldata) {
        for (i = 0; i < nxgrid; i++) {
            if (celldata[i]) {
                for (j = 0; j < nygrid; j++)
                    free(celldata[i][j]);
                free(celldata[i]);
            }
        }
    }
    free(celldata);
    free(weight);
    free(clusterid);
}
static int PerformArraySOM(FILE* file, int XDim, int YDim, int iterations, double tau, char metric) { int i = 0; int j = 0; int k; int ok; int (*Group)[2] = malloc(_columns*sizeof(int[2])); double*** Nodes = malloc(XDim*sizeof(double**)); int* clusterid = malloc(_columns*sizeof(int)); if (Nodes) { for (i = 0; i < XDim; i++) { Nodes[i] = malloc(YDim*sizeof(double*)); j = 0; if (!Nodes[i]) break; for ( ; j < YDim; j++) { Nodes[i][j] = malloc(_rows*sizeof(double)); if (!Nodes[i][j]) break; } if (j < YDim) break; } } if (!Group || !clusterid || !Nodes || i < XDim || j < YDim) { if (Group) free(Group); if (Nodes) { if (i < XDim) { while (j--) free(Nodes[i][j]); free(Nodes[i]); } while (i--) { for (j = 0; j < YDim; j++) free(Nodes[i][j]); free(Nodes[i]); } free(Nodes); } free(clusterid); return 0; } somcluster(_rows, _columns, _data, _mask, _geneweight, 1, XDim, YDim, tau, iterations, metric, Nodes, Group); for (i=0; i<_columns; i++) clusterid[i] = Group[i][0] * YDim + Group[i][1]; free(Group); fprintf(file, "%s\t", _uniqID); for (i=0; i<XDim; i++) for (j=0; j<YDim; j++) fprintf(file, "\tNODE(%d,%d)", i, j); putc('\n', file); for (k=0;k<_rows;k++) { int index = _geneindex[k]; fprintf(file, "%s\t", _geneuniqID[index]); if (_genename[index]) fputs(_genename[index], file); else fputs(_geneuniqID[index], file); for (i=0; i<XDim; i++) for (j=0; j<YDim; j++) fprintf(file, "\t%f", Nodes[i][j][index]); putc('\n', file); } for (i=0;i<XDim;i++) { for (j=0; j<YDim; j++) free(Nodes[i][j]); free(Nodes[i]); } free(Nodes); ok = SetClusterIndex('a', XDim * YDim, clusterid); free(clusterid); return ok; }
static int PerformGeneSOM(FILE* file, int XDim, int YDim, int iterations, double tau, char metric) { int i = 0; int j = 0; int k; int ok; int (*Group)[2] = malloc(_rows*sizeof(int[2])); double*** Nodes = malloc(XDim*sizeof(double**)); int* clusterid = malloc(_rows*sizeof(int)); int* index = malloc(_columns*sizeof(int)); if (Nodes) { for (i = 0; i < XDim; i++) { Nodes[i] = malloc(YDim*sizeof(double*)); j = 0; if (!Nodes[i]) break; for ( ; j < YDim; j++) { Nodes[i][j] = malloc(_columns*sizeof(double)); if (!Nodes[i][j]) break; } if (j < YDim) break; } } if (!Group || !clusterid || !index || !Nodes || i < XDim || j < YDim) { if (Group) free(Group); if (clusterid) free(clusterid); if (index) free(index); if (Nodes) { if (i < XDim) { while (j--) free(Nodes[i][j]); free(Nodes[i]); } while (i--) { for (j = 0; j < YDim; j++) free(Nodes[i][j]); free(Nodes[i]); } free(Nodes); } return 0; } somcluster(_rows, _columns, _data, _mask, _arrayweight, 0, XDim, YDim, tau, iterations, metric, Nodes, Group); for (i=0; i<_rows; i++) clusterid[i] = Group[i][0] * YDim + Group[i][1]; free(Group); for (k=0; k<_columns; k++) index[k] = k; sort(_columns, _arrayorder, index); fputs("NODE", file); for (i=0; i<_columns; i++) fprintf(file, "\t%s", _arrayname[index[i]]); putc('\n', file); for (i=0; i<XDim; i++) { for (j=0; j<YDim; j++) { fprintf(file, "NODE(%d,%d)", i, j); for (k=0; k<_columns; k++) fprintf(file, "\t%f", Nodes[i][j][index[k]]); putc('\n', file); } } free(index); for (i=0;i<XDim;i++) { for (j=0; j<YDim; j++) free(Nodes[i][j]); free(Nodes[i]); } free(Nodes); ok = SetClusterIndex('g', XDim * YDim, clusterid); free(clusterid); return ok; }
/* @api private
 *
 * Ruby binding around somcluster(). Ruby-side arguments: (nx, ny, data, options = nil).
 *
 *   nx, ny   grid dimensions; each must convert to an Integer greater than 0.
 *   data     non-empty Array of row Arrays of numbers; the number of columns
 *            is taken from the first row.
 *   options  optional; the option names read through the get_*_option helpers
 *            are "mask", "transpose", "iterations", "metric" and "tau", and
 *            the :weights key is read directly from the hash.
 *
 * With transpose off the rows are clustered and each row is described by
 * ncols values; with transpose on the columns are clustered and each column
 * is described by nrows values. The weights therefore need one entry per
 * described value (ncols or nrows respectively) and default to 1.0.
 *
 * Returns a Hash with
 *   :cluster   one [x, y] grid position per clustered object
 *   :centroid  nx * ny Arrays (x-major order), one vector per grid cell
 *
 * Raises ArgumentError for malformed data/mask/weights/nx/ny and
 * NoMemoryError if a work buffer cannot be allocated.
 */
VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
    VALUE nx, ny, data, mask, weights, options;
    VALUE result = Qnil, cluster = Qnil, centroid = Qnil;
    int i, j, k;
    int oom = 0;
    double **cdata = NULL;
    int **cmask = NULL;
    double *cweights = NULL;
    /* somcluster() is handed a contiguous array of int[2] pairs by the other
     * callers of this routine, not a table of row pointers. */
    int (*ccluster)[2] = NULL;
    double ***ccelldata = NULL;

    rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);

    if (TYPE(data) != T_ARRAY)
        rb_raise(rb_eArgError, "data should be an array of arrays");

    mask = get_value_option(options, "mask", Qnil);
    if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
        rb_raise(rb_eArgError, "mask should be an array of arrays");

    int nxgrid = NIL_P(nx) ? 0 : NUM2INT(rb_Integer(nx));
    if (nxgrid <= 0)
        rb_raise(rb_eArgError, "nx should be > 0");

    int nygrid = NIL_P(ny) ? 0 : NUM2INT(rb_Integer(ny));
    if (nygrid <= 0)
        rb_raise(rb_eArgError, "ny should be > 0");

    int transpose = get_int_option(options, "transpose", 0);
    int npass     = get_int_option(options, "iterations", DEFAULT_ITERATIONS);

    // e = euclidian,
    // b = city-block distance
    // c = correlation
    // a = absolute value of the correlation
    // u = uncentered correlation
    // x = absolute uncentered correlation
    // s = spearman's rank correlation
    // k = kendall's tau
    int dist   = get_int_option(options, "metric", 'e');
    double tau = get_dbl_option(options, "tau", 1.0);

    weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
    if (!NIL_P(weights) && TYPE(weights) != T_ARRAY)
        rb_raise(rb_eArgError, "weights should be an array");

    /* Validate the shape of every input before touching the heap, so that the
     * common argument errors can neither crash nor leak. */
    int nrows = (int)RARRAY_LEN(data);
    if (nrows <= 0)
        rb_raise(rb_eArgError, "data should not be empty");
    for (i = 0; i < nrows; i++) {
        if (TYPE(rb_ary_entry(data, i)) != T_ARRAY)
            rb_raise(rb_eArgError, "data should be an array of arrays");
        if (!NIL_P(mask) && TYPE(rb_ary_entry(mask, i)) != T_ARRAY)
            rb_raise(rb_eArgError, "mask should be an array of arrays");
    }
    int ncols = (int)RARRAY_LEN(rb_ary_entry(data, 0));
    if (ncols <= 0)
        rb_raise(rb_eArgError, "data rows should not be empty");

    /* nobjects: how many items get a grid position.
     * ndata:    how many values describe each item (and each centroid). */
    int nobjects = transpose ? ncols : nrows;
    int ndata    = transpose ? nrows : ncols;

    /* Pointer tables come from calloc so that unfilled slots are NULL and the
     * cleanup code can free a partially built structure. */
    cdata     = calloc((size_t)nrows, sizeof *cdata);
    cmask     = calloc((size_t)nrows, sizeof *cmask);
    cweights  = malloc((size_t)ndata * sizeof *cweights);
    ccluster  = malloc((size_t)nobjects * sizeof *ccluster);
    ccelldata = calloc((size_t)nxgrid, sizeof *ccelldata);
    if (!cdata || !cmask || !cweights || !ccluster || !ccelldata) {
        oom = 1;
        goto cleanup;
    }

    for (i = 0; i < nrows; i++) {
        cdata[i] = malloc((size_t)ncols * sizeof **cdata);
        cmask[i] = malloc((size_t)ncols * sizeof **cmask);
        if (!cdata[i] || !cmask[i]) {
            oom = 1;
            goto cleanup;
        }
    }

    for (i = 0; i < nxgrid; i++) {
        ccelldata[i] = calloc((size_t)nygrid, sizeof **ccelldata);
        if (!ccelldata[i]) {
            oom = 1;
            goto cleanup;
        }
        for (j = 0; j < nygrid; j++) {
            ccelldata[i][j] = malloc((size_t)ndata * sizeof ***ccelldata);
            if (!ccelldata[i][j]) {
                oom = 1;
                goto cleanup;
            }
        }
    }

    /* NOTE(review): the numeric conversions below raise on non-numeric or
     * missing entries; such a raise unwinds past the cleanup code and the C
     * buffers above are leaked. Wrapping this in rb_ensure would close that. */
    for (i = 0; i < nrows; i++) {
        VALUE row  = rb_ary_entry(data, i);
        VALUE mrow = NIL_P(mask) ? Qnil : rb_ary_entry(mask, i);
        for (j = 0; j < ncols; j++) {
            cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(row, j)));
            cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(mrow, j)));
        }
    }

    /* One weight per described value: ncols normally, nrows when transposed. */
    for (i = 0; i < ndata; i++)
        cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));

    somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
               nxgrid, nygrid, tau, npass, (char)dist, ccelldata, ccluster);

    result   = rb_hash_new();
    cluster  = rb_ary_new();
    centroid = rb_ary_new();

    for (i = 0; i < nobjects; i++) {
        VALUE gridpoint = rb_ary_new();
        rb_ary_push(gridpoint, INT2NUM(ccluster[i][0]));
        rb_ary_push(gridpoint, INT2NUM(ccluster[i][1]));
        rb_ary_push(cluster, gridpoint);
    }

    for (i = 0; i < nxgrid; i++) {
        for (j = 0; j < nygrid; j++) {
            VALUE point = rb_ary_new();
            for (k = 0; k < ndata; k++)
                rb_ary_push(point, DBL2NUM(ccelldata[i][j][k]));
            rb_ary_push(centroid, point);
        }
    }

    rb_hash_aset(result, ID2SYM(rb_intern("cluster")),  cluster);
    rb_hash_aset(result, ID2SYM(rb_intern("centroid")), centroid);

cleanup:
    if (cdata) {
        for (i = 0; i < nrows; i++)
            free(cdata[i]);
    }
    if (cmask) {
        for (i = 0; i < nrows; i++)
            free(cmask[i]);
    }
    if (ccelldata) {
        for (i = 0; i < nxgrid; i++) {
            if (ccelldata[i]) {
                for (j = 0; j < nygrid; j++)
                    free(ccelldata[i][j]);
                free(ccelldata[i]);
            }
        }
    }
    free(cdata);
    free(cmask);
    free(ccelldata);
    free(cweights);
    free(ccluster);

    /* Raise only after everything has been released. */
    if (oom)
        rb_raise(rb_eNoMemError, "failed to allocate memory for self-organizing map");

    return result;
}