Exemple #1
0
void example_som(int nrows, int ncols, double** data, int** mask)
/* Calculate a self-organizing map, applied to genes.
 *
 * Calls somcluster() on the nrows x ncols matrix `data` (with `mask`) using a
 * fixed 2x2 grid, a weight of 1.0 for every column and the 'c' distance code,
 * then prints the grid cell assigned to each row followed by the centroid
 * stored for each grid cell.
 *
 * If any work buffer cannot be allocated, a message is written to stderr,
 * somcluster() is not called, and every buffer obtained so far is released.
 */
{ int i, j, k;
  const int nxgrid = 2;
  const int nygrid = 2; /* Rectangular grid 2x2 */
  const double inittau = 0.02; /* Initial value of the neighborhood function */
  const int niter = 1000; /* Number of iterations */
  const char dist = 'c'; /* Pearson correlation */
  /* Always request at least one element: malloc(0) is allowed to return NULL,
   * which would otherwise be indistinguishable from a real failure. */
  const size_t nr = nrows > 0 ? (size_t)nrows : 1;
  const size_t nc = ncols > 0 ? (size_t)ncols : 1;
  double* weight = malloc(nc*sizeof(double));
  int (*clusterid)[2] = malloc(nr*sizeof(int[2]));
  double*** celldata = malloc(nxgrid*sizeof(double**));
  int ok = weight && clusterid && celldata;
  if (celldata)
  { /* Start from NULL so the cleanup below can run on a half-built grid */
    for (i = 0; i < nxgrid; i++) celldata[i] = NULL;
  }
  for (i = 0; ok && i < nxgrid; i++)
  { celldata[i] = malloc(nygrid*sizeof(double*));
    if (!celldata[i])
    { ok = 0;
      break;
    }
    for (j = 0; j < nygrid; j++) celldata[i][j] = NULL;
    for (j = 0; ok && j < nygrid; j++)
    { celldata[i][j] = malloc(nc*sizeof(double));
      if (!celldata[i][j]) ok = 0;
    }
  }
  if (ok)
  { for (i = 0; i < ncols; i++) weight[i] = 1.0;
    printf("======================= Self-Organizing Map ===================\n");
    printf("(results may change on every run)\n");
    somcluster(nrows, ncols, data, mask, weight, 0, nxgrid, nygrid, inittau,
               niter, dist, celldata, clusterid);
    printf("Cluster assignments:\n");
    for(i=0; i<nrows; i++)
      printf("Gene %2d: %2d %2d\n",i,clusterid[i][0],clusterid[i][1]);
    printf("Cluster centroids:\n");
    printf("\t");
    for (j = 0; j < ncols; j++) printf("\tCol %d", j);
    printf ("\n");
    for (i = 0; i < nxgrid; i++)
    { for (j = 0; j < nygrid; j++)
      { printf("Cell (%d,%d):", i, j);
        for (k = 0; k < ncols; k++) printf("\t%5.2g",celldata[i][j][k]);
        printf("\n");
      }
    }
    printf("\n");
  }
  else fprintf(stderr, "example_som: memory allocation failed\n");
  /* Deallocate memory (free(NULL) is a no-op, so partial grids are fine) */
  if (celldata)
  { for (i = 0; i < nxgrid; i++)
    { if (!celldata[i]) continue;
      for (j = 0; j < nygrid; j++) free(celldata[i][j]);
      free(celldata[i]);
    }
    free(celldata);
  }
  free(weight);
  free(clusterid);
  return;
}
static int
PerformArraySOM(FILE* file, int XDim, int YDim, int iterations, double tau,
                char metric)
{ int i = 0;
  int j = 0;
  int k;
  int ok;
  int (*Group)[2] = malloc(_columns*sizeof(int[2]));
  double*** Nodes = malloc(XDim*sizeof(double**));
  int* clusterid = malloc(_columns*sizeof(int));
  if (Nodes)
  { for (i = 0; i < XDim; i++)
    { Nodes[i] = malloc(YDim*sizeof(double*));
      j = 0;
      if (!Nodes[i]) break;
      for ( ; j < YDim; j++)
      { Nodes[i][j] = malloc(_rows*sizeof(double));
        if (!Nodes[i][j]) break;
      }
      if (j < YDim) break;
    }
  }
  if (!Group || !clusterid || !Nodes || i < XDim || j < YDim)
  { if (Group) free(Group);
    if (Nodes)
    { if (i < XDim)
      { while (j--) free(Nodes[i][j]);
        free(Nodes[i]);
      }
      while (i--)
      { for (j = 0; j < YDim; j++) free(Nodes[i][j]);
        free(Nodes[i]);
      }
      free(Nodes);
    }
    free(clusterid);
    return 0;
  }

  somcluster(_rows, _columns, _data, _mask, _geneweight, 1,
    XDim, YDim, tau, iterations, metric, Nodes, Group);

  for (i=0; i<_columns; i++)
    clusterid[i] = Group[i][0] * YDim + Group[i][1];
  free(Group);

  fprintf(file, "%s\t", _uniqID);
  for (i=0; i<XDim; i++)
    for (j=0; j<YDim; j++) fprintf(file, "\tNODE(%d,%d)", i, j);
  putc('\n', file);

  for (k=0;k<_rows;k++)
  { int index = _geneindex[k];
    fprintf(file, "%s\t", _geneuniqID[index]);
    if (_genename[index]) fputs(_genename[index], file);
    else fputs(_geneuniqID[index], file);
    for (i=0; i<XDim; i++)
      for (j=0; j<YDim; j++) fprintf(file, "\t%f", Nodes[i][j][index]);
    putc('\n', file);
  }

  for (i=0;i<XDim;i++)
  { for (j=0; j<YDim; j++) free(Nodes[i][j]);
    free(Nodes[i]);
  }
  free(Nodes);
  ok = SetClusterIndex('a', XDim * YDim, clusterid);
  free(clusterid);
  return ok;
}
/* Run a self-organizing map over the rows of the file-level data set and
 * write the resulting node table to `file`.
 *
 * somcluster() is called with its transpose argument set to 0 and
 * _arrayweight as the weights. The output has a header line ("NODE" followed
 * by the _arrayname entries, in the order produced by sort() on _arrayorder)
 * and one line per grid cell holding that cell's values in the same column
 * order. The per-row grid positions are flattened to x*YDim + y and handed
 * to SetClusterIndex('g', ...).
 *
 * Returns 0 when a work buffer cannot be allocated, otherwise the value
 * returned by SetClusterIndex.
 */
static int
PerformGeneSOM(FILE* file, int XDim, int YDim, int iterations, double tau,
               char metric)
{ int x = 0;
  int y = 0;
  int col;
  int status;

  int (*grid)[2] = malloc(_rows*sizeof(int[2]));
  double*** cells = malloc(XDim*sizeof(double**));
  int* ids = malloc(_rows*sizeof(int));
  int* order = malloc(_columns*sizeof(int));

  /* Build the XDim x YDim table of node vectors. On the first failed
   * allocation the loops stop with (x, y) marking how far they got. */
  while (cells != NULL && x < XDim)
  { cells[x] = malloc(YDim*sizeof(double*));
    y = 0;
    if (cells[x] == NULL) break;
    while (y < YDim)
    { cells[x][y] = malloc(_columns*sizeof(double));
      if (cells[x][y] == NULL) break;
      y++;
    }
    if (y < YDim) break;
    x++;
  }

  if (grid == NULL || ids == NULL || order == NULL || cells == NULL
      || x < XDim || y < YDim)
  { free(grid);
    free(ids);
    free(order);
    if (cells != NULL)
    { int cx, cy;
      if (x < XDim)
      { /* Row x is only partly filled: entries [0, y) exist */
        for (cy = 0; cy < y; cy++) free(cells[x][cy]);
        free(cells[x]);
      }
      /* Rows [0, x) are complete */
      for (cx = 0; cx < x; cx++)
      { for (cy = 0; cy < YDim; cy++) free(cells[cx][cy]);
        free(cells[cx]);
      }
      free(cells);
    }
    return 0;
  }

  somcluster(_rows, _columns, _data, _mask, _arrayweight, 0,
    XDim, YDim, tau, iterations, metric, cells, grid);

  /* Flatten each (x, y) grid position into a single cluster number */
  for (x = 0; x < _rows; x++)
  { ids[x] = grid[x][0] * YDim + grid[x][1];
  }
  free(grid);

  /* Start from the identity permutation and let sort() reorder it */
  for (col = 0; col < _columns; col++) order[col] = col;
  sort(_columns, _arrayorder, order);

  /* Header line */
  fputs("NODE", file);
  for (col = 0; col < _columns; col++)
  { fprintf(file, "\t%s", _arrayname[order[col]]);
  }
  putc('\n', file);

  /* One line per grid cell */
  for (x = 0; x < XDim; x++)
  { for (y = 0; y < YDim; y++)
    { fprintf(file, "NODE(%d,%d)", x, y);
      for (col = 0; col < _columns; col++)
      { fprintf(file, "\t%f", cells[x][y][order[col]]);
      }
      putc('\n', file);
    }
  }
  free(order);

  for (x = 0; x < XDim; x++)
  { for (y = 0; y < YDim; y++) free(cells[x][y]);
    free(cells[x]);
  }
  free(cells);

  status = SetClusterIndex('g', XDim * YDim, ids);
  free(ids);
  return status;
}
Exemple #4
0
/* Work state for rb_do_self_organizing_map: the Ruby inputs, the problem
 * dimensions and every C buffer handed to somcluster(). Keeping it in one
 * place lets a single routine release everything on any exit path. */
typedef struct {
    VALUE data;
    VALUE mask;
    VALUE weights;
    int nrows;
    int ncols;
    int nxgrid;
    int nygrid;
    int dimx;   /* number of clustered items (length of ccluster)      */
    int dimy;   /* length of each centroid vector in ccelldata         */
    double **cdata;
    int **cmask;
    double *cweights;
    int **ccluster;
    double ***ccelldata;
} som_buffers;

/* Release every buffer recorded in b; safe on a partially allocated set
 * because pointer tables are zero-filled on allocation and free(NULL) is a
 * no-op. */
static void som_free_buffers(som_buffers *b) {
    int i, j;

    if (b->cdata) {
        for (i = 0; i < b->nrows; i++)
            free(b->cdata[i]);
        free(b->cdata);
    }
    if (b->cmask) {
        for (i = 0; i < b->nrows; i++)
            free(b->cmask[i]);
        free(b->cmask);
    }
    if (b->ccluster) {
        for (i = 0; i < b->dimx; i++)
            free(b->ccluster[i]);
        free(b->ccluster);
    }
    if (b->ccelldata) {
        for (i = 0; i < b->nxgrid; i++) {
            if (!b->ccelldata[i])
                continue;
            for (j = 0; j < b->nygrid; j++)
                free(b->ccelldata[i][j]);
            free(b->ccelldata[i]);
        }
        free(b->ccelldata);
    }
    free(b->cweights);

    b->cdata     = NULL;
    b->cmask     = NULL;
    b->ccluster  = NULL;
    b->ccelldata = NULL;
    b->cweights  = NULL;
}

/* Allocate every buffer described by the dimensions in b. Returns 1 on
 * success and 0 as soon as one allocation fails; the caller then releases
 * whatever was obtained with som_free_buffers(). All dimensions must be > 0. */
static int som_alloc_buffers(som_buffers *b) {
    int i, j;

    b->cdata     = calloc((size_t)b->nrows,  sizeof *b->cdata);
    b->cmask     = calloc((size_t)b->nrows,  sizeof *b->cmask);
    b->cweights  = malloc((size_t)b->ncols * sizeof *b->cweights);
    b->ccluster  = calloc((size_t)b->dimx,   sizeof *b->ccluster);
    b->ccelldata = calloc((size_t)b->nxgrid, sizeof *b->ccelldata);
    if (!b->cdata || !b->cmask || !b->cweights || !b->ccluster || !b->ccelldata)
        return 0;

    for (i = 0; i < b->nrows; i++) {
        b->cdata[i] = malloc((size_t)b->ncols * sizeof **b->cdata);
        b->cmask[i] = malloc((size_t)b->ncols * sizeof **b->cmask);
        if (!b->cdata[i] || !b->cmask[i])
            return 0;
    }

    for (i = 0; i < b->dimx; i++) {
        b->ccluster[i] = malloc(2 * sizeof **b->ccluster);
        if (!b->ccluster[i])
            return 0;
    }

    for (i = 0; i < b->nxgrid; i++) {
        b->ccelldata[i] = calloc((size_t)b->nygrid, sizeof **b->ccelldata);
        if (!b->ccelldata[i])
            return 0;
        for (j = 0; j < b->nygrid; j++) {
            b->ccelldata[i][j] = malloc((size_t)b->dimy * sizeof ***b->ccelldata);
            if (!b->ccelldata[i][j])
                return 0;
        }
    }
    return 1;
}

/* rb_protect callback: copy data, mask and weights from Ruby into the C
 * buffers. Every call that can raise (type checks and numeric conversions)
 * lives here so the caller can free the buffers before re-raising. */
static VALUE som_fill_inputs(VALUE arg) {
    som_buffers *b = (som_buffers *)arg;
    int i, j;

    for (i = 0; i < b->nrows; i++) {
        VALUE row  = rb_ary_entry(b->data, i);
        VALUE mrow = NIL_P(b->mask) ? Qnil : rb_ary_entry(b->mask, i);

        if (TYPE(row) != T_ARRAY)
            rb_raise(rb_eArgError, "data should be an array of arrays");
        if (!NIL_P(b->mask) && TYPE(mrow) != T_ARRAY)
            rb_raise(rb_eArgError, "mask should be an array of arrays");

        for (j = 0; j < b->ncols; j++) {
            b->cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(row, j)));
            b->cmask[i][j] = NIL_P(b->mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(mrow, j)));
        }
    }

    for (i = 0; i < b->ncols; i++)
        b->cweights[i] = NIL_P(b->weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(b->weights, i)));

    return Qnil;
}

/* rb_protect callback: build the result hash
 *   { :cluster => [[x, y], ...], :centroid => [[v, ...], ...] }
 * from the buffers filled in by somcluster(). Object allocation can raise,
 * hence the protection. */
static VALUE som_build_result(VALUE arg) {
    som_buffers *b = (som_buffers *)arg;
    int i, j, k;

    VALUE result   = rb_hash_new();
    VALUE cluster  = rb_ary_new();
    VALUE centroid = rb_ary_new();

    for (i = 0; i < b->dimx; i++) {
        VALUE gridpoint = rb_ary_new();
        rb_ary_push(gridpoint, INT2NUM(b->ccluster[i][0]));
        rb_ary_push(gridpoint, INT2NUM(b->ccluster[i][1]));
        rb_ary_push(cluster, gridpoint);
    }

    for (i = 0; i < b->nxgrid; i++) {
        for (j = 0; j < b->nygrid; j++) {
            VALUE point = rb_ary_new();
            for (k = 0; k < b->dimy; k++)
                rb_ary_push(point, DBL2NUM(b->ccelldata[i][j][k]));
            rb_ary_push(centroid, point);
        }
    }

    rb_hash_aset(result, ID2SYM(rb_intern("cluster")),   cluster);
    rb_hash_aset(result, ID2SYM(rb_intern("centroid")),  centroid);
    return result;
}

/* @api private */
/* Ruby binding around somcluster().
 *
 * Arguments (via rb_scan_args "31"): nx, ny, data and an optional options
 * value. Options read here: "mask", "transpose", "iterations", "metric",
 * "tau" and :weights.
 *
 * Returns a Hash with :cluster (one [x, y] pair per clustered item) and
 * :centroid (one array per grid cell, in row-major grid order).
 *
 * Raises ArgumentError when data/mask/weights have the wrong shape or when
 * nx/ny are not positive, NoMemoryError when a work buffer cannot be
 * allocated, and whatever the numeric conversions raise for bad elements.
 * All C buffers are released on every one of those paths.
 */
VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
    VALUE nx, ny, data, mask, weights, options, result;
    som_buffers bufs = {0};
    int state = 0;

    rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);

    if (TYPE(data) != T_ARRAY)
        rb_raise(rb_eArgError, "data should be an array of arrays");

    mask = get_value_option(options, "mask", Qnil);

    if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
        rb_raise(rb_eArgError, "mask should be an array of arrays");

    if (NIL_P(nx))
        rb_raise(rb_eArgError, "nx should be > 0");
    int nxgrid = NUM2INT(rb_Integer(nx));
    if (nxgrid <= 0)
        rb_raise(rb_eArgError, "nx should be > 0");

    if (NIL_P(ny))
        rb_raise(rb_eArgError, "ny should be > 0");
    int nygrid = NUM2INT(rb_Integer(ny));
    if (nygrid <= 0)
        rb_raise(rb_eArgError, "ny should be > 0");

    int transpose = get_int_option(options, "transpose", 0);
    int npass     = get_int_option(options, "iterations", DEFAULT_ITERATIONS);

    // e = euclidian,
    // b = city-block distance
    // c = correlation
    // a = absolute value of the correlation
    // u = uncentered correlation
    // x = absolute uncentered correlation
    // s = spearman's rank correlation
    // k = kendall's tau
    int dist      = get_int_option(options, "metric", 'e');
    double tau    = get_dbl_option(options, "tau", 1.0);

    weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
    if (!NIL_P(weights) && TYPE(weights) != T_ARRAY)
        rb_raise(rb_eArgError, "weights should be an array");

    // The column count is taken from the first row, so that row must exist
    // and be an Array before its length is read.
    if (RARRAY_LEN(data) == 0 || TYPE(rb_ary_entry(data, 0)) != T_ARRAY)
        rb_raise(rb_eArgError, "data should be an array of arrays");

    int nrows = (int)RARRAY_LEN(data);
    int ncols = (int)RARRAY_LEN(rb_ary_entry(data, 0));

    if (ncols <= 0)
        rb_raise(rb_eArgError, "data should be a non-empty array of non-empty arrays");

    bufs.data    = data;
    bufs.mask    = mask;
    bufs.weights = weights;
    bufs.nrows   = nrows;
    bufs.ncols   = ncols;
    bufs.nxgrid  = nxgrid;
    bufs.nygrid  = nygrid;
    bufs.dimx    = transpose ? ncols : nrows;
    bufs.dimy    = transpose ? nrows : ncols;

    if (!som_alloc_buffers(&bufs)) {
        som_free_buffers(&bufs);
        rb_raise(rb_eNoMemError, "failed to allocate memory for self organizing map");
    }

    // Conversions may raise; run them protected so the buffers are freed
    // before the exception continues to propagate.
    rb_protect(som_fill_inputs, (VALUE)&bufs, &state);
    if (state) {
        som_free_buffers(&bufs);
        rb_jump_tag(state);
    }

    // NOTE(review): the other somcluster() callers in this source pass an
    // `int (*)[2]` as the last argument, while ccluster here is an `int **`
    // with separately allocated rows. Confirm that the somcluster() this
    // extension links against really takes `int **` - TODO verify prototype.
    somcluster(nrows, ncols, bufs.cdata, bufs.cmask, bufs.cweights, transpose,
               nxgrid, nygrid, tau, npass, dist, bufs.ccelldata, bufs.ccluster);

    result = rb_protect(som_build_result, (VALUE)&bufs, &state);
    som_free_buffers(&bufs);
    if (state)
        rb_jump_tag(state);

    return result;
}