void zscore () { if (!data2d) buildData2d (); long i, j; for (j = 0; j < nvar; j++) { double cursum = 0; double curmean = 0; double curstd = 0; for (i = 0; i < nsample; i++) cursum += data2d[i][j]; curmean = cursum / nsample; cursum = 0; register double tmpf; for (i = 0; i < nsample; i++) { tmpf = data2d[i][j] - curmean; cursum += tmpf * tmpf; } curstd = (nsample == 1) ? 0 : sqrt (cursum / (nsample - 1)); //nsample -1 is an unbiased version for Gaussian for (i = 0; i < nsample; i++) { data2d[i][j] = (data2d[i][j] - curmean) / curstd; } } b_zscore = 1; }
void zscore (long indExcludeColumn, int b_discretize) { if (!data2d) buildData2d (); if (!b_discretize) return; // in this case, just generate the 2D data array long i, j; for (j = 0; j < nvar; j++) { if (j==indExcludeColumn) { continue; //this is useful to exclude the first column, which will be the target classification variable } double cursum = 0; double curmean = 0; double curstd = 0; for (i = 0; i < nsample; i++) cursum += data2d[i][j]; curmean = cursum / nsample; cursum = 0; register double tmpf; for (i = 0; i < nsample; i++) { tmpf = data2d[i][j] - curmean; cursum += tmpf * tmpf; } curstd = (nsample == 1) ? 0 : sqrt (cursum / (nsample - 1)); //nsample -1 is an unbiased version for Gaussian for (i = 0; i < nsample; i++) { data2d[i][j] = (data2d[i][j] - curmean) / curstd; } } b_zscore = 1; }