/*
 * setupOutputFile
 *
 * Initializes the output dataset SDDSout (binary, written to `output`) for
 * derivative results and defines its columns and parameters:
 *   - a "derivInterval" parameter holding `interval`;
 *   - the x column (and the x error column when xErrorName is non-NULL),
 *     transferred from SDDSin;
 *   - for each of the yNames entries of yName, a column transferred from
 *     SDDSin and then passed to changeInformation() together with the main
 *     templates and the units from divideColumnUnits();
 *   - when yErrorName or xErrorName is non-NULL, a matching error column per
 *     y column, built from yErrorName[i] if available, otherwise from a
 *     second transfer of yName[i], and passed to changeInformation() with
 *     the error templates;
 *   - all parameter definitions of SDDSin (SDDS_TRANSFER_KEEPOLD).
 *
 * mainTemplate0 / errorTemplate0 are 3-element arrays (name, description,
 * symbol); a NULL entry selects the built-in default.  For order != 1 the
 * default main templates embed the derivative order.
 *
 * On return *yOutputName, *yOutputErrorName and *yOutputUnits point to newly
 * allocated arrays of yNames strings ((*yOutputErrorName)[i] is NULL when no
 * error columns are produced).
 *
 * Every SDDS failure is reported through SDDS_PrintErrors() with the
 * SDDS_EXIT_PrintErrors flag set.  Always returns 1.
 */
long setupOutputFile(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, char *output,
                     char ***yOutputName, char ***yOutputErrorName, char ***yOutputUnits,
                     char *xName, char *xErrorName, char **yName, char **yErrorName,
                     long yNames, char **mainTemplate0, char **errorTemplate0,
                     int32_t interval, long order) {
  long i;
  char *xSymbol = NULL, *ySymbol = NULL;
  char *mainTemplate[3] = {"%yNameDeriv", "Derivative w.r.t. %xSymbol of %ySymbol", "d[%ySymbol]/d[%xSymbol]"};
  char *errorTemplate[3] = {"%yNameDerivSigma", "Sigma of derivative w.r.t. %xSymbol of %ySymbol", "Sigma[d[%ySymbol]/d[%xSymbol]]"};
  char buffer[1024];

  /* Resolve the templates: caller-supplied entries win; otherwise the
   * defaults are used, with order-specific text when order != 1. */
  for (i = 0; i < 3; i++) {
    if (!mainTemplate0[i]) {
      if (order != 1) {
        switch (i) {
        case 0:
          /* name */
          snprintf(buffer, sizeof(buffer), "%%yNameDeriv%ld", order);
          break;
        case 1:
          /* description */
          snprintf(buffer, sizeof(buffer), "Derivative %ld w.r.t. %%xSymbol of %%ySymbol", order);
          break;
        case 2:
          /* symbol */
          snprintf(buffer, sizeof(buffer), "d$a%ld$n[%%ySymbol]/d[%%xSymbol]$a%ld$n", order, order);
          break;
        }
        cp_str(&mainTemplate[i], buffer);
      }
    } else
      mainTemplate[i] = mainTemplate0[i];
    if (errorTemplate0[i])
      errorTemplate[i] = errorTemplate0[i];
  }

  /* The arrays hold char* elements, so size them by the element type
   * (**ptr), not by the array pointer type. */
  *yOutputName = tmalloc(sizeof(**yOutputName) * yNames);
  *yOutputErrorName = tmalloc(sizeof(**yOutputErrorName) * yNames);
  *yOutputUnits = tmalloc(sizeof(**yOutputUnits) * yNames);

  if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddsderiv output", output) ||
      SDDS_DefineParameter1(SDDSout, "derivInterval", NULL, NULL, NULL, NULL, SDDS_LONG, &interval) < 0)
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);

  if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, xName, NULL) ||
      (xErrorName && !SDDS_TransferColumnDefinition(SDDSout, SDDSin, xErrorName, NULL)))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);

  if (SDDS_GetColumnInformation(SDDSout, "symbol", &xSymbol, SDDS_GET_BY_NAME, xName) != SDDS_STRING) {
    fprintf(stderr, "error: problem getting symbol for column %s\n", xName);
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
  }
  /* Fall back to the column name when the symbol is missing or blank, the
   * same rule that is applied to the y symbols below. */
  if (!xSymbol || SDDS_StringIsBlank(xSymbol))
    SDDS_CopyString(&xSymbol, xName);

  for (i = 0; i < yNames; i++) {
    if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[i], NULL)) {
      fprintf(stderr, "error: problem transferring definition for column %s\n", yName[i]);
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
    }
    if (SDDS_GetColumnInformation(SDDSout, "symbol", &ySymbol, SDDS_GET_BY_NAME, yName[i]) != SDDS_STRING) {
      fprintf(stderr, "error: problem getting symbol for column %s\n", yName[i]);
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
    }
    if (!ySymbol || SDDS_StringIsBlank(ySymbol))
      SDDS_CopyString(&ySymbol, yName[i]);

    (*yOutputUnits)[i] = divideColumnUnits(SDDSout, yName[i], xName);
    (*yOutputName)[i] = changeInformation(SDDSout, yName[i], yName[i], ySymbol, xName, xSymbol,
                                          mainTemplate, (*yOutputUnits)[i]);

    if (yErrorName || xErrorName) {
      if (yErrorName && yErrorName[i]) {
        if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yErrorName[i], NULL)) {
          fprintf(stderr, "error: problem transferring definition for column %s\n", yErrorName[i]);
          SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
        }
        (*yOutputErrorName)[i] = changeInformation(SDDSout, yErrorName[i], yName[i], ySymbol, xName, xSymbol,
                                                   errorTemplate, (*yOutputUnits)[i]);
      } else {
        /* No y error column for this entry: the y column definition is
         * transferred a second time and used as the basis of the error
         * column. */
        if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[i], NULL)) {
          fprintf(stderr, "error: problem transferring error definition for column %s\n", yName[i]);
          SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
        }
        (*yOutputErrorName)[i] = changeInformation(SDDSout, yName[i], yName[i], ySymbol, xName, xSymbol,
                                                   errorTemplate, (*yOutputUnits)[i]);
      }
    } else
      (*yOutputErrorName)[i] = NULL;
  }

  if (!SDDS_TransferAllParameterDefinitions(SDDSout, SDDSin, SDDS_TRANSFER_KEEPOLD) ||
      !SDDS_WriteLayout(SDDSout))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors | SDDS_EXIT_PrintErrors);
  return (1);
}
int main(int argc, char **argv) { int iArg; char **column, **excludeColumn, *withOnly; long columns, excludeColumns; char *input, *output; SCANNED_ARG *scanned; SDDS_DATASET SDDSin, SDDSout; long i, j, row, rows, count, readCode, rankOrder, iName1, iName2; int32_t outlierStDevPasses; double **data, correlation, significance, outlierStDevLimit; double **rank; short **accept; char s[SDDS_MAXLINE]; unsigned long pipeFlags, dummyFlags, majorOrderFlag; short columnMajorOrder=-1; SDDS_RegisterProgramName(argv[0]); argc = scanargs(&scanned, argc, argv); if (argc<2) bomb(NULL, USAGE); output = input = withOnly = NULL; columns = excludeColumns = 0; column = excludeColumn = NULL; pipeFlags = 0; rankOrder = 0; outlierStDevPasses = 0; outlierStDevLimit = 1.; rank = NULL; accept = NULL; for (iArg=1; iArg<argc; iArg++) { if (scanned[iArg].arg_type==OPTION) { /* process options here */ switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) { case SET_MAJOR_ORDER: majorOrderFlag=0; scanned[iArg].n_items--; if (scanned[iArg].n_items>0 && (!scanItemList(&majorOrderFlag, scanned[iArg].list+1, &scanned[iArg].n_items, 0, "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER, "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER, NULL))) SDDS_Bomb("invalid -majorOrder syntax/values"); if (majorOrderFlag&SDDS_COLUMN_MAJOR_ORDER) columnMajorOrder=1; else if (majorOrderFlag&SDDS_ROW_MAJOR_ORDER) columnMajorOrder=0; break; case SET_COLUMNS: if (columns) SDDS_Bomb("only one -columns option may be given"); if (scanned[iArg].n_items<2) SDDS_Bomb("invalid -columns syntax"); column = tmalloc(sizeof(*column)*(columns=scanned[iArg].n_items-1)); for (i=0; i<columns; i++) column[i] = scanned[iArg].list[i+1]; break; case SET_EXCLUDE: if (scanned[iArg].n_items<2) SDDS_Bomb("invalid -excludeColumns syntax"); moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list+1, scanned[iArg].n_items-1); break; case SET_WITHONLY: if (withOnly) SDDS_Bomb("only one -withOnly option may be given"); if 
(scanned[iArg].n_items<2) SDDS_Bomb("invalid -withOnly syntax"); withOnly = scanned[iArg].list[1]; break; case SET_PIPE: if (!processPipeOption(scanned[iArg].list+1, scanned[iArg].n_items-1, &pipeFlags)) SDDS_Bomb("invalid -pipe syntax"); break; case SET_RANKORDER: rankOrder = 1; break; case SET_STDEVOUTLIER: scanned[iArg].n_items--; outlierStDevPasses = 1; outlierStDevLimit = 1; if (!scanItemList(&dummyFlags, scanned[iArg].list+1, &scanned[iArg].n_items, 0, "limit", SDDS_DOUBLE, &outlierStDevLimit, 1, 0, "passes", SDDS_LONG, &outlierStDevPasses, 1, 0, NULL) || outlierStDevPasses<=0 || outlierStDevLimit<=0) SDDS_Bomb("invalid -stdevOutlier syntax/values"); break; default: fprintf(stderr, "error: unknown/ambiguous option: %s\n", scanned[iArg].list[0]); exit(1); break; } } else { if (!input) input = scanned[iArg].list[0]; else if (!output) output = scanned[iArg].list[0]; else SDDS_Bomb("too many filenames seen"); } } processFilenames("sddscorrelate", &input, &output, pipeFlags, 0, NULL); if (!SDDS_InitializeInput(&SDDSin, input)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!columns) columns = appendToStringArray(&column, columns, "*"); if (withOnly) columns = appendToStringArray(&column, columns, withOnly); if ((columns=expandColumnPairNames(&SDDSin, &column, NULL, columns, excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0))<=0) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); SDDS_Bomb("no columns selected for correlation analysis"); } if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddscorrelate output", output) || SDDS_DefineColumn(&SDDSout, "Correlate1Name", NULL, NULL, "Name of correlated quantity 1", NULL, SDDS_STRING, 0)<0 || SDDS_DefineColumn(&SDDSout, "Correlate2Name", NULL, NULL, "Name of correlated quantity 2", NULL, SDDS_STRING, 0)<0 || SDDS_DefineColumn(&SDDSout, "CorrelatePair", NULL, NULL, "Names of correlated quantities", NULL, SDDS_STRING, 0)<0 || SDDS_DefineColumn(&SDDSout, 
"CorrelationCoefficient", "r", NULL, "Linear correlation coefficient", NULL, SDDS_DOUBLE, 0)<0 || SDDS_DefineColumn(&SDDSout, "CorrelationSignificance", "P$br$n", NULL, "Linear correlation coefficient significance", NULL, SDDS_DOUBLE, 0)<0 || SDDS_DefineColumn(&SDDSout, "CorrelationPoints", NULL, NULL, "Number of points used for correlation", NULL, SDDS_LONG, 0)<0 || SDDS_DefineParameter(&SDDSout, "CorrelatedRows", NULL, NULL, "Number of data rows in correlation analysis", NULL, SDDS_LONG, NULL)<0 || SDDS_DefineParameter(&SDDSout, "sddscorrelateInputFile", NULL, NULL, "Data file processed by sddscorrelate", NULL, SDDS_STRING, input?input:"stdin")<0 || SDDS_DefineParameter(&SDDSout, "sddscorrelateMode", NULL, NULL, NULL, NULL, SDDS_STRING, rankOrder?"Rank-Order (Spearman)":"Linear (Pearson)")<0 || SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierPasses", NULL, NULL, "Number of passes of standard-deviation outlier elimination applied", NULL, SDDS_LONG, &outlierStDevPasses)<0 || SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierLimit", NULL, NULL, "Standard-deviation outlier limit applied", NULL, SDDS_DOUBLE, &outlierStDevLimit)<0) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (columnMajorOrder!=-1) SDDSout.layout.data_mode.column_major = columnMajorOrder; else SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major; if (!SDDS_WriteLayout(&SDDSout)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (!(data = (double**)malloc(sizeof(*data)*columns)) || (rankOrder && !(rank = (double**)malloc(sizeof(*rank)*columns))) || !(accept = (short**)malloc(sizeof(*accept)*columns))) SDDS_Bomb("allocation failure"); while ((readCode=SDDS_ReadPage(&SDDSin))>0) { if ((rows = SDDS_CountRowsOfInterest(&SDDSin))<3) continue; if (!SDDS_StartPage(&SDDSout, columns*(columns-1)/2) || !SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE, "CorrelatedRows", rows, NULL)) 
SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); for (i=0; i<columns; i++) { if (!(data[i] = SDDS_GetColumnInDoubles(&SDDSin, column[i]))) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); if (rankOrder) rank[i] = findRank(data[i], rows); if (outlierStDevPasses) { if (!(accept[i] = (short*)malloc(sizeof(**accept)*rows))) SDDS_Bomb("allocation failure"); markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses, accept[i], rows); } else accept[i] = NULL; } for (i=row=0; i<columns; i++) { for (j=i+1; j<columns; j++) { iName1 = i; iName2 = j; if (withOnly) { if (strcmp(withOnly, column[i])==0) { iName1 = j; iName2 = i; } else if (strcmp(withOnly, column[j])==0) { iName1 = i; iName2 = j; } else continue; } correlation = linearCorrelationCoefficient(rankOrder?rank[i]:data[i], rankOrder?rank[j]:data[j], accept[i], accept[j], rows, &count); significance = linearCorrelationSignificance(correlation, count); sprintf(s, "%s.%s", column[iName1], column[iName2]); if (!SDDS_SetRowValues(&SDDSout, SDDS_SET_BY_INDEX|SDDS_PASS_BY_VALUE, row++, 0, column[iName1], 1, column[iName2], 2, s, 3, correlation, 4, significance, 5, count, -1)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } } for (i=0; i<columns; i++) { free(data[i]); if (rankOrder) free(rank[i]); if (accept[i]) free(accept[i]); } if (!SDDS_WritePage(&SDDSout)) SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors); } if (!SDDS_Terminate(&SDDSin)) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); exit(1); } if (!SDDS_Terminate(&SDDSout)) { SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors); exit(1); } return 0; }