Esempio n. 1
0
/*
 * setupOutputFile
 *
 * Initializes SDDSout as a binary output file, defines the "derivInterval"
 * parameter from `interval`, transfers the x column (and the x error column,
 * if xErrorName is non-NULL) from SDDSin, and then, for each of the yNames
 * columns in yName, transfers the column definition and rewrites its
 * name/description/symbol/units via changeInformation().  Finally all
 * parameter definitions are transferred from SDDSin and the layout is written.
 *
 * Templates: each template array has three entries, indexed as
 *   0 = name, 1 = description, 2 = symbol.
 * A NULL entry in mainTemplate0 selects the built-in default; when order!=1
 * the default is regenerated to include the derivative order.  A NULL entry
 * in errorTemplate0 selects the built-in error default (which does not
 * depend on order).
 *
 * Outputs (arrays of yNames entries, allocated here with tmalloc):
 *   *yOutputName       value returned by changeInformation() for each y column
 *   *yOutputErrorName  same for the error column, or NULL when neither
 *                      yErrorName nor xErrorName is given
 *   *yOutputUnits      value returned by divideColumnUnits() for each y column
 *
 * Returns 1.  Failures are reported through SDDS_PrintErrors() with
 * SDDS_EXIT_PrintErrors set (presumably terminating the program -- confirm
 * against the SDDS library).
 */
long setupOutputFile(SDDS_DATASET *SDDSout, SDDS_DATASET *SDDSin, char *output,
                     char ***yOutputName, char ***yOutputErrorName, char ***yOutputUnits,
                     char *xName, char *xErrorName,
                     char **yName, char **yErrorName, long yNames,
                     char **mainTemplate0, char **errorTemplate0, int32_t interval,
                     long order)
{
    long i;
    char *xSymbol = NULL, *ySymbol = NULL;
    char *mainTemplate[3] = {"%yNameDeriv", "Derivative w.r.t. %xSymbol of %ySymbol", "d[%ySymbol]/d[%xSymbol]"};
    char *errorTemplate[3] = {"%yNameDerivSigma", "Sigma of derivative w.r.t. %xSymbol of %ySymbol",
                              "Sigma[d[%ySymbol]/d[%xSymbol]]"};
    char buffer[1024];

    /* Resolve the effective templates: caller-supplied entries win, otherwise
     * fall back to the defaults (made order-specific when order!=1). */
    for (i=0; i<3; i++) {
        if (mainTemplate0[i])
            mainTemplate[i] = mainTemplate0[i];
        else if (order!=1) {
            switch (i) {
            case 0:
                /* name */
                snprintf(buffer, sizeof(buffer), "%%yNameDeriv%ld", order);
                break;
            case 1:
                /* description */
                snprintf(buffer, sizeof(buffer), "Derivative %ld w.r.t. %%xSymbol of %%ySymbol", order);
                break;
            case 2:
                /* symbol */
                snprintf(buffer, sizeof(buffer), "d$a%ld$n[%%ySymbol]/d[%%xSymbol]$a%ld$n", order, order);
                break;
            }
            /* buffer is reused on the next pass, so keep a private copy */
            cp_str(&mainTemplate[i], buffer);
        }
        if (errorTemplate0[i])
            errorTemplate[i] = errorTemplate0[i];
    }

    /* Each array holds yNames elements of type char*; size by the element type. */
    *yOutputName = tmalloc(sizeof(**yOutputName)*yNames);
    *yOutputErrorName = tmalloc(sizeof(**yOutputErrorName)*yNames);
    *yOutputUnits = tmalloc(sizeof(**yOutputUnits)*yNames);

    if (!SDDS_InitializeOutput(SDDSout, SDDS_BINARY, 0, NULL, "sddsderiv output", output) ||
        SDDS_DefineParameter1(SDDSout, "derivInterval", NULL, NULL, NULL, NULL, SDDS_LONG, &interval)<0)
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, xName, NULL) ||
        (xErrorName && !SDDS_TransferColumnDefinition(SDDSout, SDDSin, xErrorName, NULL)))
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

    if (SDDS_GetColumnInformation(SDDSout, "symbol", &xSymbol, SDDS_GET_BY_NAME, xName)!=SDDS_STRING) {
        fprintf(stderr, "error: problem getting symbol for column %s\n", xName);
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    }
    /* Fall back to the column name when no usable symbol is defined; a blank
     * symbol is treated like a missing one, matching the y-column handling. */
    if (!xSymbol || SDDS_StringIsBlank(xSymbol))
        SDDS_CopyString(&xSymbol, xName);

    for (i=0; i<yNames; i++) {
        if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[i], NULL)) {
            fprintf(stderr, "error: problem transferring definition for column %s\n", yName[i]);
            SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
        }
        if (SDDS_GetColumnInformation(SDDSout, "symbol", &ySymbol, SDDS_GET_BY_NAME, yName[i])!=SDDS_STRING) {
            fprintf(stderr, "error: problem getting symbol for column %s\n", yName[i]);
            SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
        }
        if (!ySymbol || SDDS_StringIsBlank(ySymbol))
            SDDS_CopyString(&ySymbol, yName[i]);

        (*yOutputUnits)[i] = divideColumnUnits(SDDSout, yName[i], xName);
        (*yOutputName)[i] = changeInformation(SDDSout, yName[i], yName[i], ySymbol, xName, xSymbol, mainTemplate,
                                              (*yOutputUnits)[i]);

        if (!yErrorName && !xErrorName) {
            /* no error information requested at all */
            (*yOutputErrorName)[i] = NULL;
            continue;
        }

        if (yErrorName && yErrorName[i]) {
            /* Base the error output column on the supplied y error column. */
            if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yErrorName[i], NULL)) {
                fprintf(stderr, "error: problem transferring definition for column %s\n", yErrorName[i]);
                SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
            }
            (*yOutputErrorName)[i] = changeInformation(SDDSout, yErrorName[i], yName[i], ySymbol, xName, xSymbol,
                                                       errorTemplate, (*yOutputUnits)[i]);
        }
        else {
            /* No y error column for this entry: transfer the y column's own
             * definition a second time and use it as the basis for the error
             * output column. */
            if (!SDDS_TransferColumnDefinition(SDDSout, SDDSin, yName[i], NULL)) {
                fprintf(stderr, "error: problem transferring error definition for column %s\n", yName[i]);
                SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
            }
            (*yOutputErrorName)[i] = changeInformation(SDDSout, yName[i], yName[i], ySymbol, xName, xSymbol,
                                                       errorTemplate, (*yOutputUnits)[i]);
        }
    }

    if (!SDDS_TransferAllParameterDefinitions(SDDSout, SDDSin, SDDS_TRANSFER_KEEPOLD) ||
        !SDDS_WriteLayout(SDDSout))
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    return(1);
}
Esempio n. 2
0
/*
 * sddscorrelate entry point.
 *
 * Parses the command line (-majorOrder, -columns, -excludeColumns, -withOnly,
 * -pipe, -rankOrder, -stdevOutlier plus up to two filenames), opens the input
 * and output datasets, and for every input page with at least 3 rows of
 * interest writes one output page holding, for each selected column pair,
 * the two names, the "name1.name2" pair label, the correlation coefficient,
 * its significance and the number of points used.
 *
 * Returns 0 on success; error paths call SDDS_Bomb()/SDDS_PrintErrors() or
 * exit(1).
 */
int main(int argc, char **argv)
{
  int iArg;
  char **column, **excludeColumn, *withOnly;
  long columns, excludeColumns;
  char *input, *output;
  SCANNED_ARG *scanned;
  SDDS_DATASET SDDSin, SDDSout;
  long i, j, row, rows, count, readCode, rankOrder, iName1, iName2;
  int32_t outlierStDevPasses;
  double **data, correlation, significance, outlierStDevLimit;
  double **rank;
  short **accept;
  char s[SDDS_MAXLINE];
  int pairLength;
  unsigned long pipeFlags, dummyFlags, majorOrderFlag;
  short columnMajorOrder=-1;

  SDDS_RegisterProgramName(argv[0]);
  argc = scanargs(&scanned, argc, argv);
  if (argc<2)
    bomb(NULL, USAGE);

  output = input = withOnly = NULL;
  columns = excludeColumns = 0;
  column = excludeColumn = NULL;
  pipeFlags = 0;
  rankOrder = 0;
  outlierStDevPasses = 0;
  outlierStDevLimit  = 1.;
  rank = NULL;
  accept = NULL;

  for (iArg=1; iArg<argc; iArg++) {
    if (scanned[iArg].arg_type==OPTION) {
      /* process options here */
      switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
      case SET_MAJOR_ORDER:
        majorOrderFlag=0;
        scanned[iArg].n_items--;
        if (scanned[iArg].n_items>0 &&
            (!scanItemList(&majorOrderFlag, scanned[iArg].list+1, &scanned[iArg].n_items, 0,
                           "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
                           "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER,
                           NULL)))
          SDDS_Bomb("invalid -majorOrder syntax/values");
        if (majorOrderFlag&SDDS_COLUMN_MAJOR_ORDER)
          columnMajorOrder=1;
        else if (majorOrderFlag&SDDS_ROW_MAJOR_ORDER)
          columnMajorOrder=0;
        break;
      case SET_COLUMNS:
        if (columns)
          SDDS_Bomb("only one -columns option may be given");
        if (scanned[iArg].n_items<2)
          SDDS_Bomb("invalid -columns syntax");
        column = tmalloc(sizeof(*column)*(columns=scanned[iArg].n_items-1));
        for (i=0; i<columns; i++)
          column[i] = scanned[iArg].list[i+1];
        break;
      case SET_EXCLUDE:
        if (scanned[iArg].n_items<2)
          SDDS_Bomb("invalid -excludeColumns syntax");
        moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list+1, scanned[iArg].n_items-1);
        break;
      case SET_WITHONLY:
        if (withOnly)
          SDDS_Bomb("only one -withOnly option may be given");
        if (scanned[iArg].n_items<2)
          SDDS_Bomb("invalid -withOnly syntax");
        withOnly = scanned[iArg].list[1];
        break;
      case SET_PIPE:
        if (!processPipeOption(scanned[iArg].list+1, scanned[iArg].n_items-1, &pipeFlags))
          SDDS_Bomb("invalid -pipe syntax");
        break;
      case SET_RANKORDER:
        rankOrder = 1;
        break;
      case SET_STDEVOUTLIER:
        scanned[iArg].n_items--;
        /* giving the option at all enables one pass with limit 1 unless overridden */
        outlierStDevPasses = 1;
        outlierStDevLimit = 1;
        if (!scanItemList(&dummyFlags, scanned[iArg].list+1, &scanned[iArg].n_items, 0,
                          "limit", SDDS_DOUBLE, &outlierStDevLimit, 1, 0,
                          "passes", SDDS_LONG, &outlierStDevPasses, 1, 0,
                          NULL) ||
            outlierStDevPasses<=0 || outlierStDevLimit<=0)
          SDDS_Bomb("invalid -stdevOutlier syntax/values");
        break;
      default:
        fprintf(stderr, "error: unknown/ambiguous option: %s\n",
                scanned[iArg].list[0]);
        exit(1);
        break;
      }
    }
    else {
      /* positional arguments: first is the input file, second the output file */
      if (!input)
        input = scanned[iArg].list[0];
      else if (!output)
        output = scanned[iArg].list[0];
      else
        SDDS_Bomb("too many filenames seen");
    }
  }

  processFilenames("sddscorrelate", &input, &output, pipeFlags, 0, NULL);

  if (!SDDS_InitializeInput(&SDDSin, input))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

  /* default to all columns; the -withOnly column is always added to the request list */
  if (!columns)
    columns = appendToStringArray(&column, columns, "*");
  if (withOnly)
    columns = appendToStringArray(&column, columns, withOnly);

  if ((columns=expandColumnPairNames(&SDDSin, &column, NULL, columns,
                                     excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0))<=0) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    SDDS_Bomb("no columns selected for correlation analysis");
  }

  /* NOTE: the column definition order below fixes the indices 0..5 used in
   * the SDDS_SetRowValues() call further down. */
  if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddscorrelate output", output) ||
      SDDS_DefineColumn(&SDDSout, "Correlate1Name", NULL, NULL, "Name of correlated quantity 1", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "Correlate2Name", NULL, NULL, "Name of correlated quantity 2", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelatePair", NULL, NULL, "Names of correlated quantities", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationCoefficient", "r", NULL, "Linear correlation coefficient",
                        NULL, SDDS_DOUBLE, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationSignificance", "P$br$n", NULL, "Linear correlation coefficient significance",
                        NULL, SDDS_DOUBLE, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationPoints", NULL, NULL, "Number of points used for correlation",
                        NULL, SDDS_LONG, 0)<0 ||
      SDDS_DefineParameter(&SDDSout, "CorrelatedRows", NULL, NULL, "Number of data rows in correlation analysis",
                           NULL, SDDS_LONG, NULL)<0 ||
      SDDS_DefineParameter(&SDDSout, "sddscorrelateInputFile", NULL, NULL, "Data file processed by sddscorrelate",
                           NULL, SDDS_STRING, input?input:"stdin")<0 ||
      SDDS_DefineParameter(&SDDSout, "sddscorrelateMode", NULL, NULL, NULL, NULL, SDDS_STRING,
                           rankOrder?"Rank-Order (Spearman)":"Linear (Pearson)")<0 ||
      SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierPasses", NULL, NULL,
                            "Number of passes of standard-deviation outlier elimination applied",
                            NULL, SDDS_LONG, &outlierStDevPasses)<0 ||
      SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierLimit", NULL, NULL,
                            "Standard-deviation outlier limit applied",
                            NULL, SDDS_DOUBLE, &outlierStDevLimit)<0)
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

  /* explicit -majorOrder wins; otherwise inherit the input file's ordering */
  if (columnMajorOrder!=-1)
    SDDSout.layout.data_mode.column_major = columnMajorOrder;
  else
    SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;

  if (!SDDS_WriteLayout(&SDDSout))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

  if (!(data = malloc(sizeof(*data)*columns)) ||
      (rankOrder && !(rank = malloc(sizeof(*rank)*columns))) ||
      !(accept = malloc(sizeof(*accept)*columns)))
    SDDS_Bomb("allocation failure");

  while ((readCode=SDDS_ReadPage(&SDDSin))>0) {
    /* pages with fewer than 3 rows are skipped; no output page is written for them */
    if ((rows = SDDS_CountRowsOfInterest(&SDDSin))<3)
      continue;
    /* "CorrelatedRows" is an SDDS_LONG parameter; this file pairs SDDS_LONG
     * with int32_t storage, so pass a 32-bit value through the varargs
     * interface rather than a (possibly 64-bit) long. */
    if (!SDDS_StartPage(&SDDSout, columns*(columns-1)/2) ||
        !SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE,
                            "CorrelatedRows", (int32_t)rows, NULL))
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

    /* load per-column data, plus optional ranks and outlier accept masks */
    for (i=0; i<columns; i++) {
      if (!(data[i] = SDDS_GetColumnInDoubles(&SDDSin, column[i])))
        SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      if (rankOrder)
        rank[i] = findRank(data[i], rows);
      if (outlierStDevPasses) {
        if (!(accept[i] = malloc(sizeof(**accept)*rows)))
          SDDS_Bomb("allocation failure");
        markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses,
                          accept[i], rows);
      }
      else
        accept[i] = NULL;
    }

    for (i=row=0; i<columns; i++) {
      for (j=i+1; j<columns; j++) {
        iName1 = i;
        iName2 = j;
        if (withOnly) {
          /* keep only pairs that include the -withOnly column, and always
           * report that column as the second name of the pair */
          if (strcmp(withOnly, column[i])==0) {
            iName1 = j;
            iName2 = i;
          }
          else if (strcmp(withOnly, column[j])==0) {
            iName1 = i;
            iName2 = j;
          }
          else
            continue;
        }
        correlation
          = linearCorrelationCoefficient(rankOrder?rank[i]:data[i],
                                         rankOrder?rank[j]:data[j],
                                         accept[i], accept[j], rows, &count);
        significance = linearCorrelationSignificance(correlation, count);

        /* Column names come from the input file and are not length-limited
         * here, so format with a bound and refuse to emit a truncated label. */
        pairLength = snprintf(s, sizeof(s), "%s.%s", column[iName1], column[iName2]);
        if (pairLength<0 || (size_t)pairLength>=sizeof(s))
          SDDS_Bomb("column names too long to form CorrelatePair value");

        /* "CorrelationPoints" (index 5) is SDDS_LONG: pass it as int32_t */
        if (!SDDS_SetRowValues(&SDDSout, SDDS_SET_BY_INDEX|SDDS_PASS_BY_VALUE, row++,
                               0, column[iName1],
                               1, column[iName2],
                               2, s, 3, correlation, 4, significance,
                               5, (int32_t)count, -1))
          SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
    }

    /* release the per-page buffers (free(NULL) is a no-op for unused masks) */
    for (i=0; i<columns; i++) {
      free(data[i]);
      if (rankOrder)
        free(rank[i]);
      free(accept[i]);
    }
    if (!SDDS_WritePage(&SDDSout))
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
  }

  /* release the per-column pointer arrays allocated before the page loop */
  free(data);
  free(rank);
  free(accept);

  if (!SDDS_Terminate(&SDDSin)) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
    exit(1);
  }
  if (!SDDS_Terminate(&SDDSout)) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
    exit(1);
  }

  return 0;
}