/* Demo driver: prints the integer that findRank() yields for the word "string". */
int main(void)
{
	char word[] = "string";

	printf("%d", findRank(word));
	return 0;
}
Beispiel #2
0
int main(int argc, char **argv)
{
  int iArg;
  char **column, **excludeColumn, *withOnly;
  long columns, excludeColumns;
  char *input, *output;
  SCANNED_ARG *scanned;
  SDDS_DATASET SDDSin, SDDSout;
  long i, j, row, rows, count, readCode, rankOrder, iName1, iName2;
  int32_t outlierStDevPasses;
  double **data, correlation, significance, outlierStDevLimit;
  double **rank;
  short **accept;
  char s[SDDS_MAXLINE];
  unsigned long pipeFlags, dummyFlags, majorOrderFlag;
  short columnMajorOrder=-1;
  
  SDDS_RegisterProgramName(argv[0]);
  argc = scanargs(&scanned, argc, argv); 
  if (argc<2)
    bomb(NULL, USAGE);
  
  output = input = withOnly = NULL;
  columns = excludeColumns = 0;
  column = excludeColumn = NULL;
  pipeFlags = 0;
  rankOrder = 0;
  outlierStDevPasses = 0;
  outlierStDevLimit  = 1.;
  rank = NULL;
  accept = NULL;
  
  for (iArg=1; iArg<argc; iArg++) {
    if (scanned[iArg].arg_type==OPTION) {
      /* process options here */
      switch (match_string(scanned[iArg].list[0], option, N_OPTIONS, 0)) {
      case SET_MAJOR_ORDER:
        majorOrderFlag=0;
        scanned[iArg].n_items--;
        if (scanned[iArg].n_items>0 &&
            (!scanItemList(&majorOrderFlag, scanned[iArg].list+1, &scanned[iArg].n_items, 0,
			   "row", -1, NULL, 0, SDDS_ROW_MAJOR_ORDER,
                           "column", -1, NULL, 0, SDDS_COLUMN_MAJOR_ORDER,
                           NULL)))
          SDDS_Bomb("invalid -majorOrder syntax/values");
        if (majorOrderFlag&SDDS_COLUMN_MAJOR_ORDER)
          columnMajorOrder=1;
        else if (majorOrderFlag&SDDS_ROW_MAJOR_ORDER)
          columnMajorOrder=0;
        break;
      case SET_COLUMNS:
	if (columns)
	  SDDS_Bomb("only one -columns option may be given");
	if (scanned[iArg].n_items<2)
	  SDDS_Bomb("invalid -columns syntax");
	column = tmalloc(sizeof(*column)*(columns=scanned[iArg].n_items-1));
	for (i=0; i<columns; i++)
	  column[i] = scanned[iArg].list[i+1];
	break;
      case SET_EXCLUDE:
	if (scanned[iArg].n_items<2)
	  SDDS_Bomb("invalid -excludeColumns syntax");
	moveToStringArray(&excludeColumn, &excludeColumns, scanned[iArg].list+1, scanned[iArg].n_items-1);
	break;
      case SET_WITHONLY:
	if (withOnly)
	  SDDS_Bomb("only one -withOnly option may be given");
	if (scanned[iArg].n_items<2)
	  SDDS_Bomb("invalid -withOnly syntax");
	withOnly = scanned[iArg].list[1];
	break;
      case SET_PIPE:
	if (!processPipeOption(scanned[iArg].list+1, scanned[iArg].n_items-1, &pipeFlags))
	  SDDS_Bomb("invalid -pipe syntax");
	break;
      case SET_RANKORDER:
	rankOrder = 1;
	break;
      case SET_STDEVOUTLIER:
	scanned[iArg].n_items--;
	outlierStDevPasses = 1;
	outlierStDevLimit = 1;
	if (!scanItemList(&dummyFlags, scanned[iArg].list+1, &scanned[iArg].n_items, 0,
			  "limit", SDDS_DOUBLE, &outlierStDevLimit, 1, 0,
			  "passes", SDDS_LONG, &outlierStDevPasses, 1, 0,
			  NULL) || 
	    outlierStDevPasses<=0 || outlierStDevLimit<=0)
	  SDDS_Bomb("invalid -stdevOutlier syntax/values");
	break;
      default:
	fprintf(stderr, "error: unknown/ambiguous option: %s\n", 
		scanned[iArg].list[0]);
	exit(1);
	break;
      }
    }
    else {
      if (!input)
	input = scanned[iArg].list[0];
      else if (!output)
	output = scanned[iArg].list[0];
      else
	SDDS_Bomb("too many filenames seen");
    }
  }
  
  processFilenames("sddscorrelate", &input, &output, pipeFlags, 0, NULL);
  
  if (!SDDS_InitializeInput(&SDDSin, input))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
  
  if (!columns)
    columns = appendToStringArray(&column, columns, "*"); 
  if (withOnly)
    columns = appendToStringArray(&column, columns, withOnly);
  
  if ((columns=expandColumnPairNames(&SDDSin, &column, NULL, columns, 
				     excludeColumn, excludeColumns, FIND_NUMERIC_TYPE, 0))<=0) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    SDDS_Bomb("no columns selected for correlation analysis");
  }
  
  if (!SDDS_InitializeOutput(&SDDSout, SDDS_BINARY, 0, NULL, "sddscorrelate output", output) ||
      SDDS_DefineColumn(&SDDSout, "Correlate1Name", NULL, NULL, "Name of correlated quantity 1", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "Correlate2Name", NULL, NULL, "Name of correlated quantity 2", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelatePair", NULL, NULL, "Names of correlated quantities", NULL, SDDS_STRING, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationCoefficient", "r", NULL, "Linear correlation coefficient", 
			NULL, SDDS_DOUBLE, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationSignificance", "P$br$n", NULL, "Linear correlation coefficient significance", 
			NULL, SDDS_DOUBLE, 0)<0 ||
      SDDS_DefineColumn(&SDDSout, "CorrelationPoints", NULL, NULL, "Number of points used for correlation",
			NULL, SDDS_LONG, 0)<0 ||
      SDDS_DefineParameter(&SDDSout, "CorrelatedRows", NULL, NULL, "Number of data rows in correlation analysis", 
			   NULL, SDDS_LONG, NULL)<0 ||
      SDDS_DefineParameter(&SDDSout, "sddscorrelateInputFile", NULL, NULL, "Data file processed by sddscorrelate", 
			   NULL, SDDS_STRING, input?input:"stdin")<0 ||
      SDDS_DefineParameter(&SDDSout, "sddscorrelateMode", NULL, NULL, NULL, NULL, SDDS_STRING,
			   rankOrder?"Rank-Order (Spearman)":"Linear (Pearson)")<0 ||
      SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierPasses", NULL, NULL, 
			    "Number of passes of standard-deviation outlier elimination applied",
			    NULL, SDDS_LONG, &outlierStDevPasses)<0 ||
      SDDS_DefineParameter1(&SDDSout, "sddscorrelateStDevOutlierLimit", NULL, NULL, 
			    "Standard-deviation outlier limit applied",
			    NULL, SDDS_DOUBLE, &outlierStDevLimit)<0)
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);

  if (columnMajorOrder!=-1)
    SDDSout.layout.data_mode.column_major = columnMajorOrder;
  else
    SDDSout.layout.data_mode.column_major = SDDSin.layout.data_mode.column_major;
  
  if (!SDDS_WriteLayout(&SDDSout))
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
  
  if (!(data = (double**)malloc(sizeof(*data)*columns)) ||
      (rankOrder && !(rank = (double**)malloc(sizeof(*rank)*columns))) ||
      !(accept = (short**)malloc(sizeof(*accept)*columns)))
    SDDS_Bomb("allocation failure");
  while ((readCode=SDDS_ReadPage(&SDDSin))>0) {
    if ((rows = SDDS_CountRowsOfInterest(&SDDSin))<3)
      continue;
    if (!SDDS_StartPage(&SDDSout, columns*(columns-1)/2) ||
	!SDDS_SetParameters(&SDDSout, SDDS_SET_BY_NAME|SDDS_PASS_BY_VALUE, 
			    "CorrelatedRows", rows, NULL))
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
    for (i=0; i<columns; i++) {
      if (!(data[i] = SDDS_GetColumnInDoubles(&SDDSin, column[i])))
	SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      if (rankOrder)
	rank[i] = findRank(data[i], rows);
      if (outlierStDevPasses) {
	if (!(accept[i] = (short*)malloc(sizeof(**accept)*rows)))
	  SDDS_Bomb("allocation failure");
	markStDevOutliers(data[i], outlierStDevLimit, outlierStDevPasses,
			  accept[i], rows);
      }
      else
	accept[i] = NULL;
    }
    for (i=row=0; i<columns; i++) {
      for (j=i+1; j<columns; j++) {
	iName1 = i;
	iName2 = j;
	if (withOnly) {
	  if (strcmp(withOnly, column[i])==0) {
	    iName1 = j;
	    iName2 = i;
	  }
	  else if (strcmp(withOnly, column[j])==0) {
	    iName1 = i;
	    iName2 = j;
	  }
	  else 
	    continue;
	}
	correlation 
	  = linearCorrelationCoefficient(rankOrder?rank[i]:data[i], 
					 rankOrder?rank[j]:data[j], 
					 accept[i], accept[j], rows, &count);
	significance = linearCorrelationSignificance(correlation, count);
	sprintf(s, "%s.%s", column[iName1], column[iName2]);
                if (!SDDS_SetRowValues(&SDDSout, SDDS_SET_BY_INDEX|SDDS_PASS_BY_VALUE, row++,
                                       0, column[iName1], 
                                       1, column[iName2], 
                                       2, s, 3, correlation, 4, significance, 
                                       5, count, -1))
		  SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
      }
    }
    for (i=0; i<columns; i++) {
      free(data[i]);
      if (rankOrder)
	free(rank[i]);
      if (accept[i])
	free(accept[i]);
    }
    if (!SDDS_WritePage(&SDDSout))
      SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors|SDDS_EXIT_PrintErrors);
  }
  
  if (!SDDS_Terminate(&SDDSin)) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
    exit(1);
  }
  if (!SDDS_Terminate(&SDDSout)) {
    SDDS_PrintErrors(stderr, SDDS_VERBOSE_PrintErrors);
    exit(1);
  }
  
  return 0;
}