uint8_t _mk_corr_matrix(
  analyze_volume_t *vol,
  mat_t            *mat,
  corrtype_t        corrtype,
  uint32_t         *incvxls,
  uint32_t          nincvxls)
{
  uint64_t row;
  uint64_t col;
  uint32_t len;
  double   corrval;
  double  *rowtsdata;
  double  *coltsdata;

  rowtsdata = NULL;
  coltsdata = NULL;
  len       = vol->nimgs;

  rowtsdata = malloc(len * sizeof(double));
  if (rowtsdata == NULL) goto fail;

  coltsdata = malloc(len * sizeof(double));
  if (coltsdata == NULL) goto fail;

  for (row = 0; row < nincvxls; row++) {

    if (analyze_read_timeseries_by_idx(vol, incvxls[row], rowtsdata))
      goto fail;

    for (col = row; col < nincvxls; col++) {

      if (col == row) {
        if (mat_write_elem(mat, row, col, 0.0)) goto fail;
        continue;
      }

      if (analyze_read_timeseries_by_idx(vol, incvxls[col], coltsdata))
        goto fail;

      corrval = pearson(rowtsdata, coltsdata, len);

      if (mat_write_elem(mat, row, col, corrval)) goto fail;
    }
  }

  free(rowtsdata);
  free(coltsdata);
  return 0;

fail:
  if (rowtsdata != NULL) free(rowtsdata);
  if (coltsdata != NULL) free(coltsdata);
  return 1;
}
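/*
 * The routine above assumes a pearson() helper that correlates two double
 * time series of length len. That helper is not part of this listing; the
 * sketch below is a minimal stand-alone version with the assumed signature
 * (plain sample Pearson correlation), not necessarily the original one.
 */
#include <math.h>
#include <stdint.h>

static double pearson(const double *x, const double *y, uint32_t len) {

  double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
  uint32_t i;

  for (i = 0; i < len; i++) {
    sx  += x[i];
    sy  += y[i];
    sxx += x[i] * x[i];
    syy += y[i] * y[i];
    sxy += x[i] * y[i];
  }

  /* r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2)) */
  double num = len * sxy - sx * sy;
  double den = sqrt((len * sxx - sx * sx) * (len * syy - sy * sy));

  return den != 0.0 ? num / den : 0.0;
}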
void CQboduinoDriver::prepareData(std::string& escritura, std::string& preparedData)
{
    preparedData.clear();
    preparedData.push_back(INPUT_FLAG);

    // Append a one-byte checksum of the payload before byte-stuffing it.
    uint8_t check = pearson((uint8_t *)escritura.c_str(), (uint8_t)escritura.size());
    escritura.push_back((char)check);

    // Escape any payload byte that collides with the flag or escape markers
    // (escaped bytes are stored as value - 2).
    for (unsigned int i = 0; i < escritura.size(); i++)
    {
        if ((uint8_t)escritura[i] == INPUT_FLAG || (uint8_t)escritura[i] == INPUT_SCAPE || (uint8_t)escritura[i] == OUTPUT_FLAG)
        {
            preparedData.push_back(INPUT_SCAPE);
            preparedData.push_back(escritura[i] - 2);
        }
        else
        {
            preparedData.push_back(escritura[i]);
        }
    }
    preparedData.push_back(OUTPUT_FLAG);
}
int CQboduinoDriver::processResponse(uint8_t *buf, uint32_t length, std::string& lectura)
{
    lectura.clear();
    if (length < 5) return -1;
    if (buf[0] != INPUT_FLAG) return -2;
    if (buf[length-1] != OUTPUT_FLAG) return -3;

    // Undo the byte-stuffing applied by prepareData()
    // (escaped bytes were stored as value - 2).
    uint8_t data[128];
    bool escapeEntrada = false;
    int datosComando = 0;
    for (uint32_t i = 1; i < length-1; i++)
    {
        if (escapeEntrada)
        {
            data[datosComando] = buf[i] + 2;
            escapeEntrada = false;
            datosComando++;
        }
        else if (buf[i] == INPUT_SCAPE)
            escapeEntrada = true;
        else
        {
            data[datosComando] = buf[i];
            escapeEntrada = false;
            datosComando++;
        }
    }

    // The last unescaped byte is the checksum of the preceding payload.
    uint8_t check = pearson(data, datosComando-1);
    uint8_t inCheck = data[datosComando-1];
    if (check != inCheck)
    {
        return -4;
    }
    for (int i = 0; i < datosComando-1; i++)
        lectura.push_back(data[i]);
    return 1;
}
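/*
 * Both methods above rely on a one-byte pearson() checksum over the raw
 * payload bytes. That helper is not shown here; the sketch below is a
 * minimal 8-bit Pearson hash with the assumed signature. The permutation
 * used ((v * 167 + 13) mod 256) is only an illustrative bijection of
 * 0..255 -- the real driver and firmware must agree on the same table.
 */
static uint8_t pearson(uint8_t *data, uint8_t size)
{
    uint8_t h = 0;
    for (uint8_t i = 0; i < size; ++i)
    {
        // Pearson hashing: h = T[h ^ byte], with T a fixed permutation of 0..255.
        h = (uint8_t)(((h ^ data[i]) * 167u) + 13u);
    }
    return h;
}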
#include <stdio.h>
#include <stdlib.h>

/* pearson() over two float arrays is defined elsewhere in this program. */

int main(int argc, char *argv[])
{
    int i, j, k, w;
    char line[20000];
    int linecount;
    int columncount;
    FILE *in, *out;
    int determine;

    if (argc != 3) {
        printf("A programme to calculate Pearson Correlation Coefficient.\n");
        printf("Please input the command as the format: Pearson2 <infile> <outfile>\n");
        exit(0);
    }

    in = fopen(argv[1], "r");
    if (in != NULL) printf("Input file is opened.\n");
    if (in == NULL) { printf("cannot open infile\n"); exit(0); }

    out = fopen(argv[2], "w");
    if (out == NULL) { printf("cannot open outfile\n"); exit(0); }

    /* First pass: count lines, then count tabs in the last line read to get
       the number of data columns. */
    linecount = 0;
    while (fgets(line, 20000, in) != NULL) {
        linecount++;
    }
    columncount = 0;
    for (i = 0; line[i] != '\0'; i++) {
        if (line[i] == '\t') { columncount++; }
    }
    printf("line: %d\n", linecount);
    printf("column: %d\n", columncount);

    determine = fclose(in);
    if (determine == 0) printf("Input file is closed.\n");

    int rowcount;
    rowcount = linecount - 1;
    printf("rowcount: %d\n", rowcount);

    char name1[30];
    char name2[30];
    char transfer[30];
    float data1[columncount];
    float data2[columncount];
    char t;
    int line_n1;
    int line_n2;
    int file1_linecount;

    /* Second pass: for every data row (the first line is a header), re-read
       the file to fetch that row, then pair it with every other data row. */
    for (line_n1 = 1; line_n1 <= linecount; line_n1++) {
        in = fopen(argv[1], "r");
        /*if (in!=NULL) printf("\n\nInput file is opened again.\n");*/
        fgets(line, 20000, in);
        printf("line_n1: %d\n", line_n1);
        /*printf("%s\n",line);*/
        if (line_n1 > 1) {
            for (file1_linecount = 2; file1_linecount <= line_n1; file1_linecount++) {
                /*printf("file1_linecount: %d\n",file1_linecount);*/
                fgets(line, 20000, in);
                /*printf("%s\n",line);*/
            }
            fclose(in);

            /* Row name, then columncount tab-separated values. */
            for (i = 0; line[i] != '\t'; i++) { name1[i] = line[i]; }
            name1[i] = '\0';
            /*printf("The name is: %s\n",name1);*/
            i++;
            for (k = 0; k < columncount; k++) {
                for (j = 0; line[i] != '\t' && line[i] != '\n'; i++, j++) { transfer[j] = line[i]; }
                data1[k] = atof(transfer);
                for (j = 0; (t = transfer[j]) != '\0'; j++) { transfer[j] = 0; }
                i++;
            }
            /*printf("The data of %s are: ",name1);
            for (k=0;k<columncount;k++) { printf("%f\t",data1[k]); }
            printf("\n");*/

            in = fopen(argv[1], "r");
            for (line_n2 = 1; line_n2 <= linecount; line_n2++) {
                fgets(line, 20000, in);
                if (line_n2 > 1) {
                    if (line_n2 != line_n1) {
                        for (i = 0; line[i] != '\t'; i++) { name2[i] = line[i]; }
                        name2[i] = '\0';
                        i++;
                        for (k = 0; k < columncount; k++) {
                            for (j = 0; line[i] != '\t' && line[i] != '\n'; i++, j++) { transfer[j] = line[i]; }
                            data2[k] = atof(transfer);
                            for (j = 0; (t = transfer[j]) != '\0'; j++) { transfer[j] = 0; }
                            i++;
                        }
                        /*printf("%s\t%s\t%f\n",name1,name2,pearson(data1,data2,columncount));*/
                        fprintf(out, "%s\t%s\t%f\r\n", name1, name2, pearson(data1, data2, columncount));
                    }
                }
            }
            fclose(in);
        }
    }
    fclose(out);
    return 0;
}
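/*
 * Input expected by the parser above (an assumed illustration inferred from
 * the parsing loops, not taken from the original sources): a tab-separated
 * table whose first line is a header and whose remaining lines each hold a
 * row name followed by one value per column, e.g.
 *
 *   gene<TAB>s1<TAB>s2<TAB>s3
 *   g1<TAB>1.0<TAB>2.5<TAB>3.0
 *   g2<TAB>2.0<TAB>4.5<TAB>6.0
 *
 * Run as "Pearson2 <infile> <outfile>"; the output holds one line per ordered
 * pair of distinct rows: name1<TAB>name2<TAB>r.
 */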
static void vary_threshold(mcxIO* xf, FILE* fp, int vary_a, int vary_z, int vary_s, int vary_n, unsigned mode) {
  dim cor_i = 0, j;
  int step;

  mclx* mx;
  unsigned long noe;
  pval* allvals;
  dim n_allvals = 0;
  double sum_vals = 0.0;

  mx = mclxRead(xf, EXIT_ON_FAIL);
  mcxIOclose(xf);

  if (transform)
    mclgTFexec(mx, transform);

  noe = mclxNrofEntries(mx);
  allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL);

  if (!weight_scale) {
    if (mode == 'c')
      weight_scale = 1.0;
    else
      weight_scale = vary_n;
  }

  n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE);

  if (mode == 'c') {
    double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX;
    if (vary_a * 1.0 / vary_n < smallest) {
      while (vary_a * 1.0 / vary_n < smallest)
        vary_a++;
      vary_a--;
    }
    mcxTell(me, "smallest correlation is %.2f, using starting point %.2f", smallest, vary_a * 1.0 / vary_n);
  }

  if (output_flags & OUTPUT_TABLE) {
    fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n",
            mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff");
  }
  else {
    if (output_flags & OUTPUT_KEY) {
      fprintf(fp, "-------------------------------------------------------------------------------\n");
      fprintf(fp, " L Percentage of nodes in the largest component\n");
      fprintf(fp, " D Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g);
      fprintf(fp, " R Percentage of nodes not in L or D: 100 - L -D\n");
      fprintf(fp, " S Percentage of nodes that are singletons\n");
      fprintf(fp, " cce Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n");
      fprintf(fp, "*EW Edge weight traits (mean, median and IQR, all scaled!)\n");
      fprintf(fp, " Scaling is used to avoid printing of fractional parts throughout.\n");
      fprintf(fp, " The scaling factor is %.2f [-report-scale option]\n", weight_scale);
      fprintf(fp, " ND Node degree traits [mean, median and IQR]\n");
      fprintf(fp, " CCF Clustering coefficient %s\n",
              compute_flags & COMPUTE_CLCF ? "" : "(not computed; use --clcf to include this)");
      fprintf(fp, " eff Induced component efficiency %s\n",
              compute_flags & COMPUTE_EFF ? "" : "(not computed; use --eff to include this)");
      if (mode == 'c')
        fprintf(fp, "Cutoff The threshold used.\n");
      else if (mode == 't')
        fprintf(fp, "*Cutoff The threshold with scale factor %.2f and fractional parts removed\n", weight_scale);
      else if (mode == 'k')
        fprintf(fp, "k-NN The knn parameter\n");
      else if (mode == 'l')
        fprintf(fp, "N The knn parameter (merge mode)\n");
      else if (mode == 'n')
        fprintf(fp, "ceil The ceil parameter\n");
      fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx));
    }
    fprintf(fp, "-------------------------------------------------------------------------------\n");
    fprintf(fp, " L D R S cce *EWmean *EWmed *EWiqr NDmean NDmed NDiqr CCF eff %6s \n",
            mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff");
    fprintf(fp, "-------------------------------------------------------------------------------\n");
  }

  for (step = vary_a; step <= vary_z; step += vary_s) {
    double cutoff = step * 1.0 / vary_n;
    double eff = -1.0;
    mclv* nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0);
    mclv* degree = mclvCanonical(NULL, N_COLS(mx), 0.0);
    dim i, n_sample = 0;
    double cor, y_prev, iqr = 0.0;
    mclx* cc = NULL, *res = NULL;
    mclv* sz, *ccsz = NULL;
    int step2 = vary_z + vary_a - step;

    sum_vals = 0.0;

    if (mode == 't' || mode == 'c')
      mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ), res = mx;
    else if (mode == 'k') {
      res = rebase_g ? mclxCopy(mx) : mx;
      mclxKNNdispatch(res, step2, n_thread_l, 1);
    }
    else if (mode == 'l') {
      res = mx;
      mclxKNNdispatch(res, step2, n_thread_l, 0);
    }
    else if (mode == 'n') {
      res = rebase_g ? mclxCopy(mx) : mx;
      mclv* cv = mclgCeilNB(res, step2, NULL, NULL, NULL);
      mclvFree(&cv);
    }

    sz = mclxColSizes(res, MCL_VECTOR_COMPLETE);
    mclvSortDescVal(sz);

    cc = clmUGraphComponents(res, NULL);   /* fixme: user has to specify -tf '#max()' if graph is directed */
    if (cc) {
      ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE);
      if (compute_flags & COMPUTE_EFF) {
        clmPerformanceTable pftable;
        clmPerformance(mx, cc, &pftable);
        eff = pftable.efficiency;
      }
    }

    if (mode == 't' || mode == 'c') {
      for ( ; n_allvals > 0 && allvals[n_allvals-1] < cutoff; n_allvals--)
        ;
      sum_vals = 0.0;
      for (i = 0; i < n_allvals; i++)
        sum_vals += allvals[i];
    }
    else if (mode == 'k' || mode == 'n' || mode == 'l') {
      n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE);
    }

    levels[cor_i].sim_median = mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr);
    levels[cor_i].sim_iqr    = iqr;
    levels[cor_i].sim_mean   = n_allvals ? sum_vals / n_allvals : 0.0;

    levels[cor_i].nb_median  = mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr);
    levels[cor_i].nb_iqr     = iqr;
    levels[cor_i].nb_mean    = mclvSum(sz) / N_COLS(res);
    levels[cor_i].cc_exp     = cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0;
    levels[cor_i].nb_sum     = mclxNrofEntries(res);

    if (compute_flags & COMPUTE_CLCF) {
      mclv* clcf = mclgCLCFdispatch(res, n_thread_l);
      levels[cor_i].clcf = mclvSum(clcf) / N_COLS(mx);
      mclvFree(&clcf);
    }
    else
      levels[cor_i].clcf = 0.0;

    levels[cor_i].threshold = mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff;
    levels[cor_i].bigsize   = cc ? cc->cols[0].n_ivps : 0;
    levels[cor_i].n_single  = 0;
    levels[cor_i].n_edge    = n_allvals;
    levels[cor_i].n_lq      = 0;

    if (cc)
      for (i = 0; i < N_COLS(cc); i++) {
        dim n = cc->cols[N_COLS(cc)-1-i].n_ivps;
        if (n == 1)
          levels[cor_i].n_single++;
        if (n <= divide_g)
          levels[cor_i].n_lq += n;
        else
          break;
      }

    if (levels[cor_i].bigsize <= divide_g)
      levels[cor_i].bigsize = 0;

    y_prev = sz->ivps[0].val;

    /* wiki says: A scale-free network is a network whose degree distribution follows a power
     * law, at least asymptotically. That is, the fraction P(k) of nodes in the network having
     * k connections to other nodes goes for large values of k as P(k) ~ k^-g where g is a
     * constant whose value is typically in the range 2<g<3, although occasionally it may lie
     * outside these bounds.
     */
    for (i = 1; i < sz->n_ivps; i++) {
      double y = sz->ivps[i].val;
      if (y > y_prev - 0.5)
        continue;                                                         /* same as node degree seen last */
      nnodes->ivps[n_sample].val = log((i * 1.0) / (1.0 * N_COLS(res)));  /* x = #nodes >= k, as fraction */
      degree->ivps[n_sample].val = log(y_prev ? y_prev : 1);              /* y = k = degree of node */
      n_sample++;
      if (0)
        fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) y_prev, (int) i,
                (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val);
      y_prev = y;
    }

    nnodes->ivps[n_sample].val = 0;
    degree->ivps[n_sample++].val = log(y_prev ? y_prev : 1);
    if (0) {
      fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) sz->ivps[sz->n_ivps-1].val, (int) N_COLS(res),
              (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val);
    }

    mclvResize(nnodes, n_sample);
    mclvResize(degree, n_sample);
    cor = pearson(nnodes, degree, n_sample);

    levels[cor_i].degree_cor = cor * cor;

    if (0)
      fprintf(stdout, "cor at cutoff %.2f %.3f\n\n", cutoff, levels[cor_i-1].degree_cor);

    mclvFree(&nnodes);
    mclvFree(&degree);
    mclvFree(&sz);
    mclvFree(&ccsz);
    mclxFree(&cc);

    if (output_flags & OUTPUT_TABLE) {
      fprintf(fp,
              "%lu\t%lu\t%lu\t%lu\t%lu"
              "\t%6g\t%6g\t%6g"
              "\t%6g\t%lu\t%6g",
              (ulong) levels[cor_i].bigsize,
              (ulong) levels[cor_i].n_lq,
              (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq,
              (ulong) levels[cor_i].n_single,
              (ulong) levels[cor_i].cc_exp,
              (double) levels[cor_i].sim_mean,
              (double) levels[cor_i].sim_median,
              (double) levels[cor_i].sim_iqr,
              (double) levels[cor_i].nb_mean,
              (ulong) levels[cor_i].nb_median,
              (double) levels[cor_i].nb_iqr);
      if (compute_flags & COMPUTE_CLCF)
        fprintf(fp, "\t%6g", levels[cor_i].clcf);
      else
        fputs("\tNA", fp);
      if (eff >= 0.0)
        fprintf(fp, "\t%4g", eff);
      else
        fputs("\tNA", fp);
      fprintf(fp, "\t%6g", (double) levels[cor_i].threshold);
      fputc('\n', fp);
    }
    else {
      fprintf(fp,
              "%3d %3d %3d %3d %7d "
              "%7.0f %7.0f %6.0f"
              "%6.1f %6.0f %6.0f",
              0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx)),
              0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx)),
              0 ? 1 : (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx)),
              0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx)),
              0 ? 1 : (int) (0.5 + levels[cor_i].cc_exp),
              0 ? 1.0 : (double) (levels[cor_i].sim_mean * weight_scale),
              0 ? 1.0 : (double) (levels[cor_i].sim_median * weight_scale),
              0 ? 1.0 : (double) (levels[cor_i].sim_iqr * weight_scale),
              0 ? 1.0 : (double) (levels[cor_i].nb_mean),
              0 ? 1.0 : (double) (levels[cor_i].nb_median + 0.5),
              0 ? 1.0 : (double) (levels[cor_i].nb_iqr + 0.5));
      if (compute_flags & COMPUTE_CLCF)
        fprintf(fp, " %3d", 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].clcf)));
      else
        fputs(" -", fp);
      if (eff >= 0.0)
        fprintf(fp, " %3d", (int) (0.5 + 1000 * eff));
      else
        fputs(" -", fp);
      if (mode == 'c')
        fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold);
      else if (mode == 't')
        fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold * weight_scale);
      else if (mode == 'k' || mode == 'n' || mode == 'l')
        fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold);
    }

    cor_i++;
    if (res != mx)
      mclxFree(&res);
  }

  if (!(output_flags & OUTPUT_TABLE)) {
    if (weefreemen) {
      fprintf(fp, "-------------------------------------------------------------------------------\n");
      fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n");
      fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n");
      fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity");
      fprintf(fp, "-------------------------------------------------------------------------------\n");
      for (j = 0; j < cor_i; j++) {
        dim jj;
        for (jj = 30; jj <= 100; jj++) {
          char c = ' ';
          if (jj * 0.01 < levels[j].degree_cor && (jj+1.0) * 0.01 > levels[j].degree_cor)
            c = 'X';
          else if (jj % 5 == 0)
            c = '|';
          fputc(c, fp);
        }
        if (mode == 'c')
          fprintf(fp, "%8.2f\n", (double) levels[j].threshold);
        else
          fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale);
      }
      fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n");
      fprintf(fp, "| R^2 0.4 0.5 0.6 0.7 0.8 0.9 | 1.0 -o)\n");
      fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+ /\\\\\n");
      fprintf(fp, "| 2 4 6 8 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | _\\_/\n");
      fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n");
    }
    else
      fprintf(fp, "-------------------------------------------------------------------------------\n");
  }

  mclxFree(&mx);
  mcxFree(allvals);
}
void ViStatistician::process()
{
    QObject::disconnect(mCurrentObject.data(), SIGNAL(decoded()), this, SLOT(process()));
    ViAudioReadData data(mCurrentObject->buffer(ViAudio::Target));
    data.setSampleCount(mWindowSize);

    qreal value = 0;
    qint64 count = 0;

    qreal top = 0, bottom1 = 0, bottom2 = 0;
    qreal valuePos = 0, valueNeg = 0;
    int countPos = 0, countNeg = 0;

    while(data.hasData())
    {
        data.read();
        ViSampleChunk &samples1 = data.splitSamples(0);
        ViSampleChunk &samples2 = data.splitSamples(1);

        if(mMode == Mean)
        {
            value += mean(samples1);
            value += mean(samples2);
            count += 2;
        }
        else if(mMode == StandardDeviation)
        {
            value += standardDeviation(samples1);
            value += standardDeviation(samples2);
            count += 2;
        }
        else if(mMode == Pearson)
        {
            qreal theTopA = 0, theBottom1A = 0, theBottom2A = 0;
            qreal theTopB = 0, theBottom1B = 0, theBottom2B = 0;
            qreal value1 = pearson(samples1, theTopA, theBottom1A, theBottom2A);
            qreal value2 = pearson(samples2, theTopB, theBottom1B, theBottom2B);

            /*if(value1 != 0)
            {
                top += theTopA;
                bottom1 += theBottom1A;
                bottom2 += theBottom2A;
            }
            if(value2 != 0)
            {
                top += theTopB;
                bottom1 += theBottom1B;
                bottom2 += theBottom2B;
            }*/

            // Accumulate positive and negative correlations separately.
            if(value1 > 0) { valuePos += value1; ++countPos; }
            else if(value1 < 0) { valueNeg += value1; ++countNeg; }
            if(value2 > 0) { valuePos += value2; ++countPos; }
            else if(value2 < 0) { valueNeg += value2; ++countNeg; }
        }
    }

    if(mMode == Pearson)
    {
        /*value = top / (qSqrt(bottom1) * qSqrt(bottom2));
        mValue += value;
        mCount += 1;
        printFileData(value);
        printTerminal(value);*/

        mValuePos += valuePos;
        mCountPos += countPos;
        mValueNeg += valueNeg;
        mCountNeg += countNeg;
        valuePos /= countPos;
        valueNeg /= countNeg;
        printFileDataPearson(valuePos, valueNeg);
        printTerminalPearson(valuePos, valueNeg);
    }
    else
    {
        mValue += value;
        mCount += count;
        value /= count;
        printFileData(value);
        printTerminal(value);
    }
    nextFile();
}
/* ***************************
 *
 * Main computational kernel
 *
 * *************************** */

int correlationKernel(int rank,
                      int size,
                      double* dataMatrixX,
                      double* dataMatrixY,
                      int columns,
                      int rows,
                      char *out_filename,
                      int distance_flag) {

    int local_check = 0, global_check = 0;
    int i = 0, j, taskNo;
    int err, count = 0;
    unsigned long long fair_chunk = 0, coeff_count = 0;
    unsigned int init_and_cleanup_loop_iter = 0;
    unsigned long long cor_cur_size = 0;

    double start_time, end_time;

    // Variables needed by the Indexed Datatype
    MPI_Datatype coeff_index_dt;
    MPI_File fh;
    int *blocklens, *indices;

    MPI_Status stat;
    MPI_Comm comm = MPI_COMM_WORLD;

    // Master processor keeps track of tasks
    if (rank == 0) {

        // Make sure everything will work fine even if there are fewer genes
        // than available workers (there are size-1 workers; the master does not count)
        if ( (size-1) > rows )
            init_and_cleanup_loop_iter = rows+1;
        else
            init_and_cleanup_loop_iter = size;

        // Start timer
        start_time = MPI_Wtime();

        // Send out initial tasks (remember you have size-1 workers, master does not count)
        for (i=1; i<init_and_cleanup_loop_iter; i++) {
            taskNo = i-1;
            err = MPI_Send(&taskNo, 1, MPI_INT, i, 0, comm);
        }

        // Terminate any processes that were not working due to the fact that
        // the number of rows was less than the actual available workers
        for (i=init_and_cleanup_loop_iter; i < size; i++) {
            PROF(rank, "\nPROF_idle : Worker %d terminated due to insufficient work load", i);
            err = -1;
            err = MPI_Send(&err, 1, MPI_INT, i, 0, comm);
        }

        // Wait for workers to finish their work assignment and ask for more
        for (i=init_and_cleanup_loop_iter-1; i<rows; i++) {
            err = MPI_Recv(&taskNo, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &stat);

            // Check taskNo to make sure everything is ok. Negative means there is a problem,
            // thus terminate gracefully all remaining working workers
            if ( taskNo < 0 ) {
                // Reduce by one because one worker is already terminated
                init_and_cleanup_loop_iter--;
                // Break and cleanup
                break;
            }

            // The sending processor is ready to work:
            // Its ID is in stat.MPI_SOURCE
            // Send it the current task (i)
            err = MPI_Send(&i, 1, MPI_INT, stat.MPI_SOURCE, 0, comm);
        }

        // Clean up processors
        for (i=1; i<init_and_cleanup_loop_iter; i++) {
            // All tasks complete - shutdown workers
            err = MPI_Recv(&taskNo, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &stat);

            // If a process failed then it will not be waiting to receive anything;
            // we have to skip the send because it would deadlock
            if ( taskNo < 0 )
                continue;
            err = -1;
            err = MPI_Send(&err, 1, MPI_INT, stat.MPI_SOURCE, 0, comm);
        }

        // Master is *always* OK
        local_check = 0;
        MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

        // Check failed, abort
        if ( global_check != 0 ) {
            return -1;
        }

        // Stop timer
        end_time = MPI_Wtime();
        PROF(rank, "\nPROF_comp (workers=%d) : Time taken by correlation coefficients computations : %g\n", size-1, end_time - start_time);

        // Start timer
        start_time = MPI_Wtime();

        // Master process must call MPI_File_set_view as well; it's a collective call
        // Open the file handler
        MPI_File_open(comm, out_filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

        // Create the file view
        MPI_File_set_view(fh, 0, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);

        // Write data to disk (the master contributes a zero-length write to the collective call)
        MPI_File_write_all(fh, &cor[0], 0, MPI_DOUBLE, &stat);

        // Stop timer
        end_time = MPI_Wtime();
        PROF(rank, "\nPROF_write (workers=%d) : Time taken for global write-file : %g\n", size-1, end_time - start_time);

    } else {

        // Compute how many workers will share the work load
        // Two scenarios exist:
        // (1) more OR equal number of workers and rows exist
        // (2) more rows than workers
        if ( (size-1) > rows ) {
            // For this scenario each worker will get exactly one work assignment.
            // There is not going to be any other work so it only computes "rows" number
            // of coefficients
            fair_chunk = rows;
            cor_cur_size = fair_chunk;
        } else {
            // For this scenario we are going to allocate space equal to a fair
            // distribution of work assignments *plus* an extra amount of space to
            // cover any load imbalance. This amount is expressed as a percentage
            // of the fair work distribution (see on top, 20% for now)

            // Plus 1 to round it up or just add some extra space, both are fine
            fair_chunk = (rows / (size-1)) + 1;
            DEBUG("fair_chunk %llu \n", fair_chunk);

            // We can use "j" as a temporary variable.
            // Plus 1 to avoid getting 0 from the multiplication.
            j = (fair_chunk * MEM_PERC) + 1;

            cor_cur_size = (fair_chunk + j) * rows;
            DEBUG("cor_cur_size %llu \n", cor_cur_size);
        }

        // Allocate memory
        DEBUG("cor_cur_size %llu \n", cor_cur_size);
        long long double_size = sizeof(double);
        DEBUG("malloc size %lld \n", (double_size * cor_cur_size));
        cor = (double *)malloc(double_size * cor_cur_size);

        blocklens = (int *)malloc(sizeof(int) * rows);
        indices = (int *)malloc(sizeof(int) * rows);

        mean_value_vectorX = (double *)malloc(sizeof(double) * rows);
        Sxx_vector = (double *)malloc(sizeof(double) * rows);
        mean_value_vectorY = (double *)malloc(sizeof(double) * rows);
        Syy_vector = (double *)malloc(sizeof(double) * rows);

        // Check that all memory is successfully allocated
        if ( ( cor == NULL ) || ( blocklens == NULL ) || ( indices == NULL ) ||
             ( mean_value_vectorX == NULL ) || ( Sxx_vector == NULL ) ||
             ( mean_value_vectorY == NULL ) || ( Syy_vector == NULL ) ) {
            ERR("**ERROR** : Memory allocation failed on worker process %d. Aborting.\n", rank);

            // Free allocated memory
            free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

            // Let the master process know it's aborting in order to terminate
            // the rest of the working workers.
            // We have to receive a work assignment first and then terminate,
            // otherwise the master will deadlock trying to give work to this worker
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);
            taskNo = -1;
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

            // This worker failed
            local_check = 1;
            MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

            return -1;
        }

        // Compute necessary parameters for the Pearson method
        // (this will transform the values of the input array to more meaningful data
        // and save us from a lot of redundant computations)
        compute_parameters(dataMatrixX, dataMatrixY, rows, columns);

        // Main loop for workers. They get work from the master, compute coefficients,
        // save them to their *local* vector and ask for more work
        for (;;) {
            // Get work
            err = 0;
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);

            // If received task is -1, function is terminated
            if ( taskNo == -1 )
                break;

            // Check if there is enough memory to store the new coefficients; if not, reallocate
            // the current memory and expand it by MEM_PERC of the approximated size
            if ( cor_cur_size < (coeff_count + rows) ) {
                PROF(0, "\n**WARNING** : Worker process %3d ran out of memory and reallocates. Potential work imbalance\n", rank);
                DEBUG("\n**WARNING** : Worker process %3d ran out of memory and reallocates. Potential work imbalance\n", rank);

                // Use j as a temporary again. Add two (or any other value) to avoid 0.
                // (two is just a random value, you can put any value really...)
                j = (fair_chunk * MEM_PERC) + 2;
                cor_cur_size += (j * rows);

                // Reallocate and check
                cor = (double *)realloc(cor, sizeof(double) * cor_cur_size);
                if ( cor == NULL ) {
                    ERR("**ERROR** : Memory re-allocation failed on worker process %d. Aborting.\n", rank);

                    // Let the master process know it's aborting in order to terminate
                    // the rest of the working workers
                    taskNo = -1;
                    err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

                    // This worker failed
                    local_check = 1;
                    MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

                    // Free all allocated memory
                    free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

                    return -1;
                }
            }

            // Compute the correlation coefficients
            if (dataMatrixY != NULL) {
                for (j=0; j < rows; j++) {
                    cor[coeff_count] = pearson_XY(dataMatrixX, dataMatrixY, j, taskNo, columns);
                    coeff_count++;
                }
            } else {
                for (j=0; j < rows; j++) {
                    // Set main diagonal to 1
                    if ( j == taskNo ) {
                        cor[coeff_count] = 1.0;
                        coeff_count++;
                        continue;
                    }
                    cor[coeff_count] = pearson(dataMatrixX, taskNo, j, columns);
                    coeff_count++;
                }
            }

            // The value of blocklens[] represents the number of coefficients on each
            // row of the correlation array
            blocklens[count] = rows;

            // The value of indices[] represents the offset of each row in the data file
            indices[count] = (taskNo * rows);
            count++;

            // Give the master the taskID
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);
        }

        // There are two possibilities:
        // (a) everything went well and all workers finished ok
        // (b) some processes finished ok but one or more of the remaining working workers failed
        // To make sure all is well, an all-reduce will be performed to sync all workers and
        // guarantee success before moving on to write the output file

        // This worker is OK
        local_check = 0;
        MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

        // Check failed
        if ( global_check != 0 ) {
            // Free all allocated memory
            free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);
            return -1;
        }

        PROF(0, "\nPROF_stats (thread %3d) : Fair chunk of work : %llu \t\t Allocated : %llu \t\t Computed : %llu\n", rank, fair_chunk, cor_cur_size, coeff_count);

        // If the distance_flag is set, then transform all correlation coefficients to distances
        if ( distance_flag == 1 ) {
            for (j=0; j < coeff_count; j++) {
                cor[j] = 1 - cor[j];
            }
        }

        // Create and commit the Indexed datatype *ONLY* if there are data available
        if ( coeff_count != 0 ) {
            MPI_Type_indexed(count, blocklens, indices, MPI_DOUBLE, &coeff_index_dt);
            MPI_Type_commit(&coeff_index_dt);
        }

        // Open the file handler
        MPI_File_open(comm, out_filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

        // Create the file view
        if ( coeff_count != 0 ) {
            MPI_File_set_view(fh, 0, MPI_DOUBLE, coeff_index_dt, "native", MPI_INFO_NULL);
        } else {
            MPI_File_set_view(fh, 0, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
        }

        // Write data to disk
        // TODO coeff_count cannot be greater than max int (for use in the MPI_File_write_all call).
        // A better fix should be possible; for now throw an error.
        DEBUG("\ncoeff_count is %llu\n", coeff_count);
        DEBUG("\nINT_MAX is %d\n", INT_MAX);
        if (coeff_count > INT_MAX) {
            ERR("**ERROR** : Could not run as the chunks of data are too large. Try running again with more MPI processes.\n");

            // Free allocated memory
            free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

            // Let the master process know it's aborting in order to terminate
            // the rest of the working workers.
            // We have to receive a work assignment first and then terminate,
            // otherwise the master will deadlock trying to give work to this worker
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);
            taskNo = -1;
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

            // This worker failed
            local_check = 1;
            MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
            return -1;
        }

        DEBUG("\nWriting %llu to disk\n", coeff_count);

        MPI_File_write_all(fh, &cor[0], (int) coeff_count, MPI_DOUBLE, &stat);

        if (coeff_count != 0 )
            MPI_Type_free(&coeff_index_dt);

        // Free all allocated memory
        free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);
    }

    DEBUG("\nAbout to write to disk %d\n", rank);
    MPI_File_sync(fh);       // Causes all previous writes to be transferred to the storage device
    DEBUG("\nWritten to disk %d\n", rank);
    // MPI_Barrier( MPI_COMM_WORLD ) ;  // Blocks until all processes in the communicator have reached this routine.
    DEBUG("\nAfter barrier \n", rank);

    // Close file handler
    MPI_File_close(&fh);
    DEBUG("\nAfter file closed \n");
    // MPI_Barrier( MPI_COMM_WORLD ) ;  // Blocks until all processes in the communicator have reached this routine.
    DEBUG("\nAbout to return from kernel \n");
    return 0;
}
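/*
 * The worker loop above depends on helpers defined elsewhere in the project:
 * compute_parameters() is described as pre-computing per-row quantities, and
 * pearson()/pearson_XY() then use them. The sketch below is one plausible
 * shape for that split -- per-row mean and centred sum of squares cached in
 * the global vectors allocated above -- and is an assumption, not the
 * original implementation. pearson_XY() would be analogous, pairing a row of
 * dataMatrixX with a row of dataMatrixY via mean_value_vectorY/Syy_vector.
 */
#include <math.h>
#include <stddef.h>

extern double *mean_value_vectorX, *Sxx_vector;
extern double *mean_value_vectorY, *Syy_vector;

void compute_parameters(double *dataMatrixX, double *dataMatrixY, int rows, int columns) {
    int i, j;
    for (i = 0; i < rows; i++) {
        double mean = 0.0, ss = 0.0;

        // Row mean of X
        for (j = 0; j < columns; j++)
            mean += dataMatrixX[i*columns + j];
        mean /= columns;

        // Centred sum of squares of X (the Sxx term of the Pearson denominator)
        for (j = 0; j < columns; j++) {
            double d = dataMatrixX[i*columns + j] - mean;
            ss += d * d;
        }
        mean_value_vectorX[i] = mean;
        Sxx_vector[i] = ss;

        // Same statistics for the second matrix, when one is supplied
        if (dataMatrixY != NULL) {
            mean = 0.0;
            ss = 0.0;
            for (j = 0; j < columns; j++)
                mean += dataMatrixY[i*columns + j];
            mean /= columns;
            for (j = 0; j < columns; j++) {
                double d = dataMatrixY[i*columns + j] - mean;
                ss += d * d;
            }
            mean_value_vectorY[i] = mean;
            Syy_vector[i] = ss;
        }
    }
}

// Correlation between two rows of dataMatrixX, using the cached statistics.
double pearson(double *dataMatrixX, int row_a, int row_b, int columns) {
    double sxy = 0.0;
    int j;
    for (j = 0; j < columns; j++)
        sxy += (dataMatrixX[row_a*columns + j] - mean_value_vectorX[row_a])
             * (dataMatrixX[row_b*columns + j] - mean_value_vectorX[row_b]);

    double den = sqrt(Sxx_vector[row_a] * Sxx_vector[row_b]);
    return den != 0.0 ? sxy / den : 0.0;
}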