void *sum_array_r(void *initid_r)
{

	// Plugin thread function
	int thid;

	// Each thread has a subset of elements 
	unsigned long *th_array_dim = malloc(sizeof(unsigned long));

	for (thid = 0; thid < NTHREAD; thid++) {
		if (pthread_equal(((th_data *) (((UDF_INIT *) (initid_r))->extension))->thread[thid], pthread_self()))
			break;
	}
	for (;;) {
		barrier_wait(&(((th_data *) (((UDF_INIT *) (initid_r))->extension))->barr_start));
		if (((th_data *) (((UDF_INIT *) (initid_r))->extension))->exit_flag == 1)
			break;

		oph_string measurea;
		oph_string measureb;

		int res = 0;

		if (((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->arg_count < 3) {
			core_set_type(&(measurea), NULL, 0);
			core_set_type(&(measureb), NULL, 0);
		} else if (((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->arg_count == 3) {
			core_set_type(&(measurea), ((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->args[2],
				      &(((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->lengths[2]));
			core_set_type(&(measureb), NULL, 0);
		} else {
			core_set_type(&(measurea), ((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->args[2],
				      &(((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->lengths[2]));
			core_set_type(&(measureb), ((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->args[3],
				      &(((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->lengths[3]));
		}



		if (measurea.type != measureb.type) {
			pmesg(1, __FILE__, __LINE__, "Type are different; unable to sum values\n");
		}

		*th_array_dim = (((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->lengths[0]) / NTHREAD;

		measurea.content = ((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->args[0] + ((*th_array_dim) * thid);
		measurea.length = th_array_dim;
		measureb.content = ((UDF_ARGS *) ((th_data *) (((UDF_INIT *) (initid_r))->extension))->curr_args)->args[1] + ((*th_array_dim) * thid);
		measureb.length = th_array_dim;

		core_set_elemsize(&(measurea));

		if (core_set_numelem(&(measurea))) {
			pmesg(1, __FILE__, __LINE__, "Error on counting elements\n");
		}

		core_set_elemsize(&(measureb));

		if (core_set_numelem(&(measureb))) {
			pmesg(1, __FILE__, __LINE__, "Error on counting elements\n");
		}
		//Exit if array series have different number of elements
		if (measurea.numelem != measureb.numelem) {
			pmesg(1, __FILE__, __LINE__, "Number of array elements are different; unable to sum values\n");
		}


		res = core_oph_sum_array(&measurea, &measureb, (((UDF_INIT *) (initid_r))->ptr) + ((*th_array_dim) * thid));
		if (res) {
			pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
		}
		barrier_wait(&(((th_data *) (((UDF_INIT *) (initid_r))->extension))->barr_end));
	}			// end for
	free(th_array_dim);
	th_array_dim = NULL;
}
void oph_aggregate_stats_partial_add(UDF_INIT * initid, UDF_ARGS * args, char *is_null, char *error)
{
	if (*error != 0)
		return;
	if (args->args[2]) {
		oph_agg_stats_partial_data *dat = (oph_agg_stats_partial_data *) initid->ptr;

		/* Setting of the aggregate result */
		if (!dat->result.content) {
			//It's the first row

			// default values
			unsigned long def_mask_len = MASK_LEN;
			dat->mask.content = (char *) calloc(MASK_LEN + 1, sizeof(char));
			if (!dat->mask.content) {
				pmesg(1, __FILE__, __LINE__, "Error allocating mask content\n");
				*error = 1;
				return;
			}
			core_strncpy(dat->mask.content, DEFAULT_MASK, &def_mask_len);

			dat->mask.length = (unsigned long *) calloc(1, sizeof(unsigned long));
			if (!dat->mask.length) {
				pmesg(1, __FILE__, __LINE__, "Error allocating mask length\n");
				*error = 1;
				return;
			}
			*(dat->mask.length) = def_mask_len;

			core_set_type(&(dat->measure), NULL, 0);
			dat->measure.length = (unsigned long *) calloc(1, sizeof(unsigned long));
			if (!dat->measure.length) {
				pmesg(1, __FILE__, __LINE__, "Error allocating measure length\n");
				*error = 1;
				return;
			}
			*(dat->measure.length) = args->lengths[2];

			core_set_type(&(dat->measure), args->args[0], &(args->lengths[0]));
			if (dat->measure.type != OPH_SHORT && dat->measure.type != OPH_BYTE && dat->measure.type != OPH_INT && dat->measure.type != OPH_LONG && dat->measure.type != OPH_FLOAT
			    && dat->measure.type != OPH_DOUBLE) {
				pmesg(1, __FILE__, __LINE__, "Invalid type\n");
				*error = 1;
				return;
			}
			if (args->arg_count > 3) {
				core_strncpy(dat->mask.content, args->args[3], &(args->lengths[3]));
				*(dat->mask.length) = args->lengths[3];
			}

			if (core_set_elemsize(&(dat->measure))) {
				pmesg(1, __FILE__, __LINE__, "Error on setting measure elements size\n");
				*error = 1;
				return;
			}
			if (core_set_numelem(&(dat->measure))) {
				pmesg(1, __FILE__, __LINE__, "Error on counting measure elements\n");
				*error = 1;
				return;
			}

			core_set_type(&dat->result, args->args[1], &(args->lengths[1]));
			if (!dat->result.type) {
				pmesg(1, __FILE__, __LINE__, "Unable to recognize measures type\n");
				*error = 1;
				return;
			}
			if (core_set_elemsize(&dat->result)) {
				pmesg(1, __FILE__, __LINE__, "Unable to recognize measures type\n");
				*error = 1;
				return;
			}
			// mask processing
			int i;
			for (i = 0; i < *(dat->mask.length); i++) {
				if (dat->mask.content[i] == '1') {
					(dat->mask.numelem)++;	// count 1s
					switch (i) {
						case 0:	// mean
							dat->sum1 = 1;	// sum{x_i} needed
							break;
						case 1:	// variance
							dat->sum1 = 1;
							dat->sum2 = 1;	// sum{(x_i)^2} needed
							break;
						case 2:	// std dev
							dat->sum1 = 1;
							dat->sum2 = 1;
							break;
						case 3:	// skew
							dat->sum1 = 1;
							dat->sum2 = 1;
							dat->sum3 = 1;	// sum{(x_i)^3} needed
							break;
						case 4:	// kurtosis
							dat->sum1 = 1;
							dat->sum2 = 1;
							dat->sum3 = 1;
							dat->sum4 = 1;	// sum{(x_i)^4} needed
							break;
						case 5:	// max
							dat->max = 1;	// array with max values needed
							break;
						case 6:	// min
							dat->min = 1;	// array with min values needed
					}
				}
			}
			if (dat->mask.numelem == 0) {
				pmesg(1, __FILE__, __LINE__, "Invalid mask\n");
				*error = 1;
				return;
			}

			int size = 1;
			if (dat->sum1)
				size++;
			if (dat->sum2)
				size++;
			if (dat->sum3)
				size++;
			if (dat->sum4)
				size++;
			if (dat->max)
				size++;
			if (dat->min)
				size++;

			// output array allocation
			dat->result.numelem = size * dat->measure.numelem;	// In future use a structed type of 'size' fields
			unsigned long outlen = dat->result.numelem * dat->result.elemsize;
			dat->result.length = (unsigned long *) calloc(1, sizeof(unsigned long));
			if (!dat->result.length) {
				pmesg(1, __FILE__, __LINE__, "Error allocating result length\n");
				*error = 1;
				return;
			}
			*(dat->result.length) = outlen;
			dat->result.content = (char *) calloc(1, *(dat->result.length));
			if (!dat->result.content) {
				pmesg(1, __FILE__, __LINE__, "Error allocating output array\n");
				*error = 1;
				return;
			}
			// partial results array allocation
			dat->partials = (oph_stringPtr) calloc((size - 1), sizeof(oph_string));
			if (!dat->partials) {
				pmesg(1, __FILE__, __LINE__, "Error allocating intermediate arrays\n");
				*error = 1;
				return;
			}
			for (i = 0; i < (size - 1); i++) {
				dat->partials[i].type = dat->measure.type;
				dat->partials[i].elemsize = dat->measure.elemsize;
				dat->partials[i].numelem = dat->measure.numelem;
				dat->partials[i].length = dat->measure.length;
				dat->partials[i].content = (char *) calloc(1, *(dat->partials[i].length));
				if (!dat->partials[i].content) {
					pmesg(1, __FILE__, __LINE__, "Error allocating intermediate array\n");
					*error = 1;
					return;
				}
			}
		}

		if (!dat->count) {
			//It's the first row or the first row in the group of the GROUP BY clause

			int i = 0;
			int j;
			// copy input values
			if (dat->sum1) {
				memcpy((void *) (dat->partials[i].content), (void *) (args->args[2]), *(dat->partials[i].length));
				i++;
			}
			// copy input values squared
			if (dat->sum2) {
				switch (dat->measure.type) {
					case OPH_INT:
						{
							int val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((int *) (args->args[2]))[j];
								((int *) (dat->partials[i].content))[j] = val_i * val_i;
							}
							break;
						}
					case OPH_SHORT:
						{
							int val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((short *) (args->args[2]))[j];
								((short *) (dat->partials[i].content))[j] = val_i * val_i;
							}
							break;
						}
					case OPH_BYTE:
						{
							int val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((char *) (args->args[2]))[j];
								((char *) (dat->partials[i].content))[j] = val_i * val_i;
							}
							break;
						}
					case OPH_LONG:
						{
							long long val_l;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_l = ((long long *) (args->args[2]))[j];
								((long long *) (dat->partials[i].content))[j] = val_l * val_l;
							}
							break;
						}
					case OPH_FLOAT:
						{
							float val_f;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_f = ((float *) (args->args[2]))[j];
								((float *) (dat->partials[i].content))[j] = val_f * val_f;
							}
							break;
						}
					case OPH_DOUBLE:
						{
							double val_d;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_d = ((double *) (args->args[2]))[j];
								((double *) (dat->partials[i].content))[j] = val_d * val_d;
							}
						}
					case OPH_COMPLEX_INT:
					case OPH_COMPLEX_LONG:
					case OPH_COMPLEX_FLOAT:
					case OPH_COMPLEX_DOUBLE:
					case INVALID_TYPE:
						break;
				}
				i++;
			}
			// copy input values raised to 3
			if (dat->sum3) {
				switch (dat->measure.type) {
					case OPH_INT:
						{
							int val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((int *) (args->args[2]))[j];
								((int *) (dat->partials[i].content))[j] = val_i * val_i * val_i;
							}
							break;
						}
					case OPH_SHORT:
						{
							short val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((short *) (args->args[2]))[j];
								((short *) (dat->partials[i].content))[j] = val_i * val_i * val_i;
							}
							break;
						}
					case OPH_BYTE:
						{
							char val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((char *) (args->args[2]))[j];
								((char *) (dat->partials[i].content))[j] = val_i * val_i * val_i;
							}
							break;
						}
					case OPH_LONG:
						{
							long long val_l;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_l = ((long long *) (args->args[2]))[j];
								((long long *) (dat->partials[i].content))[j] = val_l * val_l * val_l;
							}
							break;
						}
					case OPH_FLOAT:
						{
							float val_f;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_f = ((float *) (args->args[2]))[j];
								((float *) (dat->partials[i].content))[j] = val_f * val_f * val_f;
							}
							break;
						}
					case OPH_DOUBLE:
						{
							double val_d;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_d = ((double *) (args->args[2]))[j];
								((double *) (dat->partials[i].content))[j] = val_d * val_d * val_d;
							}
						}
					case OPH_COMPLEX_INT:
					case OPH_COMPLEX_LONG:
					case OPH_COMPLEX_FLOAT:
					case OPH_COMPLEX_DOUBLE:
					case INVALID_TYPE:
						break;
				}
				i++;
			}
			// copy input values raised to 4
			if (dat->sum4) {
				switch (dat->measure.type) {
					case OPH_INT:
						{
							int val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((int *) (args->args[2]))[j];
								((int *) (dat->partials[i].content))[j] = val_i * val_i * val_i * val_i;
							}
							break;
						}
					case OPH_SHORT:
						{
							short val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((short *) (args->args[2]))[j];
								((short *) (dat->partials[i].content))[j] = val_i * val_i * val_i * val_i;
							}
							break;
						}
					case OPH_BYTE:
						{
							char val_i;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_i = ((char *) (args->args[2]))[j];
								((char *) (dat->partials[i].content))[j] = val_i * val_i * val_i * val_i;
							}
							break;
						}
					case OPH_LONG:
						{
							long long val_l;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_l = ((long long *) (args->args[2]))[j];
								((long long *) (dat->partials[i].content))[j] = val_l * val_l * val_l * val_l;
							}
							break;
						}
					case OPH_FLOAT:
						{
							float val_f;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_f = ((float *) (args->args[2]))[j];
								((float *) (dat->partials[i].content))[j] = val_f * val_f * val_f * val_f;
							}
							break;
						}
					case OPH_DOUBLE:
						{
							double val_d;
							for (j = 0; j < dat->measure.numelem; j++) {
								val_d = ((double *) (args->args[2]))[j];
								((double *) (dat->partials[i].content))[j] = val_d * val_d * val_d * val_d;
							}
						}
					case OPH_COMPLEX_INT:
					case OPH_COMPLEX_LONG:
					case OPH_COMPLEX_FLOAT:
					case OPH_COMPLEX_DOUBLE:
					case INVALID_TYPE:
						break;
				}
				i++;
			}
			// copy input values
			if (dat->max) {
				memcpy(dat->partials[i].content, (void *) (args->args[2]), *(dat->partials[i].length));
				i++;
			}
			// copy input values
			if (dat->min) {
				memcpy(dat->partials[i].content, (void *) (args->args[2]), *(dat->partials[i].length));
				i++;
			}
		} else {
			// Not the first row in the group => execute next aggregation step

			dat->measure.content = args->args[2];

			int i = 0;
			if (dat->sum1) {
				if (core_oph_sum_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
			if (dat->sum2) {
				if (core_oph_sum2_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
			if (dat->sum3) {
				if (core_oph_sum3_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
			if (dat->sum4) {
				if (core_oph_sum4_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
			if (dat->max) {
				if (core_oph_max_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
			if (dat->min) {
				if (core_oph_min_array(&(dat->measure), &(dat->partials[i]), dat->partials[i].content)) {
					pmesg(1, __FILE__, __LINE__, "Unable to compute result\n");
					*error = 1;
					return;
				}
				i++;
			}
		}
		dat->count++;
	}
}