/**
 * Iteratively discover u-shapelets and peel clusters off the data set,
 * appending a textual report to `outputname`.
 *
 * Starting from the series at `start_ts_id`, each iteration:
 *   1. enumerates every subsequence of the current reference series with
 *      lengths sLen-LOWER .. sLen+UPPER (step STEP),
 *   2. scores each one with computeGap() against the still-unclustered series,
 *   3. takes the subsequence with the largest gap as the shapelet,
 *   4. marks every unclustered series whose distance to the shapelet is
 *      <= mean + stddev (computed over the series within the shapelet's
 *      threshold) as belonging to cluster `iter`,
 *   5. continues from the unclustered series farthest from the shapelet.
 * The loop ends when clustered() says so, when the reference series is
 * shorter than sLen, or when nothing is left to cluster.
 *
 * @param pd_Dataset   array of n_sample time series
 * @param ds_len       length of each series in pd_Dataset
 * @param n_sample     number of series
 * @param sLen         base shapelet length
 * @param app_no       forwarded to computeGap() as the cluster count
 * @param app          per-series application name (used in the report only)
 * @param file         per-series file path (used in the report only)
 * @param outputname   report file, opened in append mode
 * @param start_ts_id  index of the series used as the first reference
 */
void extractU_Shapelets(double **pd_Dataset, int* ds_len, int n_sample, int sLen, int app_no, char app[][100], char file[][100], char*outputname, int start_ts_id)
{
	int sl;
	int i;
	int iter = 0;
	int cluster_id[n_sample];
	double *ts;
	int ts_len;
	int cnt;
	int newsize;
	int k, j, l;
	int index, index2;
	double mean, stddev, range;

	/* Discovered u-shapelets, one slot per iteration (pointers into pd_Dataset) */
	double *ushapelet[n_sample];
	int ushapelet_len[n_sample];

	ts = pd_Dataset[start_ts_id];
	ts_len = ds_len[start_ts_id];

	printf("\nextractU_Shapelet\n");

	/* -1 (all bits set) marks a series as not yet clustered */
	memset(cluster_id, -1, n_sample * sizeof(int));

	FILE *fp;

	fp = fopen(outputname, "a");

	if (!fp) {
		perror(outputname);
		return;
	}

	fprintf(fp, "\n\n\nNew Clustering Batch------------------------------\n");
	fprintf(fp,"************************\n");
	fprintf(fp, "Iteration %d\n", iter);
	fprintf(fp, "Application %s\n Dataset path %s\n", app[start_ts_id], file[start_ts_id]);

	/* iter indexes ushapelet[]/ushapelet_len[], so it must stay below n_sample */
	while (iter < n_sample) {

		/*
		 * Reject a too-short reference series before sizing any VLA:
		 * with ts_len < sLen-LOWER the candidate count would be 0 and a
		 * zero-length VLA is undefined behaviour.
		 */
		if (ts_len < sLen) {

			printf(" The time series is too short to classify\n");

			break;
		}

		/* Count the series that are still unclustered */
		newsize = 0;
		for (k = 0; k < n_sample; k++) {
			if (cluster_id[k] == -1)
				newsize++;
		}

		/* Everything is already clustered: nothing left to compare against */
		if (newsize == 0)
			break;

		/* Count the candidate subsequences over all admissible lengths */
		cnt = 0;
		for (sl = sLen-LOWER; sl <= sLen+UPPER && sl <= ts_len; sl += STEP) {
			cnt += (ts_len - sl + 1);
		}

		double *p_subseq[cnt];
		int ps_len[cnt];
		double gap[cnt];
		double dt[cnt];

		double *n_dataset[newsize];
		int n_datalen[newsize];
		int old_id[newsize];	/* n_dataset index -> pd_Dataset index */

		/* Build the unclustered view of the data set, keeping the index mapping */
		for (k = 0, j = 0; k < n_sample; k++) {

			if (cluster_id[k] == -1) {
				n_dataset[j] = pd_Dataset[k];
				n_datalen[j] = ds_len[k];
				old_id[j] = k;
				j++;
			}
		}

		double dist[newsize];

		/* Indices (into n_dataset) of the series within the distance threshold */
		int dataset_A[newsize];
		int dataset_Alen;

		memset(dataset_A, -1, newsize * sizeof(int));

		int cluster_no = app_no;

		cnt = 0;

		/* Score every candidate subsequence of the reference series */
		for (sl = sLen-LOWER; sl <= sLen+UPPER && sl <= ts_len; sl += STEP) {

			for (i = 0; i < ts_len - sl + 1; i++) {

				p_subseq[cnt] = ts + i;
				ps_len[cnt] = sl;
				/* computeGap returns the gap and writes the threshold into dt[cnt] */
				gap[cnt] = computeGap(&dt[cnt], cluster_no, p_subseq[cnt], ps_len[cnt], n_dataset, newsize , n_datalen);
				cnt++;
			}
		}

		/* Pick the candidate with the maximum gap.
		 * Note: despite its label, the next line prints the candidate count. */
		printf("max gap is %d\n", cnt);
		index = max_index(gap, cnt);

		printf("Discovered ushapelet gap is %2.6f dt is %2.6f index %d len %d\n", gap[index], dt[index], index, ps_len[index]);

		fprintf(fp, "Shapelet: ");
		for (k = 0; k < ps_len[index]; k++)
			fprintf(fp,"%2.2f ", p_subseq[index][k]);
		/* NOTE(review): this newline goes to stdout, not to the report file,
		 * so "Shapelet len" continues on the same report line - confirm intent. */
		printf("\n");

		/* Record the discriminatory subsequence */
		ushapelet[iter] = p_subseq[index];
		ushapelet_len[iter] = ps_len[index];
		fprintf(fp, "Shapelet len: %d\n",ps_len[index]);

		dataset_Alen = 0;
		j = 0;
		for (l = 0; l < newsize; l++) {

			/* Distance of the shapelet from each unclustered series */
			dist[l] = computeDistance(p_subseq[index],  ps_len[index], n_dataset[l], n_datalen[l]);

			/* Within the threshold: add to data set A */
			if (CompareDoubles2(dist[l], dt[index]) <= 0) {
				dataset_A[j] = l;
				j++;
				dataset_Alen++;
			}
		}

		if (clustered(dataset_Alen, newsize))
			break;

		mean = 0.0;
		stddev = 0.0;

		/* Mean and standard deviation of the distances in data set A */
		compute_mean_stddev(dataset_A, dataset_Alen, dist, newsize, &mean, &stddev);

		range = mean + stddev;

		/* Mark every series within the range as belonging to this cluster */
		for (k = 0; k < newsize; k++) {

			if (CompareDoubles2(dist[k], range) <= 0) {
				fprintf(fp, "Appname: %s Filename: %s\n", app[old_id[k]], file[old_id[k]]);
				cluster_id[old_id[k]] = iter;
			}
		}

		/* Continue from the series farthest from the shapelet.  The length
		 * must follow the pointer, otherwise the next iteration would walk
		 * the new series using the previous series' length. */
		index2 = max_index(dist, newsize);
		ts = n_dataset[index2];
		ts_len = n_datalen[index2];

		fprintf(fp,"************************\n");
		fprintf(fp, "Iteration %d\n", iter+1);
		fprintf(fp, "Application %s\n Dataset path %s\n", app[old_id[index2]], file[old_id[index2]]);

		++iter;
	}

	/* Report whatever was never assigned to a cluster */
	fprintf(fp,"************************\n");
	fprintf(fp, "Remaining set\n");
	for (int z = 0; z < n_sample; z++) {

		if (cluster_id[z] == -1)
			fprintf(fp, "Appname: %s Filename: %s\n", app[z], file[z]);
	}

	fclose(fp);
	printf("\n");

}
/* Example #2 */
/* 0 */
/**
 * Compute per-cell height statistics for one block of a point grid.
 *
 * The block is divided into (blockSize / outRes)^2 cells. For each cell the
 * points inside it are selected with filterByArea(), filtered with
 * filterByZThreshold(), sorted, and then summarised into three buffers:
 *   outArr[0..7]  - rugosity, gap, 85th percentile, four L-moments, and the
 *                   band-17 LHQ value
 *   lhqArr[0..20] - interpolated order statistics at band/20 of the sorted
 *                   heights (band 20 is the maximum)
 *   ccfArr[0..20] - getCCFCount() at 21 evenly spaced heights between
 *                   zThreshold and the maximum, divided by the number of
 *                   points in the cell before z filtering
 * Cells that are skipped keep the -999.0 fill value.
 *
 * points : 3 x num_points
 * points[0] : all x co-ords
 * points[1] : all y co-ords
 * points[2] : all z co-ords
 *
 * NOTE(review): the three result buffers are neither returned nor released
 * in the code visible here - confirm whether the rest of the function
 * (writing/freeing them) was lost, and which routine releases them.
 */
void calcBiometrics(
					
					double **points, 		// 3 x numPoints array
					int      numPoints, 	//the number of points in 'points'
					float    minEasting, 	//easting of bottom left corner of grid
					float    minNorthing, 	//northing of bottom left corner of grid
					int      blockSize,		//block dimension in meters
					float    xBlock,		//x block position in grid
					float    yBlock,		//y block position in grid
					int      outRes, 		//resolution of output
					int      zThreshold		//z threshold value
					
					)
{
	float *** outArr;
	float *** lhqArr;
	float *** ccfArr;
	float blockEasting, blockNorthing;
	int dimensions;
	
	/* outRes is a divisor below; a non-positive resolution is meaningless */
	if(outRes <= 0)
		return;
	
	dimensions 	  = blockSize / outRes; //must be an integer
	blockEasting  = minEasting + (xBlock * blockSize);
	blockNorthing = minNorthing + (yBlock * blockSize);
	
	/** output arrays, pre-filled with the -999.0 "no data" value **/
	outArr = initFloatBuffer3D(8 , dimensions, dimensions, -999.0);
	lhqArr = initFloatBuffer3D(21, dimensions, dimensions, -999.0);
	ccfArr = initFloatBuffer3D(21, dimensions, dimensions, -999.0);
	
	float xStart, yStart, xEnd, yEnd;
	int j, i;
	
	for(j = 0; j < dimensions; j++){
		for(i = 0; i < dimensions; i++)
		{
			/* bounds of cell (j, i) */
			xStart = blockEasting + outRes * j;
			yStart = blockNorthing + outRes * i;
			xEnd   = xStart + outRes;
			yEnd   = yStart + outRes;
			
			Array1D A;
			
			A = filterByArea(points, numPoints, xStart, yStart, xEnd, yEnd);
			if(A.size == 0)
				continue;
			
			/* point count before z filtering; denominator of the CCF fraction */
			int     numAll = A.size;
			
			/* NOTE(review): A is overwritten here; if filterByArea allocates
			 * A.points, that buffer is no longer reachable - confirm ownership. */
			A = filterByZThreshold(A.points, A.size, zThreshold);
			
			float * z_threshold = A.points;
			int     numThresh   = A.size;
			
			if(numThresh == 0)
				continue;
			if(numThresh <= zThreshold)// why is this necessary
				continue;
			
			/* ascending sort is assumed by the rank lookups below - TODO confirm floatCompare order */
			qsort(z_threshold, numThresh, sizeof(float), floatCompare);
			
			computeRugosity(z_threshold, numThresh, &outArr[0][j][i]);
			computeGap(numThresh, numAll, &outArr[1][j][i]);
			computePercentile(z_threshold, numThresh, 0.85, &outArr[2][j][i]);
			
			/* the remaining statistics are only computed for cells with at least 75 points */
			if(A.size < 75)
				continue;
			
			float * lMoments =
			computeLMoments(z_threshold,  numThresh);
			outArr[3][j][i] = lMoments[0];
			outArr[4][j][i] = lMoments[1];
			outArr[5][j][i] = lMoments[2];
			outArr[6][j][i] = lMoments[3];
			
			/*****************************************************************************
			 *********************** LHQ Statistics **************************
			 *****************************************************************/
			
			int band, rank1, rank2;
			float fRank, iRank, difRank;
			for(band = 0; band <= 20; band++)
			{
				fRank   = (band/20.0) * (numThresh + 1);
				iRank   = (int)fRank;			// the cast truncates toward zero
				difRank = fRank - iRank;
				rank1   = iRank - 1;
				if(rank1 < 0)
					rank1 = 0;
				rank2 = iRank;
				
				if(band < 20)
				{
					/* linear interpolation between the two neighbouring order statistics */
					lhqArr[band][j][i] = (1 - difRank) * z_threshold[rank1] + difRank * z_threshold[rank2];
				}
				else
				{
					lhqArr[band][j][i] = z_threshold[numThresh-1]; // max.. this sorted so it should be max
				}
				
				if( band == 17 ) 
				{
					outArr[7][j][i] = lhqArr[band][j][i];
				}
			}
			
			/*****************************************************************************
			 *********************** CCF Statistics **************************
			 *****************************************************************/
			
			int numDivs, ccfCount;
			float minHeight, maxHeight, curHeight, ccfPercent, htIncrement;
			
			minHeight = zThreshold;
			maxHeight = getMax(z_threshold, numThresh); // = z_threshold[numThresh-1]
			numDivs   = 20;
			htIncrement = (maxHeight - minHeight) / numDivs;
			
			for(band = 0; band <= 20; band++)
			{
				curHeight  = minHeight + (htIncrement * band);
				ccfCount   = getCCFCount(z_threshold, numThresh, curHeight);
				/* both operands are int: divide in floating point so the
				 * fraction is not truncated to a whole number */
				ccfPercent = (float)ccfCount / (float)numAll;
				
				ccfArr[band][j][i] = ccfPercent;
			}
		}
	}
}