/**
 * Iteratively pick a discriminating subsequence ("u-shapelet") from a seed
 * time series, mark the series that lie close to it as one cluster, and then
 * continue with the still-unclustered series that is farthest away.
 *
 * A textual report is appended to the file named by outputname; progress
 * messages go to stdout.
 *
 * @param pd_Dataset   n_sample time series (one pointer per series)
 * @param ds_len       length of each series in pd_Dataset
 * @param n_sample     number of series in pd_Dataset / ds_len / app / file
 * @param sLen         nominal subsequence length; candidate lengths run from
 *                     sLen-LOWER to sLen+UPPER in increments of STEP
 * @param app_no       forwarded to computeGap() as the cluster count
 * @param app          per-series application name, written to the report
 * @param file         per-series dataset path, written to the report
 * @param outputname   report file, opened in append mode
 * @param start_ts_id  index of the series used as the first seed
 *
 * NOTE(review): all scratch buffers are variable-length arrays on the stack;
 * their size grows with the seed length and with n_sample.
 */
void extractU_Shapelets(double **pd_Dataset, int* ds_len, int n_sample, int sLen, int app_no, char app[][100], char file[][100], char*outputname, int start_ts_id)
{
    /* Every per-series array is indexed with start_ts_id and with 0..n_sample-1,
     * so reject inputs that would index outside of them (and avoid declaring
     * zero-length VLAs below). */
    if (n_sample <= 0 || start_ts_id < 0 || start_ts_id >= n_sample) {
        fprintf(stderr, "extractU_Shapelets: invalid n_sample/start_ts_id\n");
        return;
    }

    int sl;
    int i;
    int iter = 0;
    int cluster_id[n_sample];       /* -1 = not clustered yet, else iteration id */
    double *ts;                     /* current seed series */
    int ts_len;                     /* length of the current seed series */
    int cnt;
    int newsize;
    int k, j, l;
    int index, index2;
    double mean, stddev, range;

    /* Shapelets discovered so far, one per iteration (pointers into the data set). */
    double *ushapelet[n_sample];
    int ushapelet_len[n_sample];

    ts = pd_Dataset[start_ts_id];
    ts_len = ds_len[start_ts_id];

    printf("\nextractU_Shapelet\n");
    /* Filling with 0xFF bytes yields -1 in every int slot. */
    memset(cluster_id, -1, (size_t)n_sample * sizeof(int));

    FILE *fp;
    fp = fopen(outputname, "a");
    if (!fp) {
        perror(outputname);
        return;
    }

    fprintf(fp, "\n\n\nNew Clustering Batch------------------------------\n");
    fprintf(fp, "************************\n");
    fprintf(fp, "Iteration %d\n", iter);
    fprintf(fp, "Application %s\n Dataset path %s\n", app[start_ts_id], file[start_ts_id]);

    /* ushapelet[] / ushapelet_len[] hold n_sample entries, so never run more
     * than n_sample iterations. */
    while (iter < n_sample) {
        /* A seed shorter than the nominal length cannot be processed. */
        if (ts_len < sLen) {
            printf(" The time series is too short to classify\n");
            break;
        }

        /* Count the series that are still unclustered. */
        newsize = 0;
        for (k = 0; k < n_sample; k++) {
            if (cluster_id[k] == -1)
                newsize++;
        }
        if (newsize == 0)
            break;                  /* nothing left to cluster */

        /* Count every candidate subsequence of the seed. */
        cnt = 0;
        for (sl = sLen - LOWER; sl <= sLen + UPPER && sl <= ts_len; sl += STEP) {
            cnt += (ts_len - sl + 1);
        }
        if (cnt <= 0)
            break;                  /* no candidates; also avoids zero-length VLAs */

        double *p_subseq[cnt];      /* start of each candidate inside ts */
        int ps_len[cnt];            /* length of each candidate */
        double gap[cnt];            /* gap score of each candidate */
        double dt[cnt];             /* distance threshold of each candidate */

        /* Build the data set of unclustered series and remember, for each
         * entry, its index in the original data set. */
        double *n_dataset[newsize];
        int n_datalen[newsize];
        int old_id[newsize];
        for (k = 0, j = 0; k < n_sample; k++) {
            if (cluster_id[k] == -1) {
                n_dataset[j] = pd_Dataset[k];
                n_datalen[j] = ds_len[k];
                old_id[j] = k;
                j++;
            }
        }

        double dist[newsize];
        /* Indices (into the unclustered set) whose distance is within the threshold. */
        int dataset_A[newsize];
        int dataset_Alen;
        memset(dataset_A, -1, (size_t)newsize * sizeof(int));

        int cluster_no = app_no;

        /* Compute the gap and threshold for every candidate subsequence. */
        cnt = 0;
        for (sl = sLen - LOWER; sl <= sLen + UPPER && sl <= ts_len; sl += STEP) {
            for (i = 0; i < ts_len - sl + 1; i++) {
                p_subseq[cnt] = ts + i;
                ps_len[cnt] = sl;
                gap[cnt] = computeGap(&dt[cnt], cluster_no, p_subseq[cnt], ps_len[cnt],
                                      n_dataset, newsize, n_datalen);
                cnt++;
            }
        }

        /* Pick the candidate with the maximum gap as this iteration's shapelet. */
        printf("max gap is %d\n", cnt);
        index = max_index(gap, cnt);

        printf("Discovered ushapelet gap is %2.6f dt is %2.6f index %d len %d\n",
               gap[index], dt[index], index, ps_len[index]);
        fprintf(fp, "Shapelet: ");
        for (k = 0; k < ps_len[index]; k++)
            fprintf(fp, "%2.2f ", p_subseq[index][k]);
        /* NOTE(review): this newline goes to stdout, not to the report file, so
         * "Shapelet len" follows the values on the same report line — confirm
         * whether that is intended. */
        printf("\n");

        ushapelet[iter] = p_subseq[index];
        ushapelet_len[iter] = ps_len[index];
        fprintf(fp, "Shapelet len: %d\n", ps_len[index]);

        /* Distance of the shapelet to every unclustered series; those within
         * the threshold form data set A. */
        dataset_Alen = 0;
        j = 0;
        for (l = 0; l < newsize; l++) {
            dist[l] = computeDistance(p_subseq[index], ps_len[index], n_dataset[l], n_datalen[l]);
            if (CompareDoubles2(dist[l], dt[index]) <= 0) {
                dataset_A[j] = l;
                j++;
                dataset_Alen++;
            }
        }

        if (clustered(dataset_Alen, newsize))
            break;

        mean = 0.0;
        stddev = 0.0;
        range = 0.0;

        /* Mean and standard deviation over data set A define the cluster radius. */
        compute_mean_stddev(dataset_A, dataset_Alen, dist, newsize, &mean, &stddev);
        range = mean + stddev;

        /* Mark every series within the radius as belonging to this iteration's cluster. */
        for (k = 0; k < newsize; k++) {
            if (CompareDoubles2(dist[k], range) <= 0) {
                fprintf(fp, "Appname: %s Filename: %s\n", app[old_id[k]], file[old_id[k]]);
                cluster_id[old_id[k]] = iter;
            }
        }

        /* Continue from the series farthest away from the shapelet. The length
         * must follow the series, otherwise the next iteration would enumerate
         * subsequences using the previous seed's length. */
        index2 = max_index(dist, newsize);
        ts = n_dataset[index2];
        ts_len = n_datalen[index2];

        fprintf(fp, "************************\n");
        fprintf(fp, "Iteration %d\n", iter + 1);
        fprintf(fp, "Application %s\n Dataset path %s\n", app[old_id[index2]], file[old_id[index2]]);

        ++iter;
    }

    /* Report whatever was never assigned to a cluster. */
    fprintf(fp, "************************\n");
    fprintf(fp, "Remaining set\n");
    for (int z = 0; z < n_sample; z++) {
        if (cluster_id[z] == -1)
            fprintf(fp, "Appname: %s Filename: %s\n", app[z], file[z]);
    }
    fclose(fp);
    printf("\n");
}
/** * points : 3 x num_points * points[0] : all x co-ords * points[1] : all y co-ords * points[2] : all z co-ords */ void calcBiometrics( double **points, // 3 x numPoints array int numPoints, //the number of points in 'points' float minEasting, //easting of bottome left corner of grid float minNorthing, //norhting of bottom left corner of grid int blockSize, //block dimension in meters float xBlock, //x block position in grid float yBlock, //y block position in grid int outRes, //resolution of output int zThreshold //z threshold value ) { float *** outArr; float *** lhqArr; float *** ccfArr; float blockEasting, blockNorthing; int dimensions; dimensions = blockSize / outRes; //must be an integer blockEasting = minEasting + (xBlock * blockSize); blockNorthing = minNorthing + (yBlock * blockSize); /** ouput arrays **/ outArr = initFloatBuffer3D(8 , dimensions, dimensions, -999.0); lhqArr = initFloatBuffer3D(21, dimensions, dimensions, -999.0); ccfArr = initFloatBuffer3D(21, dimensions, dimensions, -999.0); float xStart, yStart, xEnd, yEnd; int j, i; for(j = 0; j < dimensions; j++){ for(i = 0; i < dimensions; i++) { xStart = blockEasting + outRes * j; yStart = blockNorthing + outRes * i; xEnd = xStart + outRes; yEnd = yStart + outRes; Array1D A; A = filterByArea(points, numPoints, xStart, yStart, xEnd, yEnd); if(A.size == 0) continue; int numAll = A.size; A = filterByZThreshold(A.points, A.size, zThreshold); float * z_threshold = A.points; int numThresh = A.size; if(numThresh == 0) continue; if(numThresh <= zThreshold)// why is this necessary continue; qsort(z_threshold, numThresh, sizeof(float), floatCompare); computeRugosity(z_threshold, numThresh, &outArr[0][j][i]); computeGap(numThresh, numAll, &outArr[1][j][i]); computePercentile(z_threshold, numThresh, 0.85, &outArr[2][j][i]); if(A.size < 75) continue; float * lMoments = computeLMoments(z_threshold, numThresh); outArr[3][j][i] = lMoments[0]; outArr[4][j][i] = lMoments[1]; outArr[5][j][i] = lMoments[2]; 
outArr[6][j][i] = lMoments[3]; /***************************************************************************** *********************** LHQ Statistics ************************** *****************************************************************/ int band, rank1, rank2; float fRank, iRank, difRank; for(band = 0; band <= 20; band++) { fRank = (band/20.0) * (numThresh + 1); iRank = (int)fRank; //Todo: make sure case truncates difRank = fRank - iRank; rank1 = iRank - 1; if(rank1 < 0) rank1 = 0; rank2 = iRank; if(band < 20) { lhqArr[band][j][i] = (1 - difRank) * z_threshold[rank1] + difRank * z_threshold[rank2]; } else { lhqArr[band][j][i] = z_threshold[numThresh-1]; // max.. this sorted so it should be max } if( band == 17 ) { outArr[7][j][i] = lhqArr[band][j][i]; } } /***************************************************************************** *********************** CCF Statistics ************************** *****************************************************************/ int numDivs, ccfCount; float minHeight, maxHeight, curHeight, ccfPercent, htIncrement; minHeight = zThreshold; maxHeight = getMax(z_threshold, numThresh); // = z_threshold[numThresh-1] numDivs = 20; htIncrement = (maxHeight - minHeight) / numDivs; for(band = 0; band <= 20; band++) { curHeight = minHeight + (htIncrement * band); ccfCount = getCCFCount(z_threshold, numThresh, curHeight); ccfPercent = ccfCount / numAll; ccfArr[band][j][i] = ccfPercent; } } } }