//implementation of spring outlier int TimeSeriesSetSlice::springOutlier(void) { double *timeSeries = set->getRawData(0, 0); int seqLen = set->seqLength; int seqCnt = set->seqCount; int startT = slice.start; int endT = slice.end; //for each subsequence in this interval //get each subsequence double msf = 0; //max distance so far int maxTS = -1; //Sequence double tempDist = 0; vector<double> temp1; vector<double> temp2; for (int i = 0; i < seqCnt; i++) { temp1.clear(); temp2.clear(); //copy the subsequence in temp for (int p = startT; p <= endT; p++) temp1.push_back(timeSeries[i * seqLen + p]); //compute the distance of this subsequence with all sequences for (int j = 0; j < seqCnt; j++) { if (j!=i) { for (int p = startT; p <= endT; p++) temp2.push_back(timeSeries[j * seqLen + p]); tempDist+=simpleDTW(temp2,temp1); temp2.clear(); } } if (tempDist > msf) { msf=tempDist; maxTS=i; } tempDist=0; } cout << "Outlier found by SPRING " << maxTS << " " << msf << endl; return maxTS; }
// the incoming file should be PAA'd already DTWData detectOutliers(std::string dataFile, int length) { std::ifstream dataStream(dataFile.c_str()); if (!dataStream) { std::cout << "ERROR: ifstream failed on " << dataFile << ": " << strerror(errno) << std::endl; return DTWData(); } std::string dataTimePoints; std::string queryData; std::vector<double> dataVector; std::vector<double> queryVector; int dataPos; int curQTimeSeries = -1; double maxSum = 0; int maxSeries; int maxOffset; int queryStart = 0, queryEnd = length; int dataStart = 0, dataEnd = length; // get first query time series while (queryData.empty()) std::getline(dataStream, queryData); // run through each time series as query while (!queryData.empty()) { queryStart = 0; queryEnd = length; curQTimeSeries++; // turn time series into vector queryVector = timeSeriesToVector(queryData); // grab location in file dataPos = dataStream.tellg(); // run through all chunks of the query while (queryEnd <= (int) queryVector.size()) { double curDist = 0; std::vector<double> subQVec(queryVector.begin()+queryStart, queryVector.begin()+queryEnd); // return to the beginning of the file dataStream.clear(); dataStream.seekg(0, dataStream.beg); // grab first data time series while (dataTimePoints.empty() && dataStream.good()) std::getline(dataStream, dataTimePoints); // run through the rest of the time series while (!dataTimePoints.empty()) { // turn time series into vector dataVector = timeSeriesToVector(dataTimePoints); // reset the data start and end for the next query chunk dataStart = 0; dataEnd = length; // run through all chunks of the data while (dataEnd <= (int) dataVector.size()) { std::vector<double> subDVec(dataVector.begin()+dataStart, dataVector.begin()+dataEnd); curDist += simpleDTW(subQVec, subDVec); dataStart++; dataEnd++; // std::cout << "subV: " << subQVec << std::endl; // std::cout << "subD: " << subDVec << std::endl; } dataVector.clear(); dataTimePoints = ""; // get next time series while (dataTimePoints.empty() && dataStream.good()) std::getline(dataStream, dataTimePoints); } // check against worst so far double oldMax = maxSum; maxSum = std::max(maxSum, curDist); if (maxSum != oldMax) { maxSeries = curQTimeSeries; maxOffset = queryStart; } queryStart++; queryEnd++; } dataStream.clear(); dataStream.seekg(dataPos, dataStream.beg); queryData = ""; dataTimePoints = ""; dataVector.clear(); queryVector.clear(); // turn "next" data time series into query vector while (queryData.empty() && dataStream.good()) std::getline(dataStream, queryData); } return DTWData(maxSum, maxSeries, maxOffset, length); }
//implementation of Spring //query Type 1 mean k similar vector<kBest> TimeSeriesSetSlice::springkSimilar(vector<double> tempQ, int queryType, int k) { vector<kBest> kbestArray; double *timeSeries = set->getRawData(0, 0); int N = set->seqCount; int L = set->seqLength; double bsf=INF; int bsfIndex = -1; //index of bsf time series double currentDist=INF; int bestIntervalS = -1; int bestIntervalE = -1; vector<double> temp; //temporary subsequence int kbestCount=0; kBest tempBest; if(queryType==1) { if(k==1) //most similar { //get each subsequence for(int j=1;j<L;j++) { for (int l=0, m=l+j; m<L; l++, m++) { for (int i=0;i<N;i++) { //copy the subsequence in temp for(int p=0;p<=m-l;p++) temp.push_back(timeSeries[i*L + l+p]); currentDist=simpleDTW(temp,tempQ); if(currentDist<bsf) { bsf=currentDist; bsfIndex=i; //best time series index bestIntervalS=l; //record the best interval bestIntervalE=m; } //clear the temp array temp.clear(); } } } cout<<"TS "<<bsfIndex<<" Interval "<<bestIntervalS<<" "<<bestIntervalE<<" Dist "<<bsf<<endl; } else { //have to find more than 1 similar time series //get each subsequence for(int j=1;j<L;j++) { for (int l=0, m=l+j; m<L-1; l++, m++) { for (int i=0;i<N;i++) { //copy the subsequence in temp for(int p=0;p<m-l;p++) temp.push_back(timeSeries[i*L + l+p]); currentDist=simpleDTW(tempQ,temp); if(kbestCount<k) { //add this TS to k best kbestCount++; tempBest.dist=currentDist; tempBest.id=i; kbestArray.push_back(tempBest); } else { sort(kbestArray.begin(),kbestArray.end(), _sortByDist); double tempD=kbestArray[kbestCount].dist; //getting the last distance if(tempD>currentDist) { tempBest.dist=currentDist; tempBest.id=i; kbestArray[kbestCount]=tempBest; } } temp.clear(); } } } } } return kbestArray; }
// both incoming files should be PAA'd already DTWData DTWaFile(std::string dataFile, std::string queryFile) { std::ifstream data(getPAAFilename(dataFile).c_str()); // bigger file std::ifstream query(getPAAFilename(queryFile).c_str()); // smaller file if (!data) { std::cout << "ERROR: ifstream failed on " << dataFile << ": " << strerror(errno) << std::endl; return DTWData(); } if (!query) { std::cout << "ERROR: ifstream failed on " << queryFile << ": " << strerror(errno) << std::endl; return DTWData(); } std::string dataTimePoints; std::string queryData; std::vector<double> dataVector; std::vector<double> queryVector; int curTimeSeries = -1; int bestMatchTimeSeries = 1; int bestMatchIdx = 1; int bestMatchBlkSz = 2; double bestMatchDistance = std::numeric_limits<double>::max(); // turn query into vector std::getline(query, queryData); std::size_t prev = 0, pos; while ((pos = queryData.find_first_of(" ,", prev)) != std::string::npos) { if (pos > prev) queryVector.push_back(stod(queryData.substr(prev, pos-prev))); prev = pos+1; } if (prev < queryData.length()) queryVector.push_back(stod(queryData.substr(prev, std::string::npos))); while (dataTimePoints.empty() && data.good()) std::getline(data, dataTimePoints); while (data.good()) { // get the next data timeseries curTimeSeries++; // split the timeseries numbers on space or comma std::size_t prev = 0, pos; while ((pos = dataTimePoints.find_first_of(" ,", prev)) != std::string::npos) { if (pos > prev) dataVector.push_back(stod(dataTimePoints.substr(prev, pos-prev))); prev = pos+1; } if (prev < dataTimePoints.length()) dataVector.push_back(stod(dataTimePoints.substr(prev, std::string::npos))); // run through all combinations from query for (int blkSz = 2; blkSz <= (int)dataVector.size(); blkSz++) { for (int startIdx = 0; startIdx+blkSz <= (int)dataVector.size(); startIdx++) { std::vector<double> subVec(dataVector.begin()+startIdx, dataVector.begin()+startIdx+blkSz); double newBest = std::min(simpleDTW(queryVector, subVec), bestMatchDistance); if (newBest != bestMatchDistance) { bestMatchDistance = newBest; bestMatchIdx = startIdx; bestMatchBlkSz = blkSz; bestMatchTimeSeries = curTimeSeries; } } } // get the next data timeseries dataVector.clear(); dataTimePoints = ""; while (dataTimePoints.empty() && data.good()) std::getline(data, dataTimePoints); } return DTWData(bestMatchDistance, bestMatchTimeSeries, bestMatchIdx, bestMatchBlkSz); }