Beispiel #1
0
//implementation of spring outlier
int TimeSeriesSetSlice::springOutlier(void)
{
    double *timeSeries = set->getRawData(0, 0);
    int seqLen = set->seqLength;
    int seqCnt = set->seqCount;

    int startT = slice.start;
    int endT = slice.end;
    //for each subsequence in this interval
    //get each subsequence
    double msf = 0; //max distance so far
    int maxTS = -1;    //Sequence
    double tempDist = 0;
    vector<double> temp1;
    vector<double> temp2;
     for (int i = 0; i < seqCnt; i++)
     {
         temp1.clear();
         temp2.clear();
         //copy  the subsequence in temp
         for (int p = startT; p <= endT; p++)
             temp1.push_back(timeSeries[i * seqLen + p]);
         //compute the distance of this subsequence with all sequences
         for (int j = 0; j < seqCnt; j++)
         {
             if (j!=i)
             {
                 for (int p = startT; p <= endT; p++)
                     temp2.push_back(timeSeries[j * seqLen + p]);
                 tempDist+=simpleDTW(temp2,temp1);
                 temp2.clear();
             }
         }
         if (tempDist > msf)
         {
             msf=tempDist;
             maxTS=i;
         }
         tempDist=0;
     }

     cout << "Outlier found by SPRING " << maxTS << " " << msf << endl;

     return maxTS;
}
Beispiel #2
0
// Finds the fixed-length window that is most dissimilar from the rest of a file.
//
// Each non-empty line of dataFile is converted with timeSeriesToVector into
// one time series (the incoming file should be PAA'd already). Every window of
// `length` consecutive points of every series is used as a query; its score is
// the sum of the simpleDTW distances to every `length`-point window of every
// series in the file (its own series included). The highest score wins.
//
// The file is parsed once up front and kept in memory, so the cost of reading
// and converting lines is paid once rather than once per query window.
//
// Parameters:
//   dataFile - path of the file to scan.
//   length   - number of points per window; must be positive.
//
// Returns DTWData(best score, series index, window offset, length). Series
// index and offset are -1 when no window scored above zero (for example an
// empty file, or every series shorter than `length`).
// Returns a default-constructed DTWData, after printing an error, when the
// file cannot be opened or `length` is not positive.
DTWData detectOutliers(std::string dataFile, int length) {
    std::ifstream dataStream(dataFile.c_str());

    if (!dataStream) {
        std::cout << "ERROR: ifstream failed on " << dataFile << ": " << strerror(errno) << std::endl;
        return DTWData();
    }

    // A negative length would make the window's end iterator precede begin();
    // a zero-length window has no points to compare.
    if (length <= 0) {
        std::cout << "ERROR: window length must be positive, got " << length << std::endl;
        return DTWData();
    }

    // Parse the file once; blank lines carry no series and are skipped.
    std::vector<std::vector<double> > allSeries;
    std::string line;
    while (std::getline(dataStream, line)) {
        if (!line.empty())
            allSeries.push_back(timeSeriesToVector(line));
    }

    double maxSum = 0;
    int maxSeries = -1;   // stays -1 until some window scores above zero
    int maxOffset = -1;

    // run through each time series as query
    for (std::size_t q = 0; q < allSeries.size(); ++q) {
        std::vector<double> &queryVector = allSeries[q];
        const int queryLen = static_cast<int>(queryVector.size());

        // run through all chunks of the query
        for (int queryStart = 0; queryStart <= queryLen - length; ++queryStart) {
            std::vector<double> subQVec(queryVector.begin() + queryStart,
                                        queryVector.begin() + queryStart + length);
            double curDist = 0;

            // accumulate the distance to every chunk of every series, in file order
            for (std::size_t d = 0; d < allSeries.size(); ++d) {
                std::vector<double> &dataVector = allSeries[d];
                const int dataLen = static_cast<int>(dataVector.size());

                for (int dataStart = 0; dataStart <= dataLen - length; ++dataStart) {
                    std::vector<double> subDVec(dataVector.begin() + dataStart,
                                                dataVector.begin() + dataStart + length);
                    curDist += simpleDTW(subQVec, subDVec);
                }
            }

            // check against worst so far
            if (curDist > maxSum) {
                maxSum = curDist;
                maxSeries = static_cast<int>(q);
                maxOffset = queryStart;
            }
        }
    }

    return DTWData(maxSum, maxSeries, maxOffset, length);
}
Beispiel #3
0
// Brute-force similarity search: compares the query against sub-sequences of
// every time series in the set using simpleDTW.
//
// Parameters:
//   tempQ     - the query sequence.
//   queryType - only type 1 ("k most similar") is handled; any other value
//               returns an empty vector.
//   k         - number of matches wanted.
//
// Behaviour by k:
//   k == 1 - the single best match (series index, interval, distance) is only
//            printed to stdout; the returned vector stays empty.
//   k  > 1 - returns up to k entries, each holding the distance (dist) and the
//            series index (id) of a sub-sequence that was kept.
//   k  < 1 - returns an empty vector.
//
// NOTE(review): the two branches enumerate different sub-sequences. The
// k == 1 branch copies positions l..m inclusive with m < L, while the k > 1
// branch copies positions l..m-1 with m < L-1, so it never reads the last two
// samples of a series. The bounds are kept as found; confirm which is intended.
vector<kBest> TimeSeriesSetSlice::springkSimilar(vector<double> tempQ, int queryType, int k)
{
    vector<kBest> kbestArray;
    double *timeSeries = set->getRawData(0, 0);
    int N = set->seqCount;
    int L = set->seqLength;

    // Only query type 1 is implemented, and a non-positive k asks for nothing.
    // Without this guard k <= 0 would index into an empty result vector below.
    if (queryType != 1 || k < 1)
        return kbestArray;

    double currentDist = INF;
    vector<double> temp;    //temporary subsequence

    if (k == 1)        //most similar
    {
        double bsf = INF;
        int bsfIndex = -1;       //index of bsf time series
        int bestIntervalS = -1;
        int bestIntervalE = -1;

        //get each subsequence
        for (int j = 1; j < L; j++)
        {
            for (int l = 0, m = l + j; m < L; l++, m++)
            {
                for (int i = 0; i < N; i++)
                {
                    //copy the subsequence in temp
                    temp.clear();
                    for (int p = 0; p <= m - l; p++)
                        temp.push_back(timeSeries[i * L + l + p]);

                    currentDist = simpleDTW(temp, tempQ);
                    if (currentDist < bsf)
                    {
                        bsf = currentDist;
                        bsfIndex = i;        //best time series index
                        bestIntervalS = l;   //record the best interval
                        bestIntervalE = m;
                    }
                }
            }
        }
        cout<<"TS "<<bsfIndex<<" Interval "<<bestIntervalS<<" "<<bestIntervalE<<" Dist "<<bsf<<endl;
        return kbestArray;
    }

    //have to find more than 1 similar time series
    kBest tempBest;
    for (int j = 1; j < L; j++)
    {
        for (int l = 0, m = l + j; m < L - 1; l++, m++)
        {
            for (int i = 0; i < N; i++)
            {
                //copy the subsequence in temp
                temp.clear();
                for (int p = 0; p < m - l; p++)
                    temp.push_back(timeSeries[i * L + l + p]);

                currentDist = simpleDTW(tempQ, temp);
                tempBest.dist = currentDist;
                tempBest.id = i;

                if (static_cast<int>(kbestArray.size()) < k)
                {
                    //still room: keep every candidate until k are held
                    kbestArray.push_back(tempBest);
                }
                else
                {
                    // Presumably _sortByDist orders ascending by dist, leaving
                    // the worst kept match last - confirm against its definition.
                    sort(kbestArray.begin(), kbestArray.end(), _sortByDist);

                    // The array holds exactly k entries here, so the last valid
                    // element is back(); index k itself is one past the end.
                    if (kbestArray.back().dist > currentDist)
                        kbestArray.back() = tempBest;
                }
            }
        }
    }
    return kbestArray;
}
Beispiel #4
0
// Splits one line of space- or comma-separated numbers into doubles.
// Empty tokens (consecutive separators) are skipped; each token is converted
// with std::stod, which throws on text that is not a number.
static std::vector<double> splitTimePoints(const std::string &line) {
    std::vector<double> values;
    std::size_t prev = 0, pos;
    while ((pos = line.find_first_of(" ,", prev)) != std::string::npos) {
        if (pos > prev)
            values.push_back(std::stod(line.substr(prev, pos - prev)));
        prev = pos + 1;
    }
    if (prev < line.length())
        values.push_back(std::stod(line.substr(prev)));
    return values;
}

// Searches a data file for the sub-sequence closest to a query under simpleDTW.
// Both incoming files should be PAA'd already; the names actually opened are
// getPAAFilename(dataFile) and getPAAFilename(queryFile).
//
// The query is the first line of the query file. Every non-empty line of the
// data file is one time series; every contiguous block of at least two points
// of every series is compared with the query, and the block with the smallest
// distance is kept (a later block replaces it only when strictly closer).
//
// Returns DTWData(distance, series index, start index, block size) for the
// best block. When no block was compared the initial values are returned:
// distance = max double, series = 1, start = 1, block size = 2.
// Returns a default-constructed DTWData, after printing an error, when either
// file cannot be opened.
DTWData DTWaFile(std::string dataFile, std::string queryFile) {
    std::ifstream data(getPAAFilename(dataFile).c_str()); // bigger file
    std::ifstream query(getPAAFilename(queryFile).c_str()); // smaller file

    if (!data) {
        std::cout << "ERROR: ifstream failed on " << dataFile << ": " << strerror(errno) << std::endl;
        return DTWData();
    }
    if (!query) {
        std::cout << "ERROR: ifstream failed on " << queryFile << ": " << strerror(errno) << std::endl;
        return DTWData();
    }

    // turn query into vector
    std::string queryData;
    std::getline(query, queryData);
    std::vector<double> queryVector = splitTimePoints(queryData);

    int curTimeSeries = -1;
    int bestMatchTimeSeries = 1;
    int bestMatchIdx = 1;
    int bestMatchBlkSz = 2;
    double bestMatchDistance = std::numeric_limits<double>::max();

    // Loop on the result of getline rather than on good(): a final line with
    // no trailing newline is read successfully but sets eofbit, and testing
    // good() afterwards would silently drop that last series.
    std::string dataTimePoints;
    while (std::getline(data, dataTimePoints)) {
        if (dataTimePoints.empty())
            continue;   // blank lines carry no series

        curTimeSeries++;
        std::vector<double> dataVector = splitTimePoints(dataTimePoints);
        const int pointCount = static_cast<int>(dataVector.size());

        // run through every block size and start position of this series
        for (int blkSz = 2; blkSz <= pointCount; blkSz++) {
            for (int startIdx = 0; startIdx + blkSz <= pointCount; startIdx++) {
                std::vector<double> subVec(dataVector.begin() + startIdx,
                                           dataVector.begin() + startIdx + blkSz);
                const double dist = simpleDTW(queryVector, subVec);
                if (dist < bestMatchDistance) {
                    bestMatchDistance = dist;
                    bestMatchIdx = startIdx;
                    bestMatchBlkSz = blkSz;
                    bestMatchTimeSeries = curTimeSeries;
                }
            }
        }
    }
    return DTWData(bestMatchDistance, bestMatchTimeSeries, bestMatchIdx, bestMatchBlkSz);
}