void SpectrogramComponent::process() { //Get a DataSet from the input DataBuffer DataSet< complex<float> >* readDataSet = NULL; getInputDataSet("input1", readDataSet); std::size_t size = readDataSet->data.size(); //Fill windows and push to spectrogram CplxVecIt it = readDataSet->data.begin(); for(;it != readDataSet->data.end(); ++it) { window_.push_back(*it); if(window_.size() == windowLength_x) processWindow(); } if(!isSink_x && isProbe_x) { //Pass data through DataSet< complex<float> >* writeDataSet = NULL; getOutputDataSet("output1", writeDataSet, size); writeDataSet->data = readDataSet->data; writeDataSet->sampleRate = readDataSet->sampleRate; writeDataSet->timeStamp = readDataSet->timeStamp; releaseOutputDataSet("output1", writeDataSet); } releaseInputDataSet("input1", readDataSet); }
/**
 * Adds one incoming record to every currently open window.
 *
 * Steps, in order:
 *  1. Windows at the front of windows_ that isWindowFull() reports as full
 *     are passed to processWindow() and removed.
 *  2. If isNextWindowReady() is true, a new window is appended, constructed
 *     from the current windowCount_ (which is then incremented).
 *  3. If at least one window is open, the record is placed in shared storage
 *     and a pointer to it is appended to each window's records_.
 *  4. recordCount_ is incremented, whether or not the record was stored.
 *
 * @param ctx context forwarded to processWindow() when a window is closed
 * @param r   record to aggregate; it is moved from when at least one window
 *            is open and left untouched otherwise
 */
void processRecord(CtxPtr ctx, bolt::FrameworkRecord &&r) override {
  // Close any existing full windows
  while(!windows_.empty() && isWindowFull(windows_.front())) {
    processWindow(windows_.front(), ctx);
    windows_.pop_front();
  }

  // Create new window if count is within slide
  if(isNextWindowReady()) {
    windows_.emplace_back(windowCount_++);
  }

  // In the case of tumbling windows there may be spaces of time where
  // we aggregate into no buckets.
  if(!windows_.empty()) {
    // A named rvalue reference is an lvalue: without std::move the record
    // would be copy-constructed even though the caller gave up ownership.
    auto recordPtr = std::make_shared<bolt::FrameworkRecord>(std::move(r));
    for(auto &w : windows_) {
      w.records_.push_back(recordPtr);
    }
  }

  recordCount_++;
}
/**
 * Counts all kmers contained in a DNA sequence and stores their frequencies in the
 * feature array of a feature vector.
 * (A kmer is a string of a specific length that contains only [ACGTacgt].)
 *
 * Kmers are taken only from substrings made of "AaTtGgCc"; substrings that contain any
 * other character are not counted. The index of a kmer is its number representation in
 * base 4, where A~0, T~1, G~2, C~3 (e.g. the index of kmer TGAC is
 * 1*4^3 + 2*4^2 + 0*4^1 + 3*4^0 = 99). If kmers of different lengths are stored in the
 * feature array, shorter kmers come first: for lengths 1, 2 and 3, kmers of length 1
 * occupy indices 0-3, kmers of length 2 occupy indices 4-19 and kmers of length 3 occupy
 * indices 20-83. Each entry of the feature array is the number of times the
 * corresponding kmer is contained in the sequence. The maximum kmer length is defined
 * in the header.
 *
 * @param kmerCounter: counter handle (used as a KCounter); provides kmerArray (kmer
 *        lengths in ascending order, e.g. [4,5,6]), kmerArrayLen and
 *        featureArrayRequiredLen
 * @param seq: the DNA sequence; characters and length are read via seqGetSeq and
 *        seqGetSeqLen
 * @param featureVector: output; it is cleared first (even if a later check fails) and
 *        its feature array then receives the counts
 *
 * @return 0 on success; 1 if kmerCounter is NULL (no message), or - after printing a
 *         message to stdout - if the sequence is NULL, the sequence length is 0, the
 *         feature array is NULL, the longest kmer is longer than the sequence, or the
 *         feature array length differs from featureArrayRequiredLen
 */
inline int kmerCounterCountKmers(KmerCounter * kmerCounter, DNASeq * seq, KmerFeatureVector * featureVector){
    KCounter * counter = (KCounter *)kmerCounter;

    /* reset the output and fetch the raw buffers */
    kmerFeatureVectorClear(featureVector); /* presumably zeroes the counts - confirm */
    double * counts = kmerFeatureGetFeatureArray(featureVector);
    unsigned long countsLen = kmerFeatureGetFeatureArrayLen(featureVector);
    char * bases = seqGetSeq(seq);
    unsigned long baseCount = seqGetSeqLen(seq);

    /* validate the arguments */
    if (counter == NULL){
        return 1;
    }
    if (bases == NULL){
        printf("kmerCounterCountKmers: sequence = NULL\n");
        return 1;
    }
    if (baseCount == 0){
        printf("kmerCounterCountKmers: seq_len = %lu\n", baseCount);
        return 1;
    }
    if (counts == NULL){
        printf("kmerCounterCountKmers: feature_array = NULL\n");
        return 1;
    }
    if (counter->kmerArray[counter->kmerArrayLen - 1] > baseCount){
        printf("kmerCounterCountKmers: the longest kmer (%d) is longer than the sequence (%lu)\n",
               counter->kmerArray[counter->kmerArrayLen - 1], baseCount);
        return 1;
    }
    if (counter->featureArrayRequiredLen != countsLen){
        printf("kmerCounterCountKmers: the length of the feature_array provided"
               " (%lu) is different from the length of the array needed %lu\n",
               countsLen, counter->featureArrayRequiredLen);
        return 1;
    }

    /* sliding-window state */
    unsigned long kmerBits;    /* sliding window that represents the biggest kmer */
    unsigned long unknownBits; /* sliding window that represents bitmap of undefined characters */
    int longestK = counter->kmerArray[counter->kmerArrayLen - 1]; /* kmerArray is ascending */
    int longestKLess1 = longestK - 1;
    int doubledLongestKLess1 = longestKLess1 << 1;
    int shortestK = counter->kmerArray[0];

    /* load the first longest kmer and count every kmer that starts at index 0 */
    initWindow(bases, longestK, &kmerBits, &unknownBits);
    processWindow(counter, &kmerBits, &unknownBits, counts);

    /* slide to the end of the sequence: read one character on the right
       (removing one on the left), then count */
    unsigned long pos = (unsigned long)longestK;
    while (pos < baseCount){
        updateWindow(bases[pos], longestKLess1, doubledLongestKLess1, &kmerBits, &unknownBits);
        processWindow(counter, &kmerBits, &unknownBits, counts);
        pos++;
    }

    /* slide beyond the end of the sequence, feeding 'N', to count the shorter kmers */
    unsigned long padding = (unsigned long)(longestK - shortestK);
    for (; padding > 0; padding--){
        updateWindow('N', longestKLess1, doubledLongestKLess1, &kmerBits, &unknownBits);
        processWindow(counter, &kmerBits, &unknownBits, counts);
    }

    return 0;
}