/* Append the global index of every neuron of `group` to `fstim`, in ascending
 * order of local index. Whatever `fstim` already holds is left in place. */
void
stimulateGroup(unsigned group, std::vector<unsigned>& fstim)
{
	unsigned member = 0;
	while(member < groupSize) {
		fstim.push_back(globalIdx(group, member));
		++member;
	}
}
/*
 * Find the next unmarked candidate after k inside this processor's block.
 *
 *   p, s, n : passed through unchanged to localIdx / blockSize / globalIdx
 *   k       : the search starts at the local position of k+1
 *   x       : this processor's candidate array; an entry equal to 0 counts
 *             as marked
 *
 * Returns the global index of the first non-zero entry at or after the start
 * position, or LLONG_MAX when the block holds no such entry (which is a
 * legitimate outcome for a processor).
 */
ulong nextPrime(int p, int s, ulong n, ulong k, ulong *x)
{
    // find minimal i s.t. i > k and i unmarked
    ulong newK = k+1;
    ulong local = MAX( localIdx(p,s,n,newK), 0); // do not consider primes outside our range
    const ulong size = blockSize(p,s,n);         // invariant during the scan, so fetch it once

    // An empty block, or a start position at/after the end of the block,
    // cannot contain a candidate. Bail out before touching x[local], which
    // would otherwise be read out of bounds.
    if(local >= size)
    {
        return LLONG_MAX; // no primes for this processor. This is possible.
    }

    // Skip marked entries, but never step past the last element.
    while(local < size-1 && x[local] == 0)
    {
        local++;
    }

    if(x[local] == 0)
    {
        return LLONG_MAX; // no primes for this processor. This is possible.
    }

    // if we get here we assume we found a prime!
    return globalIdx(p,s,n,local);
} /* end nextPrime */
/* Populate `net` with two groups of `groupSize` neurons and the synapses
 * between them. Returns the number of synapses per neuron: `groupSize` when
 * the noise connections are requested, 1 otherwise. */
unsigned
construct(nemo::Network& net, bool noiseConnections)
{
	/* All neurons in both groups share the same standard parameters and have
	 * no spontaneous firing, so the parameters are computed once up front. */
	const float r = 0.5;
	const float b = 0.25f - 0.05f * r;
	const float v = -65.0;

	for(unsigned group=0; group < 2; ++group) {
		for(unsigned member=0; member < groupSize; ++member) {
			net.addNeuron(globalIdx(group, member),
					0.02f + 0.08f * r, b, v, 2.0f, b*v, v, 0.0f);
		}
	}

	/* One plastic synapse per neuron pair with equal local index, directed
	 * from group 0 to group 1. The delay depends on the target neuron. The
	 * weight is chosen such that a single spike suffices to make the
	 * postsynaptic neuron fire. */
	for(unsigned member=0; member < groupSize; ++member) {
		const unsigned source = globalIdx(0, member);
		const unsigned target = globalIdx(1, member);
		net.addSynapse(source, target, delay(member), initWeight, 1);
	}

	if(!noiseConnections) {
		return 1;
	}

	/* Extra connections with very low negative weights make spike delivery
	 * and the STDP computation harder. Even if potentiated they will not
	 * cause additional firing. Plastic and static synapses are mixed: the
	 * lowest bit of the target's local index selects which. */
	for(unsigned lsrc=0; lsrc < groupSize; ++lsrc) {
		for(unsigned ltgt=0; ltgt < groupSize; ++ltgt) {
			if(lsrc == ltgt) {
				continue; /* this pair already has its one-to-one synapse */
			}
			net.addSynapse(
					globalIdx(0, lsrc),
					globalIdx(1, ltgt),
					delay(ltgt + lsrc),
					-0.00001f,
					ltgt & 0x1);
		}
	}
	return groupSize;
}
void verifyWeightChange(unsigned epoch, nemo::Simulation* sim, unsigned m, float reward) { unsigned checked = 0; for(unsigned local = 0; local < groupSize; ++local) { const std::vector<synapse_id>& synapses = sim->getSynapsesFrom(globalIdx(0, local)); for(std::vector<synapse_id>::const_iterator id = synapses.begin(); id != synapses.end(); ++id) { unsigned target = sim->getSynapseTarget(*id); if(local != localIdx(target)) continue; unsigned actualDelay = sim->getSynapseDelay(*id); BOOST_REQUIRE_EQUAL(delay(localIdx(target)), actualDelay); BOOST_REQUIRE(sim->getSynapsePlastic(*id)); /* dt is positive for pre-post pair, and negative for post-pre * pairs */ int dt = -(int(postFireDelay - actualDelay)); float dw_expected = 0.0f; if(dt > 0) { dw_expected = dwPost(dt-1); } else if(dt <= 0) { dw_expected = dwPre(dt); } float expectedWeight = initWeight + epoch * reward * dw_expected; float actualWeight = sim->getSynapseWeight(*id); const float tolerance = 0.001f; // percent BOOST_REQUIRE_CLOSE(expectedWeight, actualWeight, tolerance); checked += 1; } } std::cout << "Epoch " << epoch << ": checked " << checked << " synapses\n"; }
void bspsieve(){ double time0, time1; ulong *x; // local list of candidates ulong *ks; //place for proc0 to store intermediate ks ulong n, nl, i, iglob; int s, p; ulong k; // the current largest sure-prime n = N+1; // copy global N and increase by 1. (only proc 1 knows this) // this is so the maximum array idx == N bsp_begin(P); p= bsp_nprocs(); /* p = number of processors obtained */ printf("Now we have %d processors.\n", p); s= bsp_pid(); /* s = processor number */ if (s==0){ if(n<0) bsp_abort("Error in input: n is negative"); ks = vecalloculi(p); } bsp_push_reg(&n,SZULL); bsp_sync(); bsp_get(0,&n,0,&n,SZULL); //everyone reads N from proc 0 bsp_sync(); bsp_pop_reg(&n); nl= blockSize(p,s,n); // how big must s block be? printf("P(%d) tries to alloc vec of %lld ulongs", s, nl); printf(", size would be = %lld Mb\n", nl*SZULL/1024/1024); x= vecalloculi(nl); for (i=0; i<nl; i++){ // start by assuming everything is prime, except 1 iglob= globalIdx(p,s,n,i); x[i]= iglob; } if(s==0) x[1]=0; bsp_sync(); time0=bsp_time(); k = 2; // begin work while( k*k <= n ) { bspmarkmultiples(p,s,n,k,x); k = nextPrime(p,s,n,k,x); bsp_push_reg(&k, SZULL); bsp_sync(); if(s==0) { ks[0] = k; // my k for(i=1;i<p; i++) { bsp_get(i, &k, 0, &ks[i], SZULL); } } bsp_sync(); if(s==0) { k = findMinimum(p,ks); } bsp_sync(); //broadcast minimum bsp_get(0,&k,0,&k,SZULL); bsp_sync(); bsp_pop_reg(&k); } // end work bsp_sync(); time1=bsp_time(); ulong primes= 0; //printf("Processor %lld primes: \n", s); for(i = 0; i < blockSize(p,s,n); i++) if( x[i] != 0) primes++; //do not print primes, just count them. printf("proc %d finds %lld primes.\n", s, primes); fflush(stdout); if (s==0){ printf("This took only %.6lf seconds.\n", time1-time0); fflush(stdout); vecfreeuli(ks); } vecfreeuli(x); bsp_end(); } /* end bspsieve */
int main(int argc, char **argv) { #if defined(PARALLEL) || defined(PARTIALPAR) omp_set_num_threads(initParams::nprocs); size_t threadID; std::vector<size_t> neighbourThreadId; std::vector<size_t> workingThreadId; std::vector<cv::KeyPoint> globalKeypoints; cv::Mat globalDescriptors; std::vector<size_t> globalIdx(initParams::nprocs/2, 0); std::vector<size_t> levelThreadIdx; #endif cv::Mat frame; cv::VideoCapture cap; class initParams params; // Set motion field norm threshold. If intra-frame motion greater than this threshold carry out outlier classification, rotation compensation, FOE and TTC computations #if !defined(PARALLEL) const float OFThresh = 10.0f; #endif #ifdef USE_PAPI int events[NUM_EVENTS] = { PAPI_L1_DCM, PAPI_L2_DCA, PAPI_L2_DCM }; long long values[NUM_EVENTS+1]; long long start_time = PAPI_get_real_usec(); double total_time = 0.0f; #endif // Initialization of parameters params.set(); if (argc < 3) { std::cerr << "Run Configuration :\n\t./[program name] [input video file] [Total # of frames in video file, 798 for data set-1 and 977 for data set-2]\n"; exit(EXIT_FAILURE); } cap.open(argv[1]); if(!cap.isOpened()){ std::cerr << "Could not access video file: " << argv[1] << std::endl; exit(EXIT_FAILURE); } size_t frameTotal = (size_t) std::atoi(argv[2]); class agast objExtractor; class FindRobustFeatures rFeatures(&objExtractor, ¶ms); cv::Mat tmpDescriptors; std::vector<cv::KeyPoint> tmpKeypoints; #ifdef WRITEDATA analysis::TTCvector.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0.0f); analysis::kpVector.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0); analysis::outliersVector.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0); analysis::matchOutliersVector.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0); analysis::OFNorm.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0); analysis::keypointsVector.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), 0); #endif #if defined(PROF) || defined(WRITEDATA) time_t rawtime; char buffer[80]; 
time(&rawtime); struct tm * timeinfo = localtime(&rawtime); strftime(buffer,80, "%d-%m-%y_%H-%M-%S", timeinfo); analysis::timeStamp = std::string(buffer); #endif #ifdef PROF analysis::perfTime.resize((size_t)cap.get(CV_CAP_PROP_FRAME_COUNT), std::vector<float>(3, 0.0f)); #endif #ifdef USE_PAPI //********** PAPI Module initialization for performance analysis***********// // Setup PAPI library int ret = PAPI_library_init(PAPI_VER_CURRENT); if (ret != PAPI_VER_CURRENT) { printf("Problem while initializing PAPI library: %s\n", PAPI_strerror(ret)); exit(EXIT_FAILURE); } // Init multiplexer ret = PAPI_multiplex_init(); if (ret != PAPI_OK) { printf("Problem while initializing PAPI multiplexer: %s\n", PAPI_strerror(ret)); exit(1); } // Create event set for the events that we want to measure int eventset = PAPI_NULL; ret = PAPI_create_eventset(&eventset); if (ret != PAPI_OK) { printf("Problem while creating PAPI event set: %s\n", PAPI_strerror(ret)); exit(1); } ret = PAPI_add_event(eventset, PAPI_L1_DCA); if (ret != PAPI_OK) { printf("Problem while adding first PAPI event: %s\n", PAPI_strerror(ret)); exit(1); } // set multiplexer for event set ret = PAPI_set_multiplex(eventset); if (ret != PAPI_OK) { printf("Problem while setting PAPI multiplex: %s\n", PAPI_strerror(ret)); exit(1); } // Add events to event set ret = PAPI_add_events(eventset, events, NUM_EVENTS); if (ret != PAPI_OK) { printf("Problem while adding PAPI events: %s\n", PAPI_strerror(ret)); exit(1); } //********** END INITIALIZATION **********// // Start measuring computation runtime start_time = PAPI_get_real_usec(); // Start measuring events in event set ret = PAPI_start(eventset); if (ret != PAPI_OK) { printf("Problem while starting PAPI eventset: %s\n", PAPI_strerror(ret)); exit(1); } #endif try { while(( (size_t)cap.get(CV_CAP_PROP_POS_FRAMES) + initParams::frameDelay ) < frameTotal) { /****************************PARALLEL-REGION************************************/ #if defined(PARALLEL) || 
defined(PARTIALPAR) globalIdx.resize(initParams::nprocs/2, 0); analysis::refFrameCount = cap.get(CV_CAP_PROP_POS_FRAMES); analysis::frameCount = cap.get(CV_CAP_PROP_POS_FRAMES); for (size_t i = 0; i < initParams::nprocs; ++i) { if (i % 2 == 0) { workingThreadId.push_back(i); } else { neighbourThreadId.push_back(i); } } #pragma omp parallel num_threads(initParams::nprocs) private(threadID, frame) shared(globalIdx, params, neighbourThreadId, workingThreadId, objExtractor) { threadID = omp_get_thread_num(); // get index of the frame to be handled by a particular thread executed only once: size_t myFrameID = 0; #pragma omp for ordered schedule(static,1) for (size_t i = 0; i < initParams::nprocs; ++i) { #pragma omp ordered myFrameID = threadID + analysis::frameCount; analysis::frameCount++; } class agast objExtractorLoc; while(cap.get(CV_CAP_PROP_POS_FRAMES) != myFrameID) { std::this_thread::sleep_for(std::chrono::milliseconds(5)); } // get first frame and extract new features: cap >> objExtractorLoc.frame; objExtractorLoc.extractFeatures(); if (threadID == initParams::nprocs-1) { objExtractorLoc.frame.copyTo(objExtractor.frame); } #pragma omp master { // keep ref. of the first keypoints and their descriptors: objExtractor.descriptorsOld = objExtractorLoc.descriptors.clone(); objExtractor.keypointsOld = objExtractorLoc.keypoints; #ifdef VISUALIZE objExtractorLoc.frame.copyTo(objExtractor.frameOld); #endif } // follow keypoints over the next framedelay # of frames: objExtractorLoc.swap(); cap >> objExtractorLoc.frame; objExtractorLoc.extractFeatures(); objExtractorLoc.featureMatching(); #if ! 
defined(WRITEDATA) objExtractorLoc.homographyRANSAC(3.0f); #else objExtractorLoc.homographyRANSAC(3.0f, analysis::outliers, analysis::frameCount, analysis::outliersVector); #endif if((size_t)objExtractorLoc.goodMatches.size() > 5) objExtractorLoc.swapGM(); if (!globalIdx.empty()) { helperFunc::storeGlobal(globalKeypoints, globalDescriptors, objExtractorLoc, threadID, globalIdx, neighbourThreadId); #pragma omp barrier helperFunc::recomputeInterThreadGoodMatches(globalKeypoints, globalDescriptors, objExtractorLoc, workingThreadId, globalIdx, threadID, neighbourThreadId); } #pragma omp master { // re-swaping into global extractors' new points, due to swaping carried out at the end of inter thread matching: objExtractor.keypoints = objExtractorLoc.keypointsOld; objExtractor.descriptors = objExtractorLoc.descriptorsOld.clone(); objExtractor.featureMatching(); rFeatures.homographyRANSAC(3.0f, true); #ifdef VISUALIZE analysis::frameCount = cap.get(CV_CAP_PROP_POS_FRAMES) - 1; #endif } } //Compute velocity projection vectors: rFeatures.compute(); objExtractor.goodMatches.clear(); #ifdef VISUALIZE //Analyze and verify data if necessary: if((char)cv::waitKey(cap.get(CV_CAP_PROP_FPS))==27) break; cv::waitKey(10); #endif /****************************SERIAL-REGION**************************************/ #else //Get first frame and extract new features: analysis::refFrameCount = cap.get(CV_CAP_PROP_POS_FRAMES); #if defined(VISUALIZE) || defined(WRITEIMAGE) cv::Mat refFrame; #endif cap >> objExtractor.frame; #if defined(VISUALIZE) || defined(WRITEIMAGE) objExtractor.frame.copyTo(refFrame); #endif objExtractor.extractFeatures(); #ifdef PROF analysis::perfTime[analysis::frameCount][2] = objExtractor.elapsedTime.count(); #endif // Keep ref. 
of the first keypoints and their descriptors: objExtractor.descriptors.copyTo(tmpDescriptors); tmpKeypoints = objExtractor.keypoints; // Specify initial features as old features: objExtractor.swap(); // Follow keypoints over the next frames: while((size_t)cap.get(CV_CAP_PROP_POS_FRAMES) < frameTotal ) { analysis::frameCount = cap.get(CV_CAP_PROP_POS_FRAMES); analysis::outliers = 0; cap >> objExtractor.frame; objExtractor.extractFeatures(); #ifdef PROF analysis::perfTime[analysis::frameCount][2] = objExtractor.elapsedTime.count(); analysis::start = std::chrono::high_resolution_clock::now(); #endif // FLANN based matching of keypoints: objExtractor.featureMatching(); #ifdef PROF analysis::stop = std::chrono::high_resolution_clock::now(); analysis::elapsedTime = analysis::stop - analysis::start; analysis::perfTime[analysis::frameCount][0] = analysis::elapsedTime.count(); #endif #ifdef VISUALIZE analysis::visMatchingPts(objExtractor); cv::waitKey(5); #endif #ifdef WRITEDATA analysis::keypointsVector[analysis::frameCount] = std::min((size_t)objExtractor.keypoints.size(), (size_t)objExtractor.keypointsOld.size()); analysis::matchOutliersVector[analysis::frameCount] = analysis::keypointsVector[analysis::frameCount] - (size_t)objExtractor.goodMatches.size(); #endif #ifdef PROF analysis::start = std::chrono::high_resolution_clock::now(); #endif // RANSAC based outlier classification: rFeatures.homographyRANSAC(1.2f, true); #ifdef PROF analysis::stop = std::chrono::high_resolution_clock::now(); analysis::elapsedTime = analysis::stop - analysis::start; analysis::perfTime[analysis::frameCount][1] = analysis::elapsedTime.count(); #endif #ifdef VISUALIZE analysis::visMatchingPts(objExtractor); cv::waitKey(5); #endif float OF = 0.0f; // Calculation of motion field norm makes sense only if matching points are greater than 3 if ((size_t)objExtractor.goodMatches.size() > 3) { // Calculate intra-frame motion flow norm OF = helperFunc::calcMeanOFNorm(objExtractor.keypoints, 
objExtractor.keypointsOld, objExtractor.goodMatches); } #ifdef WRITEDATA analysis::OFNorm[analysis::frameCount] = helperFunc::calcMeanOFNorm(objExtractor.keypoints, objExtractor.keypointsOld, objExtractor.goodMatches); #endif // Different cases of computation: if ((size_t)objExtractor.goodMatches.size() < 5 || OF >= OFThresh) { // Case 1: Matching(refFrame(n) , refFrame(n+1)) lead to no good matches if (analysis::refFrameCount == (analysis::frameCount - 1) ) { // If large motion field norm was the case then compute ttc: if (OF >= OFThresh) { //Compute velocity projection vectors and ttc: rFeatures.compute(); } // Define latest frame as refFrame analysis::refFrameCount = analysis::frameCount; objExtractor.descriptors.copyTo(tmpDescriptors); tmpKeypoints = objExtractor.keypoints; #if defined(VISUALIZE) || defined(WRITEIMAGE) refFrame = objExtractor.frame.clone(); #endif // Move latest frame data to old objExtractor.swap(); } // Case 2: Matching(refFrame(n+i) , refFrame(n+i+1)) lead to no good matches else { // In case of motion flow greater than threshold compute ttc for the given intra-frame motion otherwise move consistent features and compute motion flow if (OF >= OFThresh) { // Compute velocity projection vectors: rFeatures.compute(); // Define latest frame as refFrame analysis::refFrameCount = analysis::frameCount; objExtractor.descriptors.copyTo(tmpDescriptors); tmpKeypoints = objExtractor.keypoints; #if defined(VISUALIZE) || defined(WRITEIMAGE) refFrame = objExtractor.frame.clone(); #endif // Move latest frame data to old objExtractor.swap(); } else { // Move consistent good matching features to new objExtractor.keypoints.swap(objExtractor.keypointsOld); objExtractor.keypointsOld.swap(tmpKeypoints); cv::Mat placeHolder(objExtractor.descriptors.size(), objExtractor.descriptors.type()); #ifdef OPTIMIZED objExtractor.descriptors.copyTo(placeHolder); #else placeHolder = objExtractor.descriptors.clone(); #endif objExtractor.descriptors.resize(0); 
objExtractor.descriptorsOld.copyTo(objExtractor.descriptors); tmpDescriptors.copyTo(objExtractor.descriptorsOld ); tmpDescriptors.resize(0); #ifdef OPTIMIZED placeHolder.copyTo(tmpDescriptors); #else tmpDescriptors = placeHolder.clone(); #endif placeHolder.release(); #if defined(VISUALIZE) || defined(WRITEIMAGE) cv::Mat tmpFrame; tmpFrame = objExtractor.frame.clone(); objExtractor.frame = objExtractor.frameOld.clone(); objExtractor.frameOld = refFrame.clone(); refFrame = tmpFrame.clone(); tmpFrame.release(); #endif // Match refFrame features with consistent good matching features uptil refFrame(n+i) objExtractor.featureMatching(); #ifdef VISUALIZE analysis::visMatchingPts(objExtractor, "Using Consistent Features"); cv::waitKey(10); #endif // Calculate Motion Flow norm OF = helperFunc::calcMeanOFNorm(objExtractor.keypoints, objExtractor.keypointsOld, objExtractor.goodMatches); if( OF > OFThresh ) { // Compute motion flow and ttc: rFeatures.compute(); } objExtractor.goodMatches.clear(); objExtractor.keypointsOld = tmpKeypoints; tmpDescriptors.copyTo(objExtractor.descriptorsOld); // Define latest frame as refFrame analysis::refFrameCount = analysis::frameCount; } } } else { objExtractor.swapGM(); } #ifdef VISUALIZE if((char)cv::waitKey(cap.get(CV_CAP_PROP_FPS))==27) break; cv::waitKey(10); #endif } #endif } } catch(cv::Exception) { std::cerr << "Exception @ frame : " << cap.get(CV_CAP_PROP_POS_FRAMES) << "\n"; } #ifdef USE_PAPI // Collect data from the event set if ( PAPI_stop(eventset, values) != PAPI_OK ) { printf("Problem while reading PAPI event set!\n"); exit(EXIT_FAILURE); } // Print results total_time = ((double)(PAPI_get_real_usec() - start_time))/1000000; printf("Computation execution time: %lf seconds\n", total_time); printf("L1 Cache miss rate = %lf \n",((double)values[1])/(values[0])); printf("L2 Cache miss rate = %lf \n",((double)values[3])/(values[2])); #endif /*************************WRITE DATA TO FILE BLOCK********************************/ 
/*<---------------------------- REGQUIRES SETTING OF PERSONAL PATH TO SAVING GENERATED DATA -------------------------->*/ #ifdef WRITEDATA std::ofstream file; std::string fileName; fileName = "/home/ragesam/gitThesis/testCase/poseFeatures/build/ttc_" + analysis::timeStamp + ".dat"; std::cout << "\nWRITING TTC DATA TO FILE \n"; file.open(fileName); if(!file.is_open()) { std::cout << "\nCan not write TTC to file\n"; std::exit(EXIT_FAILURE); } else { for (size_t i = 0; i < (size_t)analysis::TTCvector.size(); ++i) { file << i << "\t" << analysis::TTCvector[i] << "\t" << analysis::kpVector[i] <<"\n"; } } file.close(); fileName = "/home/ragesam/gitThesis/testCase/poseFeatures/build/outliers_" + analysis::timeStamp + ".dat"; std::cout << "\nWRITING OUTLIER DATA TO FILE \n"; file.open(fileName); if(!file.is_open()) { std::cout << "\nCan not write outliers to file\n"; std::exit(EXIT_FAILURE); } else { file << "Frame #\t# of Keypoints\tFLANN based outliers\tRANSAC based outliers\tIntra-fram projection vector norm\n"; for (size_t i = 0; i < (size_t)analysis::outliersVector.size(); ++i) { file << i << "\t" << analysis::keypointsVector[i] << "\t" << analysis::matchOutliersVector[i] << "\t" << analysis::outliersVector[i] << "\t" << analysis::OFNorm[i] <<"\n"; } } file.close(); #endif #ifdef PROF std::ofstream fileProf; std::string fileNameProf; fileNameProf = "/home/ragesam/gitThesis/testCase/poseFeatures/build/profile_" + analysis::timeStamp + ".dat"; std::cout << "\nWRITING PROFILING DATA TO FILE \n"; fileProf.open(fileNameProf); if(!fileProf.is_open()) { std::cout << "\nCan not write profiling data to file\n"; std::exit(EXIT_FAILURE); } else { fileProf << "Frame #\telapsedTime Matching\telapsedTime RANSAC\telapsedTimeDetection\n"; for (size_t i = 0; i < (size_t)analysis::perfTime.size(); ++i) { fileProf << i << "\t" << analysis::perfTime[i][0] << "\t" << analysis::perfTime[i][1] << "\t" << analysis::perfTime[i][2] << "\n"; } } fileProf.close(); #endif cap.release(); return 0; 
}