/** * @timef */ int PlasticConnTestProbe::outputState(double timed) { HyPerConn * c = getTargetHyPerConn(); InterColComm * icComm = c->getParent()->icCommunicator(); const int rcvProc = 0; if( icComm->commRank() != rcvProc ) { return PV_SUCCESS; } assert(getTargetConn()!=NULL); outputStream->printf(" Time %f, connection \"%s\":\n", timed, getTargetName()); const pvwdata_t * w = c->get_wDataHead(getArbor(), getKernelIndex()); const pvdata_t * dw = c->get_dwDataHead(getArbor(), getKernelIndex()); if( getOutputPlasticIncr() && dw == NULL ) { pvError().printf("PlasticConnTestProbe \"%s\": connection \"%s\" has dKernelData(%d,%d) set to null.\n", getName(), getTargetName(), getKernelIndex(), getArbor()); } int nxp = c->xPatchSize(); int nyp = c->yPatchSize(); int nfp = c->fPatchSize(); int status = PV_SUCCESS; for( int k=0; k<nxp*nyp*nfp; k++ ) { int x=kxPos(k,nxp,nyp,nfp); int wx = (nxp-1)/2 - x; // assumes connection is one-to-one if(getOutputWeights()) { pvdata_t wCorrect = timed*wx; pvdata_t wObserved = w[k]; if( fabs( ((double) (wObserved - wCorrect))/timed ) > 1e-4 ) { int y=kyPos(k,nxp,nyp,nfp); int f=featureIndex(k,nxp,nyp,nfp); outputStream->printf(" index %d (x=%d, y=%d, f=%d: w = %f, should be %f\n", k, x, y, f, wObserved, wCorrect); } } if(timed > 0 && getOutputPlasticIncr() && dw != NULL) { pvdata_t dwCorrect = wx; pvdata_t dwObserved = dw[k]; if( dwObserved != dwCorrect ) { int y=kyPos(k,nxp,nyp,nfp); int f=featureIndex(k,nxp,nyp,nfp); outputStream->printf(" index %d (x=%d, y=%d, f=%d: dw = %f, should be %f\n", k, x, y, f, dwObserved, dwCorrect); } } } assert(status==PV_SUCCESS); if( status == PV_SUCCESS ) { if (getOutputWeights()) { outputStream->printf(" All weights are correct.\n"); } if (getOutputPlasticIncr()) { outputStream->printf(" All plastic increments are correct.\n"); } } if(getOutputPatchIndices()) { patchIndices(c); } return PV_SUCCESS; }
int CIFARGTLayer::updateState(double timef, double dt) { //getline (inputfile,inputString); inputString = std::string(imageLayer->getFilename()); unsigned found = inputString.find_last_of("/\\"); //CIFAR is 0 indexed char cVal = inputString.at(found-1); iVal = cVal - '0'; pvdata_t * A = getCLayer()->activity->data; const PVLayerLoc * loc = getLayerLoc(); //std::cout << "time: " << parent->simulationTime() << " inputString:" << inputString << " iVal:" << iVal << "\n"; assert(iVal >= 0 && iVal < 10); //NF must be 10, one for each class assert(loc->nf == 10); for(int ni = 0; ni < getNumNeurons(); ni++){ int nExt = kIndexExtended(ni, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, loc->nx+loc->halo.rt+loc->halo.lt, loc->ny+loc->halo.dn+loc->halo.up, loc->nf); if(fi == iVal){ A[nExt] = 1; } else{ if(negativeGt){ A[nExt] = -1; } else{ A[nExt] = 0; } } } return PV_SUCCESS; }
// // A replacement for globalIndexFromLocal from conversions.h. // WARNING - any changes in conversions.h should be reflected here. static inline int globalIndexFromLocal_nompi(int kl, PVLayerLoc loc) { int kxg = loc.kx0 + kxPos(kl, loc.nx, loc.ny, loc.nf); int kyg = loc.ky0 + kyPos(kl, loc.nx, loc.ny, loc.nf); int kf = featureIndex(kl, loc.nx, loc.ny, loc.nf); return kIndex(kxg, kyg, kf, loc.nxGlobal, loc.nyGlobal, loc.nf); }
int ImageTestLayer::updateStateWrapper(double time, double dt) { Image::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt] * 255; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf); if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; exit(-1); } } } return PV_SUCCESS; }
/**
 * Prints, for every weight patch of conn, which data (kernel) index the
 * presynaptic neuron uses and where its patch starts.
 *
 * For each extended presynaptic index kPre the line shows kPre, its x/y
 * position relative to the restricted layer (extended position minus the left
 * and upper halo widths), its feature index, the value of
 * conn->patchIndexToDataIndex(kPre), and the x/y position of the patch's
 * offset within an nxp-by-nyp-by-nfp patch.  Output goes to outputstream->fp.
 *
 * @param conn  connection whose patches are listed; the number of weight
 *              patches is asserted to equal the presynaptic layer's extended size
 * @return PV_SUCCESS
 */
int KernelProbe::patchIndices(HyPerConn * conn)
{
   const int nxp = conn->xPatchSize();
   const int nyp = conn->yPatchSize();
   const int nfp = conn->fPatchSize();

   const int numPatches = conn->getNumWeightPatches();
   assert(numPatches == conn->preSynapticLayer()->getNumExtended());

   const PVLayerLoc * loc = conn->preSynapticLayer()->getLayerLoc();
   const PVHalo * halo = &loc->halo;
   const int nfPre = loc->nf;
   // Dimensions of the presynaptic layer including its halo.
   const int nxPreExt = loc->nx + halo->lt + halo->rt;
   const int nyPreExt = loc->ny + halo->dn + halo->up;

   for (int kPre = 0; kPre < numPatches; kPre++) {
      PVPatch * patch = conn->getWeights(kPre, arborID);
      const int patchStartX = kxPos(patch->offset, nxp, nyp, nfp);
      const int patchStartY = kyPos(patch->offset, nxp, nyp, nfp);

      // Shift from extended coordinates to coordinates relative to the restricted layer.
      const int xPre = kxPos(kPre, nxPreExt, nyPreExt, nfPre) - halo->lt;
      const int yPre = kyPos(kPre, nxPreExt, nyPreExt, nfPre) - halo->up;
      const int fPre = featureIndex(kPre, nxPreExt, nyPreExt, nfPre);

      fprintf(outputstream->fp,
              " presynaptic neuron %d (x=%d, y=%d, f=%d) uses kernel index %d, starting at x=%d, y=%d\n",
              kPre, xPre, yPre, fPre, conn->patchIndexToDataIndex(kPre), patchStartX, patchStartY);
   }
   return PV_SUCCESS;
}
int InputLayer::updateState(double timef, double dt) { if(!constantValue || firstRun){ char cVal = inputString.at(int(parent->simulationTime()-1)%numExamples); iVal = cVal - '0'; } pvdata_t * A = getCLayer()->activity->data; const PVLayerLoc * loc = getLayerLoc(); assert(loc->nf == 2); //Set binary values of xor values std::cout << timef << ": input val:" << iVal << "\n"; int negVal; negVal = -1; for(int ni = 0; ni < getNumNeurons(); ni++){ int nExt = kIndexExtended(ni, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, loc->nx+loc->halo.lt+loc->halo.rt, loc->ny+loc->halo.dn+loc->halo.up, loc->nf); switch(iVal){ case 0: if(fi == 0){ A[nExt] = negVal; } if(fi == 1){ A[nExt] = negVal; } break; case 1: if(fi == 0){ A[nExt] = negVal; } if(fi == 1){ A[nExt] = 1; } break; case 2: if(fi == 0){ A[nExt] = 1; } if(fi == 1){ A[nExt] = negVal; } break; case 3: if(fi == 0){ A[nExt] = 1; } if(fi == 1){ A[nExt] = 1; } break; } } firstRun = false; return PV_SUCCESS; }
int FilenameParsingGroundTruthLayer::updateState(double time, double dt) { update_timer->start(); pvdata_t * A = getCLayer()->activity->data; const PVLayerLoc * loc = getLayerLoc(); int num_neurons = getNumNeurons(); if (num_neurons != numClasses) { pvError() << "The number of neurons in " << getName() << " is not equal to the number of classes specified in " << parent->getOutputPath() << "/classes.txt\n"; } for(int b = 0; b < loc->nbatch; b++){ char * currentFilename = NULL; int filenameLen = 0; //TODO depending on speed of this layer, more efficient way would be to preallocate currentFilename buffer if(parent->icCommunicator()->commRank()==0){ currentFilename = strdup(movieLayer->getFilename(b)); //Get length of currentFilename and broadcast int filenameLen = (int) strlen(currentFilename) + 1; //+1 for the null terminator //Using local communicator, as each batch MPI will handle it's own run MPI_Bcast(&filenameLen, 1, MPI_INT, 0, parent->icCommunicator()->communicator()); //Braodcast filename to all other local processes MPI_Bcast(currentFilename, filenameLen, MPI_CHAR, 0, parent->icCommunicator()->communicator()); } else{ //Receive broadcast about length of filename MPI_Bcast(&filenameLen, 1, MPI_INT, 0, parent->icCommunicator()->communicator()); currentFilename = (char*)calloc(sizeof(char), filenameLen); //Receive filename MPI_Bcast(currentFilename, filenameLen, MPI_CHAR, 0, parent->icCommunicator()->communicator()); } std::string fil = currentFilename; pvdata_t * ABatch = A + b * getNumExtended(); for(int i = 0; i < num_neurons; i++){ int nExt = kIndexExtended(i, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, loc->nx+loc->halo.rt+loc->halo.lt, loc->ny+loc->halo.dn+loc->halo.up, loc->nf); int match = fil.find(classes[i]); if(0 <= match){ ABatch[nExt] = gtClassTrueValue; } else{ ABatch[nExt] = gtClassFalseValue; } } //Free buffer, TODO, preallocate buffer to avoid this free(currentFilename); } 
update_timer->stop(); return PV_SUCCESS; }
/** * @timef */ int MomentumConnTestProbe::outputState(double timed) { HyPerConn * c = getTargetHyPerConn(); InterColComm * icComm = c->getParent()->icCommunicator(); const int rcvProc = 0; if( icComm->commRank() != rcvProc ) { return PV_SUCCESS; } assert(getTargetConn()!=NULL); FILE * fp = getStream()->fp; fprintf(fp, " Time %f, connection \"%s\":\n", timed, getTargetName()); const pvwdata_t * w = c->get_wDataHead(getArbor(), getKernelIndex()); const pvdata_t * dw = c->get_dwDataHead(getArbor(), getKernelIndex()); if( getOutputPlasticIncr() && dw == NULL ) { fprintf(stderr, "MomentumConnTestProbe \"%s\": connection \"%s\" has dKernelData(%d,%d) set to null.\n", getName(), getTargetName(), getKernelIndex(), getArbor()); assert(false); } int nxp = c->xPatchSize(); int nyp = c->yPatchSize(); int nfp = c->fPatchSize(); int status = PV_SUCCESS; for( int k=0; k<nxp*nyp*nfp; k++ ) { pvdata_t wObserved = w[k]; //Pulse happens at time 3 pvdata_t wCorrect; if(timed < 3){ wCorrect = 0; } else{ if(isViscosity){ wCorrect = 1; for(int i = 0; i < (timed - 3); i++){ wCorrect += exp(-(2*(i+1))); } } else{ wCorrect = 2 - pow(2, -(timed - 3)); } } if( fabs( ((double) (wObserved - wCorrect))/timed ) > 1e-4 ) { int y=kyPos(k,nxp,nyp,nfp); int f=featureIndex(k,nxp,nyp,nfp); fprintf(fp, " w = %f, should be %f\n", wObserved, wCorrect); exit(-1); } } return PV_SUCCESS; }
int CPTestInputLayer::initializeV() { assert(parent->parameters()->value(name, "restart", 0.0f, false)==0.0f); // initializeV should only be called if restart is false const PVLayerLoc * loc = getLayerLoc(); for (int b = 0; b < parent->getNBatch(); b++){ pvdata_t * VBatch = getV() + b * getNumNeurons(); for (int k = 0; k < getNumNeurons(); k++){ int kx = kxPos(k,loc->nx,loc->nx,loc->nf); int ky = kyPos(k,loc->nx,loc->ny,loc->nf); int kf = featureIndex(k,loc->nx,loc->ny,loc->nf); int kGlobal = kIndex(loc->kx0+kx,loc->ky0+ky,kf,loc->nxGlobal,loc->nyGlobal,loc->nf); VBatch[k] = (pvdata_t) kGlobal; } } return PV_SUCCESS; }
int MoviePvpTestLayer::updateStateWrapper(double time, double dt) { MoviePvp::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); int frameIdx; if(strcmp(getBatchMethod(), "byImage") == 0){ frameIdx = (time-1) * nbatch + b; } else if(strcmp(getBatchMethod(), "byMovie") == 0){ frameIdx = b * 2 + (time-1); } for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt]; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf) + frameIdx*192; if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO " << name << " test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; //exit(-1); } } } return PV_SUCCESS; }
/** * @timef * NOTES: * - kPost, kxPost, kyPost are indices in the restricted post-synaptic layer. * */ int PostConnProbe::outputState(double timef) { int k, kxPre, kyPre; HyPerConn * c = getTargetHyPerConn(); PVPatch * w; PVPatch *** wPost = c->convertPreSynapticWeights(timef); // TODO - WARNING: currently only works if nfPre==0 const PVLayer * lPre = c->preSynapticLayer()->clayer; const PVLayer * lPost = c->postSynapticLayer()->clayer; const int nxPre = lPre->loc.nx; const int nyPre = lPre->loc.ny; const int nfPre = lPre->loc.nf; const PVHalo * haloPre = &lPre->loc.halo; const int nxPost = lPost->loc.nx; const int nyPost = lPost->loc.ny; const int nfPost = lPost->loc.nf; const PVHalo * haloPost = &lPost->loc.halo; // calc kPost if needed if (kPost < 0) { kPost = kIndex(kxPost, kyPost, kfPost, nxPost, nyPost, nfPost); } else { kxPost = kxPos(kPost, nxPost, nyPost, nfPost); kyPost = kyPos(kPost, nxPost, nyPost, nfPost); kfPost = featureIndex(kPost, nxPost, nyPost, nfPost); } c->preSynapticPatchHead(kxPost, kyPost, kfPost, &kxPre, &kyPre); const int kxPreEx = kxPre + haloPre->lt; const int kyPreEx = kyPre + haloPre->up; const int kxPostEx = kxPost + haloPost->lt; const int kyPostEx = kyPost + haloPost->up; const int kPostEx = kIndex(kxPostEx, kyPostEx, kfPost, nxPost+haloPost->lt+haloPost->rt, nyPost+haloPost->dn+haloPost->up, nfPost); const bool postFired = lPost->activity->data[kPostEx] > 0.0; w = wPost[getArborID()][kPost]; pvwdata_t * wPostData = c->getWPostData(getArborID(),kPost); const int nw = w->nx * w->ny * nfPost; //w->nf; if (wPrev == NULL) { wPrev = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t)); for (k = 0; k < nw; k++) { wPrev[k] = wPostData[k]; // This is broken if the patch is shrunken } } if (wActiv == NULL) { wActiv = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t)); } k = 0; for (int ky = 0; ky < w->ny; ky++) { for (int kx = 0; kx < w->nx; kx++) { int kPre = kIndex(kx+kxPreEx, ky+kyPreEx, 0, nxPre+haloPre->lt+haloPre->rt, nyPre+haloPre->dn+haloPre->up, 
nfPre); wActiv[k++] = lPre->activity->data[kPre]; } } bool changed = false; for (k = 0; k < nw; k++) { if (wPrev[k] != wPostData[k] || wActiv[k] != 0.0) { changed = true; break; } } FILE * fp = getStream()->fp; if (stdpVars && (postFired || changed)) { if (postFired) fprintf(fp, "*"); else fprintf(fp, " "); fprintf(fp, "t=%.1f w%d(%d,%d,%d) prePatchHead(%d,%d): ", timef, kPost, kxPost, kyPost, kfPost, kxPre, kyPre); if (image) fprintf(fp, "tag==%d ", image->tag()); fprintf(fp, "\n"); } if (stdpVars && changed) { text_write_patch_extra(fp, w, wPostData, wPrev, wActiv, getTargetHyPerConn()); fflush(fp); } for (k = 0; k < nw; k++) { wPrev[k] = wPostData[k]; } if (outputIndices) { fprintf(fp, "w%d(%d,%d,%d) prePatchHead(%d,%d): ", kPost, kxPost, kyPost, kfPost, kxPre, kyPre); if(!stdpVars){ fprintf(fp,"\n"); } const PVLayer * lPre = c->preSynapticLayer()->clayer; write_patch_indices(fp, w, &lPre->loc, kxPre, kyPre, 0); fflush(fp); } return 0; }
/**
 * Delivers pooled presynaptic activity to the postsynaptic layer, iterating
 * over postsynaptic (target) neurons.
 *
 * For every batch element and every restricted target neuron, the neuron's
 * gSyn entry is reset (to -INFINITY for max pooling, 0 otherwise) and, if a
 * post index layer is in use, its gate entry is reset to -1.  Then, for each
 * row of the post-perspective patch, accumulateFunctionFromPostPointer is
 * called with the global extended index of the row's first source element,
 * the number of elements in the row, the target gSyn entry, the row's source
 * activity, the pooling weight, the delta-time factor, the gate entry and the
 * feature stride.
 *
 * Returns immediately with PV_SUCCESS when the connection's channel is
 * CHANNEL_NOUPDATE.  Exits the process if getPostToPreActivity() returns null.
 *
 * @param activity  presynaptic activity cube; its data is indexed per batch
 *                  element using the source layer's extended dimensions
 * @param arborID   arbor index; only asserted to be non-negative here
 * @return PV_SUCCESS
 */
int PoolingConn::deliverPostsynapticPerspective(PVLayerCube const * activity, int arborID) {
   //Check channel number for noupdate
   if(getChannel() == CHANNEL_NOUPDATE) {
      return PV_SUCCESS;
   }
   assert(post->getChannel(getChannel()));

   assert(arborID >= 0);
   //Get number of neurons restricted target
   const int numPostRestricted = post->getNumNeurons();

   float dt_factor = getConvertToRateDeltaTimeFactor();

   const PVLayerLoc * sourceLoc = preSynapticLayer()->getLayerLoc();
   const PVLayerLoc * targetLoc = post->getLayerLoc();

   const int sourceNx = sourceLoc->nx;
   const int sourceNy = sourceLoc->ny;
   const int sourceNf = sourceLoc->nf;
   const int targetNx = targetLoc->nx;
   const int targetNy = targetLoc->ny;
   const int targetNf = targetLoc->nf;

   const PVHalo * sourceHalo = &sourceLoc->halo;
   const PVHalo * targetHalo = &targetLoc->halo;

   //get source layer's extended y stride
   int sy = (sourceNx+sourceHalo->lt+sourceHalo->rt)*sourceNf;

   //The start of the gsyn buffer
   pvdata_t * gSynPatchHead = post->getChannel(this->getChannel());

   clearGateIdxBuffer();
   // Start of the gate buffer taken from the post index layer; stays NULL when
   // no post index layer is needed.
   int* gatePatchHead = NULL;
   if(needPostIndexLayer) {
      gatePatchHead = postIndexLayer->getChannel(CHANNEL_EXC);
   }

   // One entry per restricted target neuron, used below as the offset into
   // the source batch's activity buffer where that neuron's patch starts.
   long * startSourceExtBuf = getPostToPreActivity();
   if(!startSourceExtBuf) {
      std::cout << "HyPerLayer::recvFromPost error getting preToPostActivity from connection. Is shrink_patches on?\n";
      exit(EXIT_FAILURE);
   }

   // Value each gSyn entry is reset to before accumulation:
   // -INFINITY for max pooling, 0 for every other accumulate type.
   float resetVal = 0;
   if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
      resetVal = -INFINITY;
   }

   for(int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
         // Start of this batch element in the extended source activity and in
         // the restricted target gSyn buffer.
         pvdata_t * activityBatch = activity->data + b * (sourceNx + sourceHalo->rt + sourceHalo->lt) * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
         pvdata_t * gSynPatchHeadBatch = gSynPatchHead + b * targetNx * targetNy * targetNf;

         //Change restricted to extended post neuron
         int kTargetExt = kIndexExtended(kTargetRes, targetNx, targetNy, targetNf, targetHalo->lt, targetHalo->rt, targetHalo->dn, targetHalo->up);

         //Read from buffer
         long startSourceExt = startSourceExtBuf[kTargetRes];

         //Calculate target's start of gsyn
         pvdata_t * gSynPatchPos = gSynPatchHeadBatch + kTargetRes;
         //Reset the target entry (a huge negative number only for max pooling)
         *gSynPatchPos = resetVal;

         int* gatePatchPos = NULL;
         if(needPostIndexLayer) {
            gatePatchPos = gatePatchHead + b * postIndexLayer->getNumNeurons() + kTargetRes;
            //Initialize gatePatchPos as a negative number
            *gatePatchPos = -1;
         }

         float* activityStartBuf = &(activityBatch[startSourceExt]);

         pvwdata_t * weightY = NULL; //No weights in pooling; this pointer is not used below
         int sf = postConn->fPatchSize();
         int yPatchSize = postConn->yPatchSize();
         int numPerStride = postConn->xPatchSize() * postConn->fPatchSize();

         // The target neuron's feature index is used as the feature offset
         // into each row of the source patch.
         const PVLayerLoc * postLoc = post->getLayerLoc();
         const int kfPost = featureIndex(kTargetExt, postLoc->nx + postLoc->halo.lt + postLoc->halo.rt, postLoc->ny + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
         int offset = kfPost;

         // Pooling weight: 1 by default; for sum pooling it is the reciprocal
         // of the patch area scaled by the relative x and y scales.
         pvwdata_t w = 1.0;
         if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
            float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
            float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
            w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
         }

         for (int ky = 0; ky < yPatchSize; ky++) {
            // Local extended index of the first source element of this row,
            // converted below to an index in the global extended layer.
            int kPreExt = startSourceExt + ky*sy+offset;
            const int kxPreExt = kxPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            const int kyPreExt = kyPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            const int kfPre = featureIndex(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            const int kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
            const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn, sourceLoc->nf);

            float * activityY = &(activityStartBuf[ky*sy+offset]);

            (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, dt_factor, gatePatchPos, sf);
         }
      }
   }
   return PV_SUCCESS;
}
int PoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE) { return PV_SUCCESS; } assert(post->getChannel(getChannel())); float dt_factor; if (getPvpatchAccumulateType()==ACCUMULATE_STOCHASTIC) { dt_factor = getParent()->getDeltaTime(); } else { dt_factor = getConvertToRateDeltaTimeFactor(); } const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; float resetVal = 0; if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { resetVal = -INFINITY; float* gSyn = post->getChannel(getChannel()); //gSyn is res #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < post->getNumNeuronsAllBatches(); i++) { gSyn[i] = resetVal; } } clearGateIdxBuffer(); for(int b = 0; b < parent->getNBatch(); b++) { pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int* gatePatchHeadBatch = NULL; if(needPostIndexLayer) { gatePatchHeadBatch = postIndexLayer->getChannel(CHANNEL_EXC) + b * postIndexLayer->getNumNeurons(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse) { activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse) { numLoop = activity->numActive[b]; } else { numLoop = numExtended; } if(thread_gateIdxBuffer) { #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * post->getNumNeurons(); i++) { int ti = i/post->getNumNeurons(); int ni = i % post->getNumNeurons(); 
thread_gateIdxBuffer[ti][ni] = -1; } } #ifdef PV_USE_OPENMP_THREADS //Clear all gsyn buffers if(thread_gSyn) { int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * numNeurons; i++) { int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = resetVal; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse) { kPreExt = activeIndicesBatch[loopIndex]; } else { kPreExt = loopIndex; } float a = activityBatch[kPreExt] * dt_factor; //if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; //float * gatePatchHead = NULL; int * gatePatchHead = NULL; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn) { int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else { gSynPatchHead = gSynPatchHeadBatch; } if(needPostIndexLayer) { if(thread_gateIdxBuffer) { int ti = omp_get_thread_num(); gatePatchHead = thread_gateIdxBuffer[ti]; } else { gatePatchHead = gatePatchHeadBatch; } } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; if(needPostIndexLayer) { gatePatchHead = gatePatchHeadBatch; } #endif // PV_USE_OPENMP_THREADS //deliverOnePreNeuronActivity(kPreExt, arborID, a, gSynPatchHead, gatePatchHead); PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; const int sy = getPostNonextStrides()->sy; // stride in layer pvwdata_t * weightDataStart = NULL; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); int* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); //float* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn 
+ preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt, preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn, preLoc->nf); int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) { float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { if(needPostIndexLayer) { auxPtr = (postGatePatchStart+ y*sy + offset); } (accumulateFunctionPointer)(kPreGlobalExt, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } #ifdef PV_USE_OPENMP_THREADS //Accumulate back into gSyn // Should this be done in HyPerLayer where it can be done once, as opposed to once per connection? 
if(thread_gSyn) { pvdata_t * gSynPatchHead = gSynPatchHeadBatch; //float* gateIdxBuffer = postIndexLayer->getChannel(CHANNEL_EXC); int * gateIdxBuffer = NULL; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer = gatePatchHeadBatch; } int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++) { //Different for maxpooling if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { for(int ti = 0; ti < parent->getNumThreads(); ti++) { if(gSynPatchHead[ni] < thread_gSyn[ti][ni]) { gSynPatchHead[ni] = thread_gSyn[ti][ni]; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer[ni] = thread_gateIdxBuffer[ti][ni]; assert(gateIdxBuffer >= 0); } } } } else { for(int ti = 0; ti < parent->getNumThreads(); ti++) { gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } if(activity->isSparse) { pvdata_t * gSyn = post->getChannel(getChannel()); for (int k=0; k<post->getNumNeuronsAllBatches(); k++) { if (gSyn[k]==-INFINITY) { gSyn[k] = 0.0f; } } } return PV_SUCCESS; }
int HyPerConnDebugInitWeights::cocircCalcWeights(PVPatch * wp, pvdata_t * dataStart, int dataPatchIndex, int noPre, int noPost, float sigma_cocirc, float sigma_kurve, float sigma_chord, float delta_theta_max, float cocirc_self, float delta_radius_curvature, int numFlanks, float shift, float aspect, float rotate, float sigma, float r2Max, float strength) { // pvdata_t * w = wp->data; const float min_weight = 0.0f; // read in as param const float sigma2 = 2 * sigma * sigma; const float sigma_cocirc2 = 2 * sigma_cocirc * sigma_cocirc; const int nxPatch = (int) wp->nx; const int nyPatch = (int) wp->ny; const int nfPatch = fPatchSize(); if (nxPatch * nyPatch * nfPatch == 0) { return 0; // reduced patch size is zero } // get strides of (potentially shrunken) patch const int sx = xPatchStride(); assert(sx == nfPatch); // const int sy = yPatchStride(); // no assert here because patch may be shrunken const int sf = fPatchStride(); assert(sf == 1); // make full sized temporary patch, positioned around center of unit cell // PVPatch * wp_tmp; // wp_tmp = pvpatch_inplace_new(nxp, nyp, nfp); // pvdata_t * w_tmp = wp_tmp->data; pvdata_t * w_tmp = dataStart; // get/check dimensions and strides of full sized temporary patch const int nxPatch_tmp = nxp; // wp_tmp->nx; const int nyPatch_tmp = nyp; // wp_tmp->ny; const int nfPatch_tmp = fPatchSize(); // should nfPatch_tmp just be replaced with nfPatch throughout? 
int kxKernelIndex; int kyKerneIndex; int kfKernelIndex; this->dataIndexToUnitCellIndex(dataPatchIndex, &kxKernelIndex, &kyKerneIndex, &kfKernelIndex); const int kxPre_tmp = kxKernelIndex; const int kyPre_tmp = kyKerneIndex; // const int kfPre_tmp = kfKernelIndex; const int sx_tmp = xPatchStride(); assert(sx_tmp == fPatchSize()); const int sy_tmp = yPatchStride(); assert(sy_tmp == fPatchSize() * nxPatch_tmp); const int sf_tmp = fPatchStride(); assert(sf_tmp == 1); // get distances to nearest neighbor in post synaptic layer float xDistNNPreUnits; float xDistNNPostUnits; dist2NearestCell(kxPre_tmp, pre->getXScale(), post->getXScale(), &xDistNNPreUnits, &xDistNNPostUnits); float yDistNNPreUnits; float yDistNNPostUnits; dist2NearestCell(kyPre_tmp, pre->getYScale(), post->getYScale(), &yDistNNPreUnits, &yDistNNPostUnits); // get indices of nearest neighbor int kxNN; int kyNN; kxNN = nearby_neighbor(kxPre_tmp, pre->getXScale(), post->getXScale()); kyNN = nearby_neighbor(kyPre_tmp, pre->getYScale(), post->getYScale()); // get indices of patch head int kxHead; int kyHead; kxHead = zPatchHead(kxPre_tmp, nxPatch_tmp, pre->getXScale(), post->getXScale()); kyHead = zPatchHead(kyPre_tmp, nyPatch_tmp, pre->getYScale(), post->getYScale()); // get distance to patch head float xDistHeadPostUnits; xDistHeadPostUnits = xDistNNPostUnits + (kxHead - kxNN); float yDistHeadPostUnits; yDistHeadPostUnits = yDistNNPostUnits + (kyHead - kyNN); float xRelativeScale = xDistNNPreUnits == xDistNNPostUnits ? 1.0f : xDistNNPreUnits / xDistNNPostUnits; float xDistHeadPreUnits; xDistHeadPreUnits = xDistHeadPostUnits * xRelativeScale; float yRelativeScale = yDistNNPreUnits == yDistNNPostUnits ? 
1.0f : yDistNNPreUnits / yDistNNPostUnits; float yDistHeadPreUnits; yDistHeadPreUnits = yDistHeadPostUnits * yRelativeScale; // sigma is in units of pre-synaptic layer const float dxPost = powf(2, post->getXScale()); const float dyPost = powf(2, post->getYScale()); //const int kfPre = kPre % pre->clayer->loc.nf; const int kfPre = featureIndex(dataPatchIndex, pre->getLayerLoc()->nx, pre->getLayerLoc()->ny, pre->getLayerLoc()->nf); bool POS_KURVE_FLAG = false; // handle pos and neg curvature separately bool SADDLE_FLAG = false; // handle saddle points separately const int nKurvePre = pre->getLayerLoc()->nf / noPre; const int nKurvePost = post->getLayerLoc()->nf / noPost; const float dThPre = PI / noPre; const float dThPost = PI / noPost; const float th0Pre = rotate * dThPre / 2.0; const float th0Post = rotate * dThPost / 2.0; const int iThPre = dataPatchIndex % noPre; //const int iThPre = kfPre / nKurvePre; const float thetaPre = th0Pre + iThPre * dThPre; int iKvPre = kfPre % nKurvePre; bool iPosKurvePre = false; bool iSaddlePre = false; float radKurvPre = delta_radius_curvature + iKvPre * delta_radius_curvature; float kurvePre = (radKurvPre != 0.0f) ? 1 / radKurvPre : 1.0f; int iKvPreAdj = iKvPre; if (POS_KURVE_FLAG) { assert(nKurvePre >= 2); iPosKurvePre = iKvPre >= (int) (nKurvePre / 2); if (SADDLE_FLAG) { assert(nKurvePre >= 4); iSaddlePre = (iKvPre % 2 == 0) ? 0 : 1; iKvPreAdj = ((iKvPre % (nKurvePre / 2)) / 2);} else { // SADDLE_FLAG iKvPreAdj = (iKvPre % (nKurvePre/2));} } // POS_KURVE_FLAG radKurvPre = delta_radius_curvature + iKvPreAdj * delta_radius_curvature; kurvePre = (radKurvPre != 0.0f) ? 
1 / radKurvPre : 1.0f; float sigma_kurve_pre = sigma_kurve * radKurvPre; float sigma_kurve_pre2 = 2 * sigma_kurve_pre * sigma_kurve_pre; sigma_chord *= PI * radKurvPre; float sigma_chord2 = 2.0 * sigma_chord * sigma_chord; // loop over all post synaptic neurons in patch for (int kfPost = 0; kfPost < nfPatch_tmp; kfPost++) { //int iThPost = kfPost / nKurvePost; int iThPost = kfPost % noPost; float thetaPost = th0Post + iThPost * dThPost; int iKvPost = kfPost % nKurvePost; bool iPosKurvePost = false; bool iSaddlePost = false; float radKurvPost = delta_radius_curvature + iKvPost * delta_radius_curvature; float kurvePost = (radKurvPost != 0.0f) ? 1 / radKurvPost : 1.0f; int iKvPostAdj = iKvPost; if (POS_KURVE_FLAG) { assert(nKurvePost >= 2); iPosKurvePost = iKvPost >= (int) (nKurvePost / 2); if (SADDLE_FLAG) { assert(nKurvePost >= 4); iSaddlePost = (iKvPost % 2 == 0) ? 0 : 1; iKvPostAdj = ((iKvPost % (nKurvePost / 2)) / 2); } else { // SADDLE_FLAG iKvPostAdj = (iKvPost % (nKurvePost / 2)); } } // POS_KURVE_FLAG radKurvPost = delta_radius_curvature + iKvPostAdj * delta_radius_curvature; kurvePost = (radKurvPost != 0.0f) ? 1 / radKurvPost : 1.0f; float sigma_kurve_post = sigma_kurve * radKurvPost; float sigma_kurve_post2 = 2 * sigma_kurve_post * sigma_kurve_post; float deltaTheta = fabsf(thetaPre - thetaPost); deltaTheta = (deltaTheta <= PI / 2.0) ? 
deltaTheta : PI - deltaTheta; if (deltaTheta > delta_theta_max) { continue; } for (int jPost = 0; jPost < nyPatch_tmp; jPost++) { float yDelta = (yDistHeadPreUnits + jPost * dyPost); for (int iPost = 0; iPost < nxPatch_tmp; iPost++) { float xDelta = (xDistHeadPreUnits + iPost * dxPost); float gDist = 0.0; float gChord = 1.0; float gCocirc = 1.0; float gKurvePre = 1.0; float gKurvePost = 1.0; // rotate the reference frame by th float dxP = +xDelta * cosf(thetaPre) + yDelta * sinf(thetaPre); float dyP = -xDelta * sinf(thetaPre) + yDelta * cosf(thetaPre); // include shift to flanks float dyP_shift = dyP - shift; float dyP_shift2 = dyP + shift; float d2 = dxP * dxP + aspect * dyP * aspect * dyP; float d2_shift = dxP * dxP + (aspect * (dyP_shift) * aspect * (dyP_shift)); float d2_shift2 = dxP * dxP + (aspect * (dyP_shift2) * aspect * (dyP_shift2)); if (d2_shift <= r2Max) { gDist += expf(-d2_shift / sigma2); } if (numFlanks > 1) { // include shift in opposite direction if (d2_shift2 <= r2Max) { gDist += expf(-d2_shift2 / sigma2); } } if (gDist == 0.0) continue; if (d2 == 0) { bool sameLoc = (kfPre == kfPost); if ((!sameLoc) || (cocirc_self)) { gCocirc = sigma_cocirc > 0 ? expf(-deltaTheta * deltaTheta / sigma_cocirc2) : expf(-deltaTheta * deltaTheta / sigma_cocirc2) - 1.0; if ((nKurvePre > 1) && (nKurvePost > 1)) { gKurvePre = expf(-(kurvePre - kurvePost) * (kurvePre - kurvePost) / 2 * (sigma_kurve_pre * sigma_kurve_pre + sigma_kurve_post * sigma_kurve_post)); } } else { // sameLoc && !cocircSelf gCocirc = 0.0; continue; } } else { // d2 > 0 float atanx2_shift = thetaPre + 2. * atan2f(dyP_shift, dxP); // preferred angle (rad) atanx2_shift += 2. * PI; atanx2_shift = fmodf(atanx2_shift, PI); atanx2_shift = fabsf(atanx2_shift - thetaPost); float chi_shift = atanx2_shift; //fabsf(atanx2_shift - thetaPost); // radians if (chi_shift >= PI / 2.0) { chi_shift = PI - chi_shift; } if (noPre > 1 && noPost > 1) { gCocirc = sigma_cocirc2 > 0 ? 
expf(-chi_shift * chi_shift / sigma_cocirc2) : expf(-chi_shift * chi_shift / sigma_cocirc2) - 1.0; } // compute curvature of cocircular contour float cocircKurve_shift = d2_shift > 0 ? fabsf(2 * dyP_shift) / d2_shift : 0.0f; if (POS_KURVE_FLAG) { if (SADDLE_FLAG) { if ((iPosKurvePre) && !(iSaddlePre) && (dyP_shift < 0)) { continue; } if (!(iPosKurvePre) && !(iSaddlePre) && (dyP_shift > 0)) { continue; } if ((iPosKurvePre) && (iSaddlePre) && (((dyP_shift > 0) && (dxP < 0)) || ((dyP_shift > 0) && (dxP < 0)))) { continue; } if (!(iPosKurvePre) && (iSaddlePre) && (((dyP_shift > 0) && (dxP > 0)) || ((dyP_shift < 0) && (dxP < 0)))) { continue; } } else { //SADDLE_FLAG if ((iPosKurvePre) && (dyP_shift < 0)) { continue; } if (!(iPosKurvePre) && (dyP_shift > 0)) { continue; } } } // POS_KURVE_FLAG gKurvePre = (nKurvePre > 1) ? expf(-powf((cocircKurve_shift - fabsf( kurvePre)), 2) / sigma_kurve_pre2) : 1.0; gKurvePost = ((nKurvePre > 1) && (nKurvePost > 1) && (sigma_cocirc2 > 0)) ? expf( -powf((cocircKurve_shift - fabsf(kurvePost)), 2) / sigma_kurve_post2) : 1.0; // compute distance along contour float d_chord_shift = (cocircKurve_shift != 0.0f) ? atanx2_shift / cocircKurve_shift : sqrt(d2_shift); gChord = (nKurvePre > 1) ? expf(-powf(d_chord_shift, 2) / sigma_chord2) : 1.0; if (numFlanks > 1) { float atanx2_shift2 = thetaPre + 2. * atan2f(dyP_shift2, dxP); // preferred angle (rad) atanx2_shift2 += 2. * PI; atanx2_shift2 = fmodf(atanx2_shift2, PI); atanx2_shift2 = fabsf(atanx2_shift2 - thetaPost); float chi_shift2 = atanx2_shift2; //fabsf(atanx2_shift2 - thetaPost); // radians if (chi_shift2 >= PI / 2.0) { chi_shift2 = PI - chi_shift2; } if (noPre > 1 && noPost > 1) { gCocirc += sigma_cocirc2 > 0 ? expf(-chi_shift2 * chi_shift2 / sigma_cocirc2) : expf(-chi_shift2 * chi_shift2 / sigma_cocirc2) - 1.0; } float cocircKurve_shift2 = d2_shift2 > 0 ? 
fabsf(2 * dyP_shift2) / d2_shift2 : 0.0f; if (POS_KURVE_FLAG) { if (SADDLE_FLAG) { if ((iPosKurvePre) && !(iSaddlePre) && (dyP_shift2 < 0)) { continue; } if (!(iPosKurvePre) && !(iSaddlePre) && (dyP_shift2 > 0)) { continue; } if ((iPosKurvePre) && (iSaddlePre) && (((dyP_shift2 > 0) && (dxP < 0)) || ((dyP_shift2 > 0) && (dxP < 0)))) { continue; } if (!(iPosKurvePre) && (iSaddlePre) && (((dyP_shift2 > 0) && (dxP > 0)) || ((dyP_shift2 < 0) && (dxP < 0)))) { continue; } } else { //SADDLE_FLAG if ((iPosKurvePre) && (dyP_shift2 < 0)) { continue; } if (!(iPosKurvePre) && (dyP_shift2 > 0)) { continue; } } // SADDLE_FLAG } // POS_KURVE_FLAG gKurvePre += (nKurvePre > 1) ? expf(-powf((cocircKurve_shift2 - fabsf( kurvePre)), 2) / sigma_kurve_pre2) : 1.0; gKurvePost += ((nKurvePre > 1) && (nKurvePost > 1) && (sigma_cocirc2 > 0)) ? expf(-powf((cocircKurve_shift2 - fabsf(kurvePost)), 2) / sigma_kurve_post2) : 1.0; float d_chord_shift2 = cocircKurve_shift2 != 0.0f ? atanx2_shift2 / cocircKurve_shift2 : sqrt(d2_shift2); gChord += (nKurvePre > 1) ? 
expf(-powf(d_chord_shift2, 2) / sigma_chord2) : 1.0; } } float weight_tmp = gDist * gKurvePre * gKurvePost * gCocirc; if (weight_tmp < min_weight) continue; w_tmp[iPost * sx_tmp + jPost * sy_tmp + kfPost * sf_tmp] = weight_tmp; } } } // copy weights from full sized temporary patch to (possibly shrunken) patch // copyToWeightPatch(wp_tmp, 0, kPre); /* w = wp->data; const int nxunshrunkPatch = wp_tmp->nx; const int nyunshrunkPatch = wp_tmp->ny; const int nfunshrunkPatch = fPatchSize(); const int unshrunkPatchSize = nxunshrunkPatch*nyunshrunkPatch*nfunshrunkPatch; pvdata_t *wtop = this->getPatchDataStart(0); //pvdata_t * data_head = &wtop[unshrunkPatchSize*kPre]; //pvdata_t * data_head = (pvdata_t *) ((char*) wp + sizeof(PVPatch)); //size_t data_offset = w - data_head; pvdata_t * data_head1 = &wtop[unshrunkPatchSize*kPre]; // (pvdata_t *) ((char*) wp + sizeof(PVPatch)); pvdata_t * data_head2 = (pvdata_t *) ((char*) wp + sizeof(PVPatch)); size_t data_offset1 = w - data_head1; size_t data_offset2 = w - data_head2; size_t data_offset = fabs(data_offset1) < fabs(data_offset2) ? data_offset1 : data_offset2; w_tmp = &wp_tmp->data[data_offset]; int nk = nxPatch * nfPatch; for (int ky = 0; ky < nyPatch; ky++) { for (int iWeight = 0; iWeight < nk; iWeight++) { w[iWeight] = w_tmp[iWeight]; } w += sy; w_tmp += sy_tmp; } */ // free(wp_tmp); return 0; }
int main(int argc, char* argv[]) { PVLayerLoc loc; int kl, kg; int kx, ky, kf, kxg, kyg, kfg; #ifdef FEATURES_LAST int ij; #endif //printf("size_loc==%ld size_cube==%ld size_ptr==%ld\n", sizeof(PVLayerLoc), sizeof(PVLayerCube), sizeof(pvdata_t*)); //printf("size_int==%ld size_float==%ld, size_size_t==%ld\n", sizeof(int), sizeof(float), sizeof(size_t)); int nf = loc.nf = 3; int nx = loc.nx = 63; int ny = loc.ny = 127; loc.kx0 = 0; loc.ky0 = 0; loc.nxGlobal = nx; loc.nyGlobal = ny; for (kl = 0; kl < nx*ny*nf; kl++) { kg = globalIndexFromLocal_nompi(kl, loc); if (kg != kl) { printf("FAILED:TEST_KG: (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } // divide in halve by x, take right nf = loc.nf = 2; nx = loc.nx = 32; ny = loc.ny = 128; loc.kx0 = 32; loc.ky0 = 0; loc.nxGlobal = 2.0*nx; loc.nyGlobal = ny; #ifdef FEATURES_LAST for (kf = 0; kf < nf; kf++) { for (ij = 0; ij < nx*ny; ij++) { kl = ij + nx*ny*kf; kx = kxPos(kl, loc.nx, loc.ny, nf); ky = kyPos(kl, loc.nx, loc.ny, nf); kg = globalIndexFromLocal_nompi(kl, loc); kxg = kxPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kyg = kyPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kfg = featureIndex(kg, loc.nxGlobal, loc.nyGlobal, nf); if ((kg-kl) != loc.kx0 + (loc.ky0 + kyg)*loc.nx + kf*nx*ny) { printf("FAILED:TEST_KG: right (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } } #else for (kl = 0; kl < nx*ny*nf; kl++) { kx = kxPos(kl, loc.nx, loc.ny, nf); ky = kyPos(kl, loc.nx, loc.ny, nf); kf = featureIndex(kl, loc.nx, loc.ny, nf); kg = globalIndexFromLocal_nompi(kl, loc); kxg = kxPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kyg = kyPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kfg = featureIndex(kg, loc.nxGlobal, loc.nyGlobal, nf); assert(loc.kx0+kx == kxg); assert(ky == kyg); assert(kf == kfg); if ((kg-kl) != loc.kx0*nf*(1+ky)) { printf("FAILED:TEST_KG: right (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } #endif // divide in halve by y, take bottom nf = loc.nf = 5; nx = loc.nx = 32; ny = loc.ny = 128; loc.kx0 = 0; loc.ky0 = 64; loc.nxGlobal = nx; 
loc.nyGlobal = 2.0*ny; #ifdef FEATURES_LAST for (kf = 0; kf < nf; kf++) { for (ij = 0; ij < nx*ny; ij++) { int kl = ij + nx*ny*kf; int kx = kxPos(kl, loc.nx, loc.ny, nf); int ky = kyPos(kl, loc.nx, loc.ny, nf); kg = globalIndexFromLocal_nompi(kl, loc); kx = kxPos(kg, loc.nxGlobal, loc.nyGlobal, nf); ky = kyPos(kg, loc.nxGlobal, loc.nyGlobal, nf); // kg = ky0*nxGlobal + kf*nxGlobal*nyGlobal // kl = kf*nx*ny if ((kg-kl) != nx*loc.ky0 + kf*nx*(loc.nyGlobal - ny)) { printf("FAILED:TEST_KG: bottom (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } } #else for (kl = 0; kl < nx*ny*nf; kl++) { kx = kxPos(kl, loc.nx, loc.ny, nf); ky = kyPos(kl, loc.nx, loc.ny, nf); kf = featureIndex(kl, loc.nx, loc.ny, nf); kg = globalIndexFromLocal_nompi(kl, loc); kxg = kxPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kyg = kyPos(kg, loc.nxGlobal, loc.nyGlobal, nf); kfg = featureIndex(kg, loc.nxGlobal, loc.nyGlobal, nf); assert(loc.kx0+kx == kxg); assert(loc.ky0+ky == kyg); assert(kf == kfg); if ((kg-kl) != loc.ky0*nf*nx) { printf("FAILED:TEST_KG: bottom (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } #endif nf = loc.nf = 1; nx = loc.nx = 4096; ny = loc.ny = 4096+1; // this should fail (probably not now with ints) ny = loc.ny = 4096; loc.kx0 = 0; loc.ky0 = 0; loc.nxGlobal = nx; loc.nyGlobal = ny; for (kl = 0; kl < nx*ny*nf; kl++) { kg = globalIndexFromLocal_nompi(kl, loc); if (kg != kl) { printf("FAILED:TEST_KG: max ny (kl,kg) = (%d,%d)\n", kl, kg); exit(1); } } return 0; }
void MLPOutputLayer::binaryNonlocalStats(){ const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; assert(nf == 1); int numNeurons = getNumNeurons(); pvdata_t * A = getCLayer()->activity->data; pvdata_t * gtA = gtLayer->getCLayer()->activity->data; float sumsq = 0; float sum = 0; float gtSum = 0; int currNumRight = 0; int currNumWrong = 0; int totNum = 0; //Only go through restricted //Calculate the sum squared error for(int ni = 0; ni < numNeurons; ni++){ int nExt = kIndexExtended(ni, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); //Sum over x and y direction sumsq += pow(A[nExt] - gtA[nExt], 2); //Sum over activity to find mean sum += A[nExt]; gtSum += gtA[nExt]; } #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &sumsq, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, >Sum, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); #endif // PV_USE_MPI //Normalize sum to find mean sum /= loc->nxGlobal * loc->nyGlobal; gtSum /= loc->nxGlobal * loc->nyGlobal; //gtSum should be the same as the values assert(gtSum == gtA[0]); //Calculate stats if(sum < 0 && gtSum < 0){ currNumRight++; } else if(sum > 0 && gtSum > 0){ currNumRight++; } else{ currNumWrong++; } #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &currNumRight, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, &currNumWrong, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator()); #endif // PV_USE_MPI numRight += currNumRight; numWrong += currNumWrong; progressNumRight += currNumRight; progressNumWrong += currNumWrong; //Print if need float timef = parent->simulationTime(); if(timef >= nextStatProgress){ //Update nextStatProgress nextStatProgress += 
statProgressPeriod; if (parent->columnId()==0) { float totalScore = 100*float(numRight)/float(numRight+numWrong); float progressScore = 100*float(progressNumRight)/float(progressNumRight+progressNumWrong); fprintf(stdout, "time:%f layer:\"%s\" total:%f%% progressStep:%f%% energy:%f\n", timef, name, totalScore, progressScore, sumsq/2); } //Reset progressStats progressNumRight = 0; progressNumWrong = 0; } }
int TransposePoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE){ return PV_SUCCESS; } assert(post->getChannel(getChannel())); const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; //Grab postIdxLayer's data int* postIdxData = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ PoolingIndexLayer* postIndexLayer = originalConn->getPostIndexLayer(); assert(postIndexLayer); //Make sure this layer is an integer layer assert(postIndexLayer->getDataType() == PV_INT); DataStore * store = parent->icCommunicator()->publisherStore(postIndexLayer->getLayerId()); int delay = getDelay(arborID); //TODO this is currently a hack, need to properly implement data types. postIdxData = (int*) store->buffer(LOCAL, delay); } for(int b = 0; b < parent->getNBatch(); b++){ pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int * postIdxDataBatch = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ postIdxDataBatch = postIdxData + b * originalConn->getPostIndexLayer()->getNumExtended(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse){ activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse){ numLoop = activity->numActive[b]; } else{ numLoop = numExtended; } #ifdef PV_USE_OPENMP_THREADS //Clear all thread gsyn buffer if(thread_gSyn){ int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif 
for(int i = 0; i < parent->getNumThreads() * numNeurons; i++){ int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = 0; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse){ kPreExt = activeIndicesBatch[loopIndex]; } else{ kPreExt = loopIndex; } float a = activityBatch[kPreExt]; if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn){ int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else{ gSynPatchHead = gSynPatchHeadBatch; } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; #endif // PV_USE_OPENMP_THREADS const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; if(kxPreGlobalExt < preLoc->halo.lt || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt || kyPreGlobalExt < preLoc->halo.up || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up){ continue; } //Convert stored global extended index into local extended index int postGlobalExtIdx = postIdxDataBatch[kPreExt]; // If all inputs are zero and input layer is sparse, postGlobalExtIdx will still be -1. 
if(postGlobalExtIdx == -1) { continue; } //Make sure the index is in bounds assert(postGlobalExtIdx >= 0 && postGlobalExtIdx < (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt) * (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn) * postLoc->nf); const int kxPostGlobalExt = kxPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kyPostGlobalExt = kyPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kfPost = featureIndex(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt; const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up; if(kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx|| kyPostLocalRes < 0 || kyPostLocalRes >= postLoc->ny){ continue; } const int kPostLocalRes = kIndex(kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf); gSynPatchHeadBatch[kPostLocalRes] = a; } else{ PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); const int sy = getPostNonextStrides()->sy; // stride in layer int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING){ float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { (accumulateFunctionPointer)(0, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } } #ifdef PV_USE_OPENMP_THREADS 
//Set back into gSyn if(thread_gSyn){ pvdata_t * gSynPatchHead = gSynPatchHeadBatch; int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++){ for(int ti = 0; ti < parent->getNumThreads(); ti++){ if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ if(gSynPatchHead[ni] < fabs(thread_gSyn[ti][ni])){ gSynPatchHead[ni] = thread_gSyn[ti][ni]; } } else{ gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } return PV_SUCCESS; }
void MLPOutputLayer::multiclassNonlocalStats(){ const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int numNeurons = getNumNeurons(); pvdata_t * A = getCLayer()->activity->data; pvdata_t * gtA = gtLayer->getCLayer()->activity->data; float sumsq = 0; //Winner take all in the output layer int currNumRight = 0; int currNumWrong = 0; assert(classBuffer); //Clear classBuffer for(int i = 0; i < nf; i++){ classBuffer[i] = 0; } //Only go through restricted //Calculate the sum squared error for(int ni = 0; ni < numNeurons; ni++){ int nExt = kIndexExtended(ni, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); //Sum over x and y direction classBuffer[fi] += A[nExt]; sumsq += pow(A[nExt] - gtA[nExt], 2); } //Normalize classBuffer to find mean for(int i = 0; i < nf; i++){ classBuffer[i] /= nx*ny; } //Reduce all classBuffers through a mean #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &sumsq, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, classBuffer, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); //Normalize classBuffer across processors for(int i = 0; i < nf; i++){ classBuffer[i] /= parent->icCommunicator()->commSize(); } #endif // PV_USE_MPI //Find max float estMaxF = -1000; int estMaxFi = -1; float actualMaxF = -1000; int actualMaxFi = -1; for(int i = 0; i < nf; i++){ if(classBuffer[i] >= estMaxF){ estMaxF = classBuffer[i]; estMaxFi = i; } int nExt = kIndex(loc->halo.lt, loc->halo.up, i, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); if(gtA[nExt] >= actualMaxF){ actualMaxF = gtA[nExt]; actualMaxFi = i; } } //Calculate stats //Found winning feature, compare to ground truth if(estMaxFi == actualMaxFi){ currNumRight++; } else{ currNumWrong++; } #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &currNumRight, 1, MPI_INT, MPI_SUM, 
parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, &currNumWrong, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator()); #endif // PV_USE_MPI numRight += currNumRight; numWrong += currNumWrong; progressNumRight += currNumRight; progressNumWrong += currNumWrong; //Print if need float timef = parent->simulationTime(); if(timef >= nextStatProgress){ //Update nextStatProgress nextStatProgress += statProgressPeriod; if (parent->columnId()==0) { float totalScore = 100*float(numRight)/float(numRight+numWrong); float progressScore = 100*float(progressNumRight)/float(progressNumRight+progressNumWrong); fprintf(stdout, "time:%f layer:\"%s\" total:%f%% progressStep:%f%% energy:%f\n", timef, name, totalScore, progressScore, sumsq/2); } //Reset progressStats progressNumRight = 0; progressNumWrong = 0; } }