static void bernoulli_generate(int n, double p, int* r) { int seed = 17 + caffe_rng_rand() % 4096; #ifdef _OPENMP int nthr = omp_get_max_threads(); int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3; bool run_parallel = (Caffe::mode() != Caffe::GPU) && (omp_in_parallel() == 0) && (n >= threshold); if (!run_parallel) nthr = 1; # pragma omp parallel num_threads(nthr) { const int ithr = omp_get_thread_num(); const int avg_amount = (n + nthr - 1) / nthr; const int my_offset = ithr * avg_amount; const int my_amount = std::min(my_offset + avg_amount, n) - my_offset; #else { const int my_amount = n; const int my_offset = 0; #endif VSLStreamStatePtr stream; vslNewStream(&stream, VSL_BRNG_MCG31, seed); vslSkipAheadStream(stream, my_offset); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount, r + my_offset, p); vslDeleteStream(&stream); } }
/*
 * JNI entry point for edu.berkeley.bid.VSL.viRngBernoulli.
 *
 * Resolves the native VSL stream from j_stream, pins the Java int array j_r,
 * and has viRngBernoulli write n values directly into it.
 *
 * Returns the status code from viRngBernoulli, or one of the following when
 * the call cannot be made safely:
 *   VSL_ERROR_NULL_PTR     - j_r is null
 *   VSL_ERROR_BADARGS      - n exceeds the length of j_r
 *   VSL_ERROR_MEM_FAILURE  - the array could not be pinned
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngBernoulli
(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble a)
{
  VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream);
  jint * r;
  jint retval;

  if (j_r == NULL) {
    return VSL_ERROR_NULL_PTR;
  }
  /* Refuse to let MKL write past the end of the Java array. */
  if (n > (*env)->GetArrayLength(env, j_r)) {
    return VSL_ERROR_BADARGS;
  }

  /* No JNI calls may be made between the critical get and release. */
  r = (*env)->GetPrimitiveArrayCritical(env, j_r, NULL);
  if (r == NULL) {
    /* Pinning failed; there is nothing to release. */
    return VSL_ERROR_MEM_FAILURE;
  }

  retval = viRngBernoulli(method, stream, n, r, a);

  /* Mode 0: copy back (if a copy was made) and release the buffer. */
  (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0);
  return retval;
}
// CPU forward pass for dropout.
//
// Outside the TRAIN phase the layer is an identity: bottom[0] is copied
// verbatim into (*top)[0]. The TRAIN path (MKL Bernoulli mask followed by a
// scaled multiply) is compiled out, so reaching it triggers NOT_IMPLEMENTED.
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* src = bottom[0]->cpu_data();
  Dtype* dst = (*top)[0]->mutable_cpu_data();
  int* keep_mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
  const int num_elems = bottom[0]->count();

  // Inference: pass the input through unchanged.
  if (Caffe::phase() != Caffe::TRAIN) {
    memcpy(dst, src, num_elems * sizeof(Dtype));
    return;
  }

  // Training: draw a keep/drop mask and apply it with rescaling.
#if 0
  viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
      num_elems, keep_mask, 1. - threshold_);
  for (int idx = 0; idx < num_elems; ++idx) {
    dst[idx] = src[idx] * keep_mask[idx] * scale_;
  }
#else
  NOT_IMPLEMENTED;
#endif
}