void MLP::modifyWeights(const integer &exampleIndex, const realnumber &learningRate) { modifyDelta(m_input.col(exampleIndex), m_output.col(exampleIndex), 0); for (integer j = m_last; j >= 0; --j) m_layers[j] += learningRate * m_Delta[j] * addBias( (j>0) ? run(exampleIndex, j-1) : m_input.col(exampleIndex) ).transpose(); }
void myConvKernel_naive() { float *filterOutput_buf = (float*) _mm_malloc(sizeof(float) * outputSize, 512); assert(filterOutput_buf != NULL); memset(outputPlanes, 0, outputSize * nOutputPlanes); #pragma omp parallel { int tid = omp_get_thread_num(); int nthreads = omp_get_num_threads(); int ioHeight_spos = BLOCK_LOW(tid, nthreads, ioHeight); int ioHeight_epos = BLOCK_LOW(tid + 1, nthreads, ioHeight); int oS_spos = ioHeight_spos * ioWidth; int oS_size = (ioHeight_epos - ioHeight_spos) * ioWidth; for (int opIndex = 0; opIndex < nOutputPlanes; opIndex++) { float *filterOutput = filterOutput_buf; float *outputPlane = outputPlanes + opIndex * outputSize; for (int ipIndex = 0; ipIndex < nInputPlanes; ipIndex++) { int wMatIndex = nInputPlanes * opIndex + ipIndex; float *inputPlane = inputPlanes + ipIndex * paddedInSize; float *weightMatrix = weights + wMatIndex * wSize; convolve3x3withPad( inputPlane, filterOutput, weightMatrix, ioHeight_spos, ioHeight_epos ); addVec(oS_size, filterOutput + oS_spos, outputPlane + oS_spos); } } #pragma omp barrier #pragma omp for for (int opIndex = 0; opIndex < nOutputPlanes; opIndex++) { int wMatIndex = nInputPlanes * opIndex; float *outputPlane = outputPlanes + opIndex * outputSize; addBias(outputSize, (float)(biases[opIndex]), outputPlane); scaleIfLessThanX(outputSize, outputPlane, 0.0, 0.1); } } _mm_free(filterOutput_buf); }