void trainOneEpochDenseCPU(int itask, float *data, float *numerator, float *denominator, float *codebook, unsigned int nSomX, unsigned int nSomY, unsigned int nDimensions, unsigned int nVectors, unsigned int nVectorsPerRank, float radius, float scale, string mapType, int *globalBmus) { unsigned int p1[2] = {0, 0}; unsigned int *bmus = new unsigned int[nVectorsPerRank*2]; #pragma omp parallel default(shared) private(p1) { #pragma omp for for (unsigned int n = 0; n < nVectorsPerRank; n++) { if (itask*nVectorsPerRank+n<nVectors) { /// get the best matching unit get_bmu_coord(codebook, data, nSomY, nSomX, nDimensions, p1, n); bmus[2*n] = p1[0]; bmus[2*n+1] = p1[1]; } } } float *localNumerator = new float[nSomY*nSomX*nDimensions]; float *localDenominator = new float[nSomY*nSomX]; #pragma omp parallel default(shared) { #pragma omp for for (unsigned int som_y = 0; som_y < nSomY; som_y++) { for (unsigned int som_x = 0; som_x < nSomX; som_x++) { localDenominator[som_y*nSomX + som_x] = 0.0; for (unsigned int d = 0; d < nDimensions; d++) localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] = 0.0; } } /// Accumulate denoms and numers #pragma omp for for (unsigned int som_y = 0; som_y < nSomY; som_y++) { for (unsigned int som_x = 0; som_x < nSomX; som_x++) { for (unsigned int n = 0; n < nVectorsPerRank; n++) { if (itask*nVectorsPerRank+n<nVectors) { float dist = 0.0f; if (mapType == "planar") { dist = euclideanDistanceOnPlanarMap(som_x, som_y, bmus[2*n], bmus[2*n+1]); } else if (mapType == "toroid") { dist = euclideanDistanceOnToroidMap(som_x, som_y, bmus[2*n], bmus[2*n+1], nSomX, nSomY); } float neighbor_fuct = getWeight(dist, radius, scale); for (unsigned int d = 0; d < nDimensions; d++) { localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] += 1.0f * neighbor_fuct * (*(data + n*nDimensions + d)); } localDenominator[som_y*nSomX + som_x] += neighbor_fuct; } } } } } #ifdef HAVE_MPI MPI_Reduce(localNumerator, numerator, nSomY*nSomX*nDimensions, MPI_FLOAT, 
MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(localDenominator, denominator, nSomY*nSomX, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Gather(bmus, nVectorsPerRank*2, MPI_INT, globalBmus, nVectorsPerRank*2, MPI_INT, 0, MPI_COMM_WORLD); #else for (unsigned int i=0; i < nSomY*nSomX*nDimensions; ++i) { numerator[i] = localNumerator[i]; } for (unsigned int i=0; i < nSomY*nSomX; ++i) { denominator[i] = localDenominator[i]; } for (unsigned int i=0; i < 2*nVectorsPerRank; ++i) { globalBmus[i]=bmus[i]; } #endif delete [] bmus; delete [] localNumerator; delete [] localDenominator; }
/// One epoch of batch SOM training on sparse (libsvm-style) data for this
/// MPI rank's slice of the input vectors.
///
/// Phase 1: for every local vector, find its best matching unit (BMU) on
/// the codebook and record its (x, y) coordinates in bmus.
/// Phase 2: for every codebook unit, accumulate the neighborhood-weighted
/// numerator (sum of weight * vector) and denominator (sum of weights) of
/// the batch update rule. With MPI enabled the local sums are reduced onto
/// rank 0 and the BMU list gathered there; otherwise they are copied
/// straight into the caller-supplied output buffers.
///
/// itask           rank index; used to skip padding vectors on the last rank
/// sparseData      per-vector arrays of {index, value} pairs terminated by
///                 an entry with index == -1 (libsvm svm_node convention)
/// numerator       out: nSomY*nSomX*nDimensions accumulated weighted sums
/// denominator     out: nSomY*nSomX accumulated weights
/// codebook        current SOM weights (nSomY x nSomX x nDimensions)
/// nVectors        total vector count across all ranks
/// nVectorsPerRank per-rank (padded) vector count
/// radius, scale   neighborhood radius and scale for this epoch
/// mapType         "planar" or "toroid" (anything else leaves dist == 0)
/// gridType        "rectangular" selects the rectangular-grid distances;
///                 any other value selects the hexagonal-grid variants
/// compact_support, gaussian  neighborhood-function options forwarded to
///                 getWeight
/// globalBmus      out (rank 0): gathered BMU coordinates, 2 ints per vector
void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
                            float *denominator, float *codebook,
                            unsigned int nSomX, unsigned int nSomY,
                            unsigned int nDimensions, unsigned int nVectors,
                            unsigned int nVectorsPerRank, float radius,
                            float scale, string mapType, string gridType,
                            bool compact_support, bool gaussian,
                            int *globalBmus) {
    int p1[2] = {0, 0};
    int *bmus = new int[nVectorsPerRank * 2];
    /// Pragmas are guarded so non-OpenMP builds see no pragma at all; the
    /// _WIN32 loop variants use signed counters because MSVC's OpenMP 2.0
    /// implementation rejects unsigned induction variables.
#ifdef _OPENMP
    #pragma omp parallel default(shared) private(p1)
#endif
    {
#ifdef _OPENMP
        #pragma omp for
#endif
#ifdef _WIN32
        for (int n = 0; n < nVectorsPerRank; n++) {
#else
        for (unsigned int n = 0; n < nVectorsPerRank; n++) {
#endif
            /// Skip padding entries beyond the true number of vectors
            if (itask * nVectorsPerRank + n < nVectors) {
                /// get the best matching unit
                get_bmu_coord(codebook, sparseData, nSomY, nSomX,
                              nDimensions, p1, n);
                bmus[2 * n] = p1[0];
                bmus[2 * n + 1] = p1[1];
            }
        }
    }

    float *localNumerator = new float[nSomY * nSomX * nDimensions];
    float *localDenominator = new float[nSomY * nSomX];
#ifdef _OPENMP
    #pragma omp parallel default(shared)
#endif
    {
        /// Zero the per-rank accumulators
#ifdef _OPENMP
        #pragma omp for
#endif
#ifdef _WIN32
        for (int som_y = 0; som_y < nSomY; som_y++) {
#else
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
#endif
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                localDenominator[som_y * nSomX + som_x] = 0.0;
                for (unsigned int d = 0; d < nDimensions; d++)
                    localNumerator[som_y * nSomX * nDimensions
                                   + som_x * nDimensions + d] = 0.0;
            }
        }
        /// Accumulate denoms and numers
#ifdef _OPENMP
        #pragma omp for
#endif
#ifdef _WIN32
        for (int som_y = 0; som_y < nSomY; som_y++) {
#else
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
#endif
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                for (unsigned int n = 0; n < nVectorsPerRank; n++) {
                    if (itask * nVectorsPerRank + n < nVectors) {
                        float dist = 0.0f;
                        /// Distance from this unit to the vector's BMU,
                        /// picking the grid/map topology combination
                        if (gridType == "rectangular") {
                            if (mapType == "planar") {
                                dist = euclideanDistanceOnPlanarMap(som_x, som_y, bmus[2 * n], bmus[2 * n + 1]);
                            }
                            else if (mapType == "toroid") {
                                dist = euclideanDistanceOnToroidMap(som_x, som_y, bmus[2 * n], bmus[2 * n + 1], nSomX, nSomY);
                            }
                        }
                        else {
                            if (mapType == "planar") {
                                dist = euclideanDistanceOnHexagonalPlanarMap(som_x, som_y, bmus[2 * n], bmus[2 * n + 1]);
                            }
                            else if (mapType == "toroid") {
                                dist = euclideanDistanceOnHexagonalToroidMap(som_x, som_y, bmus[2 * n], bmus[2 * n + 1], nSomX, nSomY);
                            }
                        }
                        float neighbor_fuct = getWeight(dist, radius, scale, compact_support, gaussian);
                        /// Walk this vector's sparse entries; the -1 index
                        /// terminates the row. Each stored .index addresses
                        /// the corresponding dimension slot directly.
                        unsigned int j = 0;
                        while ( sparseData[n][j].index != -1 ) {
                            localNumerator[som_y * nSomX * nDimensions + som_x * nDimensions + sparseData[n][j].index] +=
                                1.0f * neighbor_fuct * sparseData[n][j].value;
                            j++;
                        }
                        localDenominator[som_y * nSomX + som_x] += neighbor_fuct;
                    }
                }
            }
        }
    }
#ifdef HAVE_MPI
    MPI_Reduce(localNumerator, numerator, nSomY * nSomX * nDimensions,
               MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(localDenominator, denominator, nSomY * nSomX,
               MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Gather(bmus, nVectorsPerRank * 2, MPI_INT, globalBmus,
               nVectorsPerRank * 2, MPI_INT, 0, MPI_COMM_WORLD);
#else
    /// Single-process build: the local sums are the global sums
    for (unsigned int i = 0; i < nSomY * nSomX * nDimensions; ++i) {
        numerator[i] = localNumerator[i];
    }
    for (unsigned int i = 0; i < nSomY * nSomX; ++i) {
        denominator[i] = localDenominator[i];
    }
    for (unsigned int i = 0; i < 2 * nVectorsPerRank; ++i) {
        globalBmus[i] = bmus[i];
    }
#endif
    delete [] bmus;
    delete [] localNumerator;
    delete [] localDenominator;
}