// Copies a rectangular region of this matrix into a newly allocated matrix.
//
// row, col       zero-based position of the region's upper-left cell.
// height, width  size of the region; the region covers rows
//                [row, row + height) and columns [col, col + width).
//
// Returns a heap-allocated height-by-width GSparseMatrix that the caller
// receives as a raw pointer. Calls ThrowError("out of range") when any
// argument is negative or the region extends past the last row or column.
// A region that ends exactly on the last row/column is valid.
GSparseMatrix* GSparseMatrix::subMatrix(int row, int col, int height, int width)
{
	// The negative checks come first so that the subtractions below are only
	// reached with non-negative operands. Comparing against "size - offset"
	// instead of "offset + length" also avoids signed overflow for huge
	// arguments. The end of the region is exclusive, so equality is allowed.
	if(row < 0 || col < 0 || height < 0 || width < 0
		|| height > (int)m_rows - row
		|| width > (int)m_cols - col)
		ThrowError("out of range");

	GSparseMatrix* pSub = new GSparseMatrix(height, width);
	for(int y = 0; y < height; y++)
	{
		for(int x = 0; x < width; x++)
			pSub->set(y, x, get(row + y, col + x));
	}
	return pSub;
}
// Reads a sparse ratings matrix from disk, choosing the reader from the
// file extension (compared case-insensitively):
//   ".arff"   - a dense 3-column table of (row index, column index, value)
//               that is converted into a sparse matrix.
//   ".sparse" - a JSON document handed to the GSparseMatrix constructor.
// Any other extension raises Ex("Unsupported file format: ", <extension>).
// The returned matrix is heap-allocated and returned as a raw pointer.
GSparseMatrix* GRecommenderLib::loadSparseData(const char* szFilename)
{
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	const char* szExt = szFilename + pd.extStart;

	if(_stricmp(szExt, ".arff") != 0)
	{
		// Not ARFF: the only other accepted format is the JSON one
		if(_stricmp(szExt, ".sparse") != 0)
			throw Ex("Unsupported file format: ", szExt);
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}

	// Dense ARFF with exactly three columns: row index, column index, value
	GMatrix data;
	data.loadArff(szFilename);
	if(data.cols() != 3)
		throw Ex("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");

	// Smallest index and index span of each of the two index columns
	double rowMin = data.columnMin(0);
	double rowSpan = data.columnMax(0) - rowMin;
	double colMin = data.columnMin(1);
	double colSpan = data.columnMax(1) - colMin;

	// An index column is rejected when its minimum is negative or above 1e10,
	// or when its span is below 2 or above 1e10.
	auto badIndexColumn = [](double lowest, double span)
	{
		return lowest < 0 || lowest > 1e10 || span < 2 || span > 1e10;
	};
	if(badIndexColumn(rowMin, rowSpan))
		throw Ex("Invalid row indexes");
	if(badIndexColumn(colMin, colSpan))
		throw Ex("Invalid col indexes");

	// Dimensions are (largest index + 1); the smart pointer releases the
	// matrix if anything below throws.
	std::unique_ptr<GSparseMatrix> hMatrix(new GSparseMatrix(size_t(rowMin + rowSpan) + 1, size_t(colMin + colSpan) + 1, UNKNOWN_REAL_VALUE));
	for(size_t r = 0; r < data.rows(); r++)
	{
		GVec& entry = data.row(r);
		hMatrix->set(size_t(entry[0]), size_t(entry[1]), entry[2]);
	}
	return hMatrix.release();
}
// Loads a sparse matrix from a file, picking the reader from the file
// extension (compared case-insensitively):
//   ".arff"   - a dense 3-column table of (row index, column index, value)
//               that is converted into a sparse matrix.
//   ".sparse" - a JSON document handed to the GSparseMatrix constructor.
// Any other extension is reported through ThrowError.
// The returned matrix is heap-allocated and returned as a raw pointer.
GSparseMatrix* loadSparseData(const char* szFilename)
{
	// Load the dataset by extension
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	if(_stricmp(szFilename + pd.extStart, ".arff") == 0)
	{
		// Convert a 3-column dense ARFF file to a sparse matrix
		GMatrix* pData = GMatrix::loadArff(szFilename);

		// The dense matrix is only needed while converting. Hold it so it is
		// freed on the normal return as well as on every ThrowError path
		// below; previously it was never deleted.
		// (Assumes loadArff hands ownership of a heap object to the caller.)
		Holder<GMatrix> hData(pData);

		if(pData->cols() != 3)
			ThrowError("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");

		// Minimum and range (max - min) of the two index columns
		double m0, r0, m1, r1;
		pData->minAndRange(0, &m0, &r0);
		pData->minAndRange(1, &m1, &r1);
		if(m0 < 0 || m0 > 1e10 || r0 < 2 || r0 > 1e10)
			ThrowError("Invalid row indexes");
		if(m1 < 0 || m1 > 1e10 || r1 < 2 || r1 > 1e10)
			ThrowError("Invalid col indexes");

		// Dimensions are (largest index + 1) in each direction
		GSparseMatrix* pMatrix = new GSparseMatrix(size_t(m0 + r0) + 1, size_t(m1 + r1) + 1, UNKNOWN_REAL_VALUE);
		Holder<GSparseMatrix> hMatrix(pMatrix);
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			pMatrix->set(size_t(pRow[0]), size_t(pRow[1]), pRow[2]);
		}
		return hMatrix.release();
	}
	else if(_stricmp(szFilename + pd.extStart, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}
	ThrowError("Unsupported file format: ", szFilename + pd.extStart);
	return NULL;
}
// Computes a singular value decomposition of this matrix and hands the three
// factors back through the output pointers.
//
// ppU       receives a newly allocated m-by-m sparse matrix (m = rows()). It
//           starts as a copy of this matrix (copyFrom) and is transformed in
//           place by the steps below.
// ppDiag    receives a new[]-allocated array of n doubles (n = cols()) with
//           the singular values, sorted from largest to smallest.
// ppV       receives a newly allocated n-by-n sparse matrix.
// maxIters  maximum number of iterations spent on each singular value before
//           ThrowError("failed to converge") is called.
//
// ThrowError is also called when the matrix has fewer rows than columns.
// The outputs are only assigned at the very end, when the three holders are
// released; presumably the caller then owns them and the holders free them
// if an error is raised earlier - confirm against Holder/ArrayHolder.
//
// NOTE(review): the right-hand accumulation writes pV->set(i, column, ...)
// and the final sort swaps columns of U but rows of V, which suggests V is
// stored transposed - confirm against the callers.
// NOTE(review): several loops below walk an iterator over a row/column of pU
// or pV while calling set() on that same matrix - confirm that set() does not
// invalidate the iterators in use.
void GSparseMatrix::singularValueDecompositionHelper(GSparseMatrix** ppU, double** ppDiag, GSparseMatrix** ppV, int maxIters)
{
	int m = rows();
	int n = cols();
	if(m < n)
		ThrowError("Expected at least as many rows as columns");
	int i, j, k;
	int l = 0;
	int q, iter;
	double c, f, h, s, x, y, z;
	double norm = 0.0;
	double g = 0.0;
	double scale = 0.0;
	GSparseMatrix* pU = new GSparseMatrix(m, m);
	Holder<GSparseMatrix> hU(pU);
	pU->copyFrom(this);
	double* pSigma = new double[n];
	ArrayHolder<double> hSigma(pSigma);
	GSparseMatrix* pV = new GSparseMatrix(n, n);
	Holder<GSparseMatrix> hV(pV);
	// Scratch space of n + m doubles (GTEMPBUF is a macro defined elsewhere):
	// temp uses the first n entries, temp2 the remaining m. temp2 is used to
	// hold element indices, stored as doubles and cast back when read.
	GTEMPBUF(double, temp, n + m);
	double* temp2 = temp + n;

	// Householder reduction to bidiagonal form
	// (this loop declares its own i, which shadows the i declared above)
	for(int i = 0; i < n; i++)
	{
		// Left-hand reduction
		temp[i] = scale * g;
		l = i + 1;
		g = 0.0;
		s = 0.0;
		scale = 0.0;
		if(i < m)
		{
			// Sum of absolute values of column i, from row i downward
			Iter kend = pU->colEnd(i);
			for(Iter kk = pU->colBegin(i); kk != kend; kk++)
			{
				if(kk->first >= (unsigned int)i)
					scale += ABS(kk->second);
			}
			if(scale != 0.0)
			{
				// Divide that part of the column by scale and accumulate its squared norm in s
				for(Iter kk = pU->colBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)i)
					{
						double t = kk->second / scale;
						pU->set(kk->first, i, t);
						s += (t * t);
					}
				}
				f = pU->get(i, i);
				g = -GSparseMatrix_takeSign(sqrt(s), f);
				h = f * g - s;
				pU->set(i, i, f - g);
				if(i != n - 1)
				{
					// Update each remaining column j using column i
					for(j = l; j < n; j++)
					{
						s = 0.0;
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								s += kk->second * pU->get(kk->first, j);
						}
						f = s / h;
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								pU->set(kk->first, j, pU->get(kk->first, j) + f * kk->second);
						}
					}
				}
				// Multiply the column back by scale
				for(Iter kk = pU->colBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)i)
						pU->set(kk->first, i, pU->get(kk->first, i) * scale);
				}
			}
		}
		pSigma[i] = scale * g;

		// Right-hand reduction
		// Same steps applied to row i, restricted to columns l .. n-1. The
		// "break" on index >= n assumes the row iterator visits entries in
		// ascending column order - TODO confirm.
		g = 0.0;
		s = 0.0;
		scale = 0.0;
		if(i < m && i != n - 1)
		{
			Iter kend = pU->rowEnd(i);
			for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
			{
				if(kk->first >= (unsigned int)n)
					break;
				if(kk->first >= (unsigned int)l)
					scale += ABS(kk->second);
			}
			if(scale != 0.0)
			{
				for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)n)
						break;
					if(kk->first >= (unsigned int)l)
					{
						double t = kk->second / scale;
						pU->set(i, kk->first, t);
						s += (t * t);
					}
				}
				f = pU->get(i, l);
				g = -GSparseMatrix_takeSign(sqrt(s), f);
				h = f * g - s;
				pU->set(i, l, f - g);
				// temp[l..n-1] temporarily holds row i divided by h
				for(k = l; k < n; k++)
					temp[k] = pU->get(i, k) / h;
				if(i != m - 1)
				{
					// Update each remaining row j using row i
					for(j = l; j < m; j++)
					{
						s = 0.0;
						for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)n)
								break;
							if(kk->first >= (unsigned int)l)
								s += pU->get(j, kk->first) * kk->second;
						}
						Iter kend2 = pU->rowEnd(j);
						for(Iter kk = pU->rowBegin(j); kk != kend2; kk++)
						{
							if(kk->first >= (unsigned int)n)
								break;
							if(kk->first >= (unsigned int)l)
								pU->set(j, kk->first, pU->get(j, kk->first) + s * temp[kk->first]);
						}
					}
				}
				for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)n)
						break;
					if(kk->first >= (unsigned int)l)
						pU->set(i, kk->first, kk->second * scale);
				}
			}
		}
		// Track the largest |pSigma[i]| + |temp[i]| seen; used below as the
		// reference magnitude for the "x + norm == norm" negligibility tests
		norm = MAX(norm, ABS(pSigma[i]) + ABS(temp[i]));
	}

	// Accumulate right-hand transform
	// (g and l carry over from the previous iteration of this loop; on entry
	// they hold the values left by the reduction loop above)
	for(int i = n - 1; i >= 0; i--)
	{
		if(i < n - 1)
		{
			if(g != 0.0)
			{
				Iter jend = pU->rowEnd(i);
				for(Iter jj = pU->rowBegin(i); jj != jend; jj++)
				{
					if(jj->first >= (unsigned int)n)
						break;
					if(jj->first >= (unsigned int)l)
						pV->set(i, jj->first, (jj->second / pU->get(i, l)) / g); // (double-division to avoid underflow)
				}
				for(j = l; j < n; j++)
				{
					s = 0.0;
					Iter kend = pU->rowEnd(i);
					for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)n)
							break;
						if(kk->first >= (unsigned int)l)
							s += kk->second * pV->get(j, kk->first);
					}
					kend = pV->rowEnd(i);
					for(Iter kk = pV->rowBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)n)
							break;
						if(kk->first >= (unsigned int)l)
							pV->set(j, kk->first, pV->get(j, kk->first) + s * kk->second);
					}
				}
			}
			// Clear row i and column i of V beyond the diagonal
			for(j = l; j < n; j++)
			{
				pV->set(i, j, 0.0);
				pV->set(j, i, 0.0);
			}
		}
		pV->set(i, i, 1.0);
		g = temp[i];
		l = i;
	}

	// Accumulate left-hand transform
	for(i = n - 1; i >= 0; i--)
	{
		l = i + 1;
		g = pSigma[i];
		if(i < n - 1)
		{
			// Clear row i of U to the right of the diagonal (columns l .. n-1)
			for(j = l; j < n; j++)
				pU->set(i, j, 0.0);
		}
		if(g != 0.0)
		{
			g = 1.0 / g;
			if(i != n - 1)
			{
				for(j = l; j < n; j++)
				{
					s = 0.0;
					Iter kend = pU->colEnd(i);
					for(Iter kk = pU->colBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)l)
							s += kk->second * pU->get(kk->first, j);
					}
					f = (s / pU->get(i, i)) * g;
					if(f != 0.0)
					{
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								pU->set(kk->first, j, pU->get(kk->first, j) + f * kk->second);
						}
					}
				}
			}
			// Scale column i (rows i .. m-1) by 1 / pSigma[i]
			for(j = i; j < m; j++)
				pU->set(j, i, pU->get(j, i) * g);
		}
		else
		{
			for(j = i; j < m; j++)
				pU->set(j, i, 0.0);
		}
		pU->set(i, i, pU->get(i, i) + 1.0);
	}

	// Diagonalize the bidiagonal matrix
	for(k = n - 1; k >= 0; k--) // For each singular value
	{
		for(iter = 1; iter <= maxIters; iter++)
		{
			// Test for splitting
			// NOTE(review): when l reaches 0, q is -1 and pSigma[q] would be
			// read out of range; this relies on the temp[l] test breaking out
			// first (temp[0] is written as 0.0 * 0.0 in the first pass of the
			// reduction loop) - confirm it is still negligible here.
			bool flag = true;
			for(l = k; l >= 0; l--)
			{
				q = l - 1;
				if(ABS(temp[l]) + norm == norm)
				{
					flag = false;
					break;
				}
				if(ABS(pSigma[q]) + norm == norm)
					break;
			}
			if(flag)
			{
				// Cancel temp[l] by rotating columns q and i of U
				c = 0.0;
				s = 1.0;
				for(i = l; i <= k; i++)
				{
					f = s * temp[i];
					temp[i] *= c;
					if(ABS(f) + norm == norm)
						break;
					g = pSigma[i];
					h = GSparseMatrix_pythag(f, g);
					pSigma[i] = h;
					h = 1.0 / h;
					c = g * h;
					s = -f * h;
					// Collect into temp2 every row index that has an entry in
					// column i or column q, each index once. The merge assumes
					// both iterators visit entries in ascending row order -
					// TODO confirm.
					Iter jendi = pU->colEnd(i);
					Iter jendq = pU->colEnd(q);
					Iter jji = pU->colBegin(i);
					Iter jjq = pU->colBegin(q);
					int tpos;
					for(tpos = 0; jji != jendi || jjq != jendq; tpos++)
					{
						if(jjq == jendq || (jji != jendi && jji->first < jjq->first))
						{
							temp2[tpos] = jji->first;
							jji++;
						}
						else
						{
							temp2[tpos] = jjq->first;
							if(jji != jendi && jjq->first == jji->first)
								jji++;
							jjq++;
						}
					}
					// Apply the rotation (c, s) to columns q and i on the collected rows
					for(int tpos2 = 0; tpos2 < tpos; tpos2++)
					{
						y = pU->get((unsigned int)temp2[tpos2], q);
						z = pU->get((unsigned int)temp2[tpos2], i);
						pU->set((unsigned int)temp2[tpos2], q, y * c + z * s);
						pU->set((unsigned int)temp2[tpos2], i, z * c - y * s);
					}
				}
			}
			z = pSigma[k];
			if(l == k)
			{
				// Detect convergence
				if(z < 0.0)
				{
					// Singular value should be positive
					// (negate it together with row k of V)
					pSigma[k] = -z;
					for(j = 0; j < n; j++)
						pV->set(k, j, pV->get(k, j) * -1.0);
				}
				break;
			}
			if(iter >= maxIters)
				ThrowError("failed to converge");

			// Shift from bottom 2x2 minor
			x = pSigma[l];
			q = k - 1;
			y = pSigma[q];
			g = temp[q];
			h = temp[k];
			f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
			g = GSparseMatrix_pythag(f, 1.0);
			f = ((x - z) * (x + z) + h * ((y / (f + GSparseMatrix_takeSign(g, f))) - h)) / x;

			// QR transform
			c = 1.0;
			s = 1.0;
			for(j = l; j <= q; j++)
			{
				i = j + 1;
				g = temp[i];
				y = pSigma[i];
				h = s * g;
				g = c * g;
				z = GSparseMatrix_pythag(f, h);
				temp[j] = z;
				c = f / z;
				s = h / z;
				f = x * c + g * s;
				g = g * c - x * s;
				h = y * s;
				y = y * c;
				// Collect the column indices present in row i or row j of V
				// (same merge as above), then rotate those two rows
				Iter pendi = pV->rowEnd(i);
				Iter pendj = pV->rowEnd(j);
				Iter ppi = pV->rowBegin(i);
				Iter ppj = pV->rowBegin(j);
				int tpos;
				for(tpos = 0; ppi != pendi || ppj != pendj; tpos++)
				{
					if(ppj == pendj || (ppi != pendi && ppi->first < ppj->first))
					{
						temp2[tpos] = ppi->first;
						ppi++;
					}
					else
					{
						temp2[tpos] = ppj->first;
						if(ppi != pendi && ppj->first == ppi->first)
							ppi++;
						ppj++;
					}
				}
				for(int tpos2 = 0; tpos2 < tpos; tpos2++)
				{
					x = pV->get(j, (unsigned int)temp2[tpos2]);
					z = pV->get(i, (unsigned int)temp2[tpos2]);
					pV->set(j, (unsigned int)temp2[tpos2], x * c + z * s);
					pV->set(i, (unsigned int)temp2[tpos2], z * c - x * s);
				}
				z = GSparseMatrix_pythag(f, h);
				pSigma[j] = z;
				if(z != 0.0)
				{
					z = 1.0 / z;
					c = f * z;
					s = h * z;
				}
				f = c * g + s * y;
				x = c * y - s * g;
				// Collect the row indices present in column i or column j of U,
				// then rotate those two columns
				pendi = pU->colEnd(i);
				pendj = pU->colEnd(j);
				ppi = pU->colBegin(i);
				ppj = pU->colBegin(j);
				for(tpos = 0; ppi != pendi || ppj != pendj; tpos++)
				{
					if(ppj == pendj || (ppi != pendi && ppi->first < ppj->first))
					{
						temp2[tpos] = ppi->first;
						ppi++;
					}
					else
					{
						temp2[tpos] = ppj->first;
						if(ppi != pendi && ppj->first == ppi->first)
							ppi++;
						ppj++;
					}
				}
				for(int tpos2 = 0; tpos2 < tpos; tpos2++)
				{
					y = pU->get((unsigned int)temp2[tpos2], j);
					z = pU->get((unsigned int)temp2[tpos2], i);
					pU->set((unsigned int)temp2[tpos2], j, y * c + z * s);
					pU->set((unsigned int)temp2[tpos2], i, z * c - y * s);
				}
			}
			temp[l] = 0.0;
			temp[k] = f;
			pSigma[k] = x;
		}
	}

	// Sort the singular values from largest to smallest
	// (insertion sort; the matching column of U and row of V move with each value)
	for(i = 1; i < n; i++)
	{
		for(j = i; j > 0; j--)
		{
			if(pSigma[j - 1] >= pSigma[j])
				break;
			pU->swapColumns(j - 1, j);
			pV->swapRows(j - 1, j);
			std::swap(pSigma[j - 1], pSigma[j]);
		}
	}

	// Return results
	*ppU = hU.release();
	*ppDiag = hSigma.release();
	*ppV = hV.release();
}