Esempio n. 1
0
// Returns a copy of the height-by-width region of this matrix whose upper-left
// element is at (row, col).
//
// The result is allocated with new and returned as a raw pointer; nothing in
// this method retains it, so the caller is expected to delete it.
//
// Throws (via ThrowError) "out of range" if any argument is negative or if the
// region extends past the last row or the last column of this matrix.
GSparseMatrix* GSparseMatrix::subMatrix(int row, int col, int height, int width)
{
	// The region covers rows [row, row + height) and columns [col, col + width),
	// so it is valid whenever row + height <= m_rows and col + width <= m_cols.
	// (The previous ">=" test wrongly rejected regions that touch the last row
	// or column.) The limits are expressed as subtractions, evaluated only after
	// the sign checks, so the comparison cannot overflow int.
	if(row < 0 || col < 0 || height < 0 || width < 0 || height > (int)m_rows - row || width > (int)m_cols - col)
		ThrowError("out of range");
	GSparseMatrix* pSub = new GSparseMatrix(height, width);
	for(int y = 0; y < height; y++)
	{
		for(int x = 0; x < width; x++)
			pSub->set(y, x, get(row + y, col + x));
	}
	return pSub;
}
Esempio n. 2
0
// Loads a sparse matrix from a file, picking the parser from the filename
// extension (compared with _stricmp):
//   ".arff"   - a dense ARFF file with exactly 3 columns holding
//               (row-index, col-index, value) triples
//   ".sparse" - a JSON document whose root is passed to the GSparseMatrix
//               constructor
// Returns a matrix allocated with new. Throws Ex for any other extension, when
// the ARFF data does not have 3 columns, or when an index column fails the
// range checks below.
GSparseMatrix* GRecommenderLib::loadSparseData(const char* szFilename)
{
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	const char* szExt = szFilename + pd.extStart;

	// JSON-serialized sparse matrix
	if(_stricmp(szExt, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}

	// Anything that is not ARFF at this point is unsupported
	if(_stricmp(szExt, ".arff") != 0)
		throw Ex("Unsupported file format: ", szExt);

	// Dense 3-column ARFF: each row is one (row-index, col-index, value) triple
	GMatrix triples;
	triples.loadArff(szFilename);
	if(triples.cols() != 3)
		throw Ex("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");

	// Minimum and spread (max - min) of each index column
	double rowMin = triples.columnMin(0);
	double rowSpan = triples.columnMax(0) - rowMin;
	double colMin = triples.columnMin(1);
	double colSpan = triples.columnMax(1) - colMin;
	if(rowMin < 0 || rowMin > 1e10 || rowSpan < 2 || rowSpan > 1e10)
		throw Ex("Invalid row indexes");
	if(colMin < 0 || colMin > 1e10 || colSpan < 2 || colSpan > 1e10)
		throw Ex("Invalid col indexes");

	// Size the matrix to hold the largest index in each dimension; cells that
	// are never set keep UNKNOWN_REAL_VALUE as passed to the constructor.
	std::unique_ptr<GSparseMatrix> hResult(new GSparseMatrix(size_t(rowMin + rowSpan) + 1, size_t(colMin + colSpan) + 1, UNKNOWN_REAL_VALUE));
	size_t tripleCount = 0;
	while(tripleCount < triples.rows())
	{
		GVec& triple = triples.row(tripleCount);
		hResult->set(size_t(triple[0]), size_t(triple[1]), triple[2]);
		tripleCount++;
	}
	return hResult.release();
}
Esempio n. 3
0
// Loads a sparse matrix from a file, picking the parser from the filename
// extension (compared with _stricmp):
//   ".arff"   - a dense ARFF file with exactly 3 columns holding
//               (row-index, col-index, value) triples
//   ".sparse" - a JSON document whose root is passed to the GSparseMatrix
//               constructor
// Returns a matrix allocated with new. Reports an error through ThrowError for
// any other extension, when the ARFF data does not have 3 columns, or when an
// index column fails the range checks below.
GSparseMatrix* loadSparseData(const char* szFilename)
{
	// Load the dataset by extension
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	if(_stricmp(szFilename + pd.extStart, ".arff") == 0)
	{
		// Convert a 3-column dense ARFF file to a sparse matrix
		GMatrix* pData = GMatrix::loadArff(szFilename);
		// Hold the dense matrix for the rest of this scope so that it is released
		// both on the normal return and when one of the checks below throws.
		// (Previously the pointer was never freed.)
		Holder<GMatrix> hData(pData);
		if(pData->cols() != 3)
			ThrowError("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");

		// m0/r0 and m1/r1 receive the minimum and range of the row-index and
		// col-index columns, respectively.
		double m0, r0, m1, r1;
		pData->minAndRange(0, &m0, &r0);
		pData->minAndRange(1, &m1, &r1);
		if(m0 < 0 || m0 > 1e10 || r0 < 2 || r0 > 1e10)
			ThrowError("Invalid row indexes");
		if(m1 < 0 || m1 > 1e10 || r1 < 2 || r1 > 1e10)
			ThrowError("Invalid col indexes");

		// Size the matrix to hold the largest index in each dimension; cells that
		// are never set keep UNKNOWN_REAL_VALUE as passed to the constructor.
		GSparseMatrix* pMatrix = new GSparseMatrix(size_t(m0 + r0) + 1, size_t(m1 + r1) + 1, UNKNOWN_REAL_VALUE);
		Holder<GSparseMatrix> hMatrix(pMatrix);
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			pMatrix->set(size_t(pRow[0]), size_t(pRow[1]), pRow[2]);
		}
		return hMatrix.release();
	}
	else if(_stricmp(szFilename + pd.extStart, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}
	ThrowError("Unsupported file format: ", szFilename + pd.extStart);
	return NULL;
}
Esempio n. 4
0
// Computes a singular value decomposition of this matrix and hands the three
// factors back through the output pointers.
//
//   ppU      - receives a newly allocated m-by-m GSparseMatrix (m = rows())
//   ppDiag   - receives a new[]-allocated array of n doubles (n = cols())
//              holding the singular values, sorted from largest to smallest
//   ppV      - receives a newly allocated n-by-n GSparseMatrix
//   maxIters - limit on the number of diagonalization passes per singular value
//
// Ownership of all three results is released to the caller at the end of the
// method. Until then they are held by Holder/ArrayHolder objects.
//
// Throws (via ThrowError) "Expected at least as many rows as columns" when
// rows() < cols(), and "failed to converge" when a singular value has not
// converged by the time iter reaches maxIters.
//
// NOTE(review): the structure (Householder bidiagonalization, accumulation of
// the two transforms, then shifted QR sweeps) looks like the classic dense SVD
// routine adapted to sparse row/column iterators -- confirm against the dense
// implementation before changing any statement order.
void GSparseMatrix::singularValueDecompositionHelper(GSparseMatrix** ppU, double** ppDiag, GSparseMatrix** ppV, int maxIters)
{
	int m = rows();
	int n = cols();
	if(m < n)
		ThrowError("Expected at least as many rows as columns");
	int i, j, k;
	int l = 0;
	int q, iter;
	double c, f, h, s, x, y, z;
	double norm = 0.0;
	double g = 0.0;
	double scale = 0.0;
	// pU is the working matrix: m-by-m, filled from this matrix by copyFrom.
	GSparseMatrix* pU = new GSparseMatrix(m, m);
	Holder<GSparseMatrix> hU(pU);
	pU->copyFrom(this);
	double* pSigma = new double[n];
	ArrayHolder<double> hSigma(pSigma);
	GSparseMatrix* pV = new GSparseMatrix(n, n);
	Holder<GSparseMatrix> hV(pV);
	// temp[0..n) stores the off-diagonal values produced by the reduction.
	// temp2 (the m doubles that follow) is scratch space used further down to
	// hold merged row/column indices, stored as doubles.
	GTEMPBUF(double, temp, n + m);
	double* temp2 = temp + n;

	// Householder reduction to bidiagonal form
	// (this loop declares its own i, which shadows the function-scope i)
	for(int i = 0; i < n; i++)
	{
		// Left-hand reduction
		temp[i] = scale * g;
		l = i + 1;
		g = 0.0;
		s = 0.0;
		scale = 0.0;
		if(i < m)
		{
			// When iterating a column, kk->first is the row index and kk->second
			// the value. Only rows i and below take part.
			// NOTE(review): several loops below call pU->set() on the column or row
			// being iterated; this relies on set() not invalidating the iterator --
			// TODO confirm.
			Iter kend = pU->colEnd(i);
			for(Iter kk = pU->colBegin(i); kk != kend; kk++)
			{
				if(kk->first >= (unsigned int)i)
					scale += ABS(kk->second);
			}
			if(scale != 0.0)
			{
				// Normalize the sub-column by scale and accumulate its squared length in s
				for(Iter kk = pU->colBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)i)
					{
						double t = kk->second / scale;
						pU->set(kk->first, i, t);
						s += (t * t);
					}
				}
				f = pU->get(i, i);
				g = -GSparseMatrix_takeSign(sqrt(s), f);
				h = f * g - s;
				pU->set(i, i, f - g);
				if(i != n - 1)
				{
					// Apply the reflection to each remaining column j
					for(j = l; j < n; j++)
					{
						s = 0.0;
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								s += kk->second * pU->get(kk->first, j);
						}
						f = s / h;
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								pU->set(kk->first, j, pU->get(kk->first, j) + f * kk->second);
						}
					}
				}
				// Undo the normalization of column i
				for(Iter kk = pU->colBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)i)
						pU->set(kk->first, i, pU->get(kk->first, i) * scale);
				}
			}
		}
		pSigma[i] = scale * g;

		// Right-hand reduction
		g = 0.0;
		s = 0.0;
		scale = 0.0;
		if(i < m && i != n - 1) 
		{
			// When iterating a row, kk->first is the column index. Columns l..n-1
			// take part; the loops stop at the first index >= n.
			// NOTE(review): the early break assumes the iterator visits columns in
			// increasing order -- TODO confirm.
			Iter kend = pU->rowEnd(i);
			for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
			{
				if(kk->first >= (unsigned int)n)
					break;
				if(kk->first >= (unsigned int)l)
					scale += ABS(kk->second);
			}
			if(scale != 0.0) 
			{
				// Normalize the sub-row by scale and accumulate its squared length in s
				for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)n)
						break;
					if(kk->first >= (unsigned int)l)
					{
						double t = kk->second / scale;
						pU->set(i, kk->first, t);
						s += (t * t);
					}
				}
				f = pU->get(i, l);
				g = -GSparseMatrix_takeSign(sqrt(s), f);
				h = f * g - s;
				pU->set(i, l, f - g);
				// temp[l..n) temporarily holds row i divided by h; it is consumed by the
				// update of the rows below and overwritten on later passes.
				for(k = l; k < n; k++)
					temp[k] = pU->get(i, k) / h;
				if(i != m - 1) 
				{
					// Apply the reflection to each remaining row j
					for(j = l; j < m; j++) 
					{
						s = 0.0;
						for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)n)
								break;
							if(kk->first >= (unsigned int)l)
								s += pU->get(j, kk->first) * kk->second;
						}
						// Note: this pass walks the elements stored in row j (not row i)
						Iter kend2 = pU->rowEnd(j);
						for(Iter kk = pU->rowBegin(j); kk != kend2; kk++)
						{
							if(kk->first >= (unsigned int)n)
								break;
							if(kk->first >= (unsigned int)l)
								pU->set(j, kk->first, pU->get(j, kk->first) + s * temp[kk->first]);
						}
					}
				}
				// Undo the normalization of row i
				for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
				{
					if(kk->first >= (unsigned int)n)
						break;
					if(kk->first >= (unsigned int)l)
						pU->set(i, kk->first, kk->second * scale);
				}
			}
		}
		// Track the largest |diagonal| + |off-diagonal| sum; it serves as the
		// magnitude reference for the "negligible" tests in the diagonalization.
		norm = MAX(norm, ABS(pSigma[i]) + ABS(temp[i]));
	}

	// Accumulate right-hand transform
	// (on entry, g and l still hold the values left by the reduction loop above;
	// each pass then sets g = temp[i] and l = i for the next, lower, i)
	for(int i = n - 1; i >= 0; i--)
	{
		if(i < n - 1)
		{
			if(g != 0.0)
			{
				Iter jend = pU->rowEnd(i);
				for(Iter jj = pU->rowBegin(i); jj != jend; jj++)
				{
					if(jj->first >= (unsigned int)n)
						break;
					if(jj->first >= (unsigned int)l)
						pV->set(i, jj->first, (jj->second / pU->get(i, l)) / g); // (double-division to avoid underflow)
				}
				for(j = l; j < n; j++)
				{
					s = 0.0;
					Iter kend = pU->rowEnd(i);
					for(Iter kk = pU->rowBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)n)
							break;
						if(kk->first >= (unsigned int)l)
							s += kk->second * pV->get(j, kk->first);
					}
					kend = pV->rowEnd(i);
					for(Iter kk = pV->rowBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)n)
							break;
						if(kk->first >= (unsigned int)l)
							pV->set(j, kk->first, pV->get(j, kk->first) + s * kk->second);
					}
				}
			}
			// Clear row i and column i of pV beyond the diagonal
			for(j = l; j < n; j++)
			{
				pV->set(i, j, 0.0);
				pV->set(j, i, 0.0);
			}
		}
		pV->set(i, i, 1.0);
		g = temp[i];
		l = i;
	}

	// Accumulate left-hand transform
	// (from here on the function-scope i, j and k are used)
	for(i = n - 1; i >= 0; i--)
	{
		l = i + 1;
		g = pSigma[i];
		if(i < n - 1)
		{
			// Clear row i of pU to the right of the diagonal
			for(j = l; j < n; j++)
				pU->set(i, j, 0.0);
		}
		if(g != 0.0)
		{
			g = 1.0 / g;
			if(i != n - 1)
			{
				for(j = l; j < n; j++)
				{
					s = 0.0;
					Iter kend = pU->colEnd(i);
					// The sum covers rows l and below; the update that follows covers
					// rows i and below.
					for(Iter kk = pU->colBegin(i); kk != kend; kk++)
					{
						if(kk->first >= (unsigned int)l)
							s += kk->second * pU->get(kk->first, j);
					}
					f = (s / pU->get(i, i)) * g;
					if(f != 0.0)
					{
						for(Iter kk = pU->colBegin(i); kk != kend; kk++)
						{
							if(kk->first >= (unsigned int)i)
								pU->set(kk->first, j, pU->get(kk->first, j) + f * kk->second);
						}
					}
				}
			}
			// Scale column i (rows i..m-1) by 1 / pSigma[i]
			for(j = i; j < m; j++)
				pU->set(j, i, pU->get(j, i) * g);
		} 
		else 
		{
			for(j = i; j < m; j++)
				pU->set(j, i, 0.0);
		}
		pU->set(i, i, pU->get(i, i) + 1.0);
	}

	// Diagonalize the bidiagonal matrix
	for(k = n - 1; k >= 0; k--) // For each singular value
	{
		for(iter = 1; iter <= maxIters; iter++)
		{
			// Test for splitting
			// "x + norm == norm" is used as the test for x being negligible
			// relative to norm.
			// NOTE(review): when l reaches 0, q is -1 and pSigma[q] would be read out
			// of bounds unless the temp[l] test breaks first. temp[0] is assigned
			// 0.0 * 0.0 on the first reduction pass, which presumably guarantees
			// that -- confirm it stays negligible.
			bool flag = true;
			for(l = k; l >= 0; l--)
			{
				q = l - 1;
				if(ABS(temp[l]) + norm == norm)
				{
					flag = false;
					break;
				}
				if(ABS(pSigma[q]) + norm == norm)
					break;
			}

			if(flag)
			{
				// Cancel temp[l] by rotating columns q and i of pU
				c = 0.0;
				s = 1.0;
				for(i = l; i <= k; i++)
				{
					f = s * temp[i];
					temp[i] *= c;
					if(ABS(f) + norm == norm)
						break;
					g = pSigma[i];
					h = GSparseMatrix_pythag(f, g);
					pSigma[i] = h;
					h = 1.0 / h;
					c = g * h;
					s = -f * h;
					// Collect into temp2 the union of the row indices stored in columns
					// i and q (a two-way merge; assumes both iterators ascend), so the
					// rotation below can modify both columns without iterating them.
					Iter jendi = pU->colEnd(i);
					Iter jendq = pU->colEnd(q);
					Iter jji = pU->colBegin(i);
					Iter jjq = pU->colBegin(q);
					int tpos;
					for(tpos = 0; jji != jendi || jjq != jendq; tpos++)
					{
						if(jjq == jendq || (jji != jendi && jji->first < jjq->first))
						{
							temp2[tpos] = jji->first;
							jji++;
						}
						else
						{
							temp2[tpos] = jjq->first;
							if(jji != jendi && jjq->first == jji->first)
								jji++;
							jjq++;
						}
					}
					// Rotate the (q, i) element pair in every collected row
					for(int tpos2 = 0; tpos2 < tpos; tpos2++)
					{
						y = pU->get((unsigned int)temp2[tpos2], q);
						z = pU->get((unsigned int)temp2[tpos2], i);
						pU->set((unsigned int)temp2[tpos2], q, y * c + z * s);
						pU->set((unsigned int)temp2[tpos2], i, z * c - y * s);
					}
				}
			}

			z = pSigma[k];
			if(l == k)
			{
				// Detect convergence
				if(z < 0.0)
				{
					// Singular value should be positive
					// (negate it and flip the sign of row k of pV to compensate)
					pSigma[k] = -z;
					for(j = 0; j < n; j++)
						pV->set(k, j, pV->get(k, j) * -1.0);
				}
				break;
			}
			if(iter >= maxIters)
				ThrowError("failed to converge");

			// Shift from bottom 2x2 minor
			x = pSigma[l];
			q = k - 1;
			y = pSigma[q];
			g = temp[q];
			h = temp[k];
			f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
			g = GSparseMatrix_pythag(f, 1.0);
			f = ((x - z) * (x + z) + h * ((y / (f + GSparseMatrix_takeSign(g, f))) - h)) / x;

			// QR transform
			c = 1.0;
			s = 1.0;
			for(j = l; j <= q; j++)
			{
				i = j + 1;
				g = temp[i];
				y = pSigma[i];
				h = s * g;
				g = c * g;
				z = GSparseMatrix_pythag(f, h);
				temp[j] = z;
				c = f / z;
				s = h / z;
				f = x * c + g * s;
				g = g * c - x * s;
				h = y * s;
				y = y * c;
				// Collect into temp2 the union of the column indices stored in rows i
				// and j of pV (same two-way merge as above), then rotate those rows.
				Iter pendi = pV->rowEnd(i);
				Iter pendj = pV->rowEnd(j);
				Iter ppi = pV->rowBegin(i);
				Iter ppj = pV->rowBegin(j);
				int tpos;
				for(tpos = 0; ppi != pendi || ppj != pendj; tpos++)
				{
					if(ppj == pendj || (ppi != pendi && ppi->first < ppj->first))
					{
						temp2[tpos] = ppi->first;
						ppi++;
					}
					else
					{
						temp2[tpos] = ppj->first;
						if(ppi != pendi && ppj->first == ppi->first)
							ppi++;
						ppj++;
					}
				}
				for(int tpos2 = 0; tpos2 < tpos; tpos2++)
				{
					x = pV->get(j, (unsigned int)temp2[tpos2]);
					z = pV->get(i, (unsigned int)temp2[tpos2]);
					pV->set(j, (unsigned int)temp2[tpos2], x * c + z * s);
					pV->set(i, (unsigned int)temp2[tpos2], z * c - x * s);
				}
				z = GSparseMatrix_pythag(f, h);
				pSigma[j] = z;
				// When z is zero, c and s keep their previous values
				if(z != 0.0)
				{
					z = 1.0 / z;
					c = f * z;
					s = h * z;
				}
				f = c * g + s * y;
				x = c * y - s * g;
				// Same merge-then-rotate step, now for columns i and j of pU
				pendi = pU->colEnd(i);
				pendj = pU->colEnd(j);
				ppi = pU->colBegin(i);
				ppj = pU->colBegin(j);
				for(tpos = 0; ppi != pendi || ppj != pendj; tpos++)
				{
					if(ppj == pendj || (ppi != pendi && ppi->first < ppj->first))
					{
						temp2[tpos] = ppi->first;
						ppi++;
					}
					else
					{
						temp2[tpos] = ppj->first;
						if(ppi != pendi && ppj->first == ppi->first)
							ppi++;
						ppj++;
					}
				}
				for(int tpos2 = 0; tpos2 < tpos; tpos2++)
				{
					y = pU->get((unsigned int)temp2[tpos2], j);
					z = pU->get((unsigned int)temp2[tpos2], i);
					pU->set((unsigned int)temp2[tpos2], j, y * c + z * s);
					pU->set((unsigned int)temp2[tpos2], i, z * c - y * s);
				}
			}
			temp[l] = 0.0;
			temp[k] = f;
			pSigma[k] = x;
		}
	}

	// Sort the singular values from largest to smallest
	// (insertion sort; the matching columns of pU and rows of pV move with them)
	for(i = 1; i < n; i++)
	{
		for(j = i; j > 0; j--)
		{
			if(pSigma[j - 1] >= pSigma[j])
				break;
			pU->swapColumns(j - 1, j);
			pV->swapRows(j - 1, j);
			std::swap(pSigma[j - 1], pSigma[j]);
		}
	}

	// Return results
	// (release() detaches each object from its holder so it outlives this call)
	*ppU = hU.release();
	*ppDiag = hSigma.release();
	*ppV = hV.release();
}