void GPolynomialSingleLabel::toBezierCoefficients() { if(m_featureDims == 0) ThrowError("init has not been called"); // Make Pascal's triangle GTEMPBUF(size_t, pCoords, m_featureDims); GTEMPBUF(size_t, pPascalsTriangle, m_nControlPoints); // In each dimensional direction... GMath::pascalsTriangle(pPascalsTriangle, m_nControlPoints - 1); for(size_t n = 0; n < m_featureDims; n++) { // Across that dimension... for(size_t j = 0; j < m_nControlPoints; j++) { // Iterate over the entire lattice of coefficients (except in dimension n) GPolynomialLatticeIterator iter(pCoords, m_featureDims, m_nControlPoints, n); while(true) { // Divide by the corresponding row of Pascal's triangle pCoords[n] = j; m_pCoefficients[calcIndex(pCoords)] /= pPascalsTriangle[j]; if(!iter.Advance()) break; } } } // Forward sum the coefficients double d; for(size_t i = m_nControlPoints - 1; i >= 1; i--) { // In each dimensional direction... for(size_t n = 0; n < m_featureDims; n++) { // Subtract the neighbor of lesser-significance from each coefficient for(size_t j = i; j < m_nControlPoints; j++) { // Iterate over the entire lattice of coefficients (except in dimension n) GPolynomialLatticeIterator iter(pCoords, m_featureDims, m_nControlPoints, n); while(true) { // Subtract the neighbor of lesser-significance from this coefficient pCoords[n] = j - 1; d = m_pCoefficients[calcIndex(pCoords)]; pCoords[n] = j; m_pCoefficients[calcIndex(pCoords)] += d; if(!iter.Advance()) break; } } } } }
void GPolynomialSingleLabel::integrate() { if(m_featureDims == 0) ThrowError("init has not been called"); GTEMPBUF(size_t, pCoords, m_featureDims); double d; for(size_t n = 0; n < m_featureDims; n++) { // Iterate over the entire lattice of coefficients (except in dimension n) GPolynomialLatticeIterator iter(pCoords, m_featureDims, m_nControlPoints, n); while(true) { // Integrate in the n'th dimension pCoords[n] = 0; m_pCoefficients[calcIndex(pCoords)] = 0; for(size_t j = m_nControlPoints - 1; j > 0; j--) { pCoords[n] = j - 1; d = m_pCoefficients[calcIndex(pCoords)]; pCoords[n] = j; size_t index = calcIndex(pCoords); GAssert(j < m_nControlPoints - 1 || m_pCoefficients[index] == 0); // There's a non-zero value in a highest-order coefficient. This polynomial, therefore, isn't big enough to hold the integral m_pCoefficients[index] = d / j; } if(!iter.Advance()) break; } } }
void GPolynomialSingleLabel::differentiate() { if(m_featureDims == 0) ThrowError("init has not been called"); GTEMPBUF(size_t, pCoords, m_featureDims); double d; for(size_t n = 0; n < m_featureDims; n++) { // Iterate over the entire lattice of coefficients (except in dimension n) GPolynomialLatticeIterator iter(pCoords, m_featureDims, m_nControlPoints, n); while(true) { // Differentiate with respect to the n'th dimension for(size_t j = 1; j < m_nControlPoints; j++) { pCoords[n] = j; d = m_pCoefficients[calcIndex(pCoords)]; pCoords[n] = j - 1; m_pCoefficients[calcIndex(pCoords)] = d * j; } pCoords[n] = m_nControlPoints - 1; m_pCoefficients[calcIndex(pCoords)] = 0; if(!iter.Advance()) break; } } }
// Copies the coefficients of pOther into the matching lattice positions of
// this polynomial. This polynomial must have at least as many control points
// as pOther; otherwise an error is thrown. If this polynomial has more control
// points than pOther, all of its coefficients are zeroed first so that the
// positions pOther does not cover end up as 0.
// NOTE(review): m_featureDims is overwritten with pOther's value without any
// visible re-allocation of m_pCoefficients -- presumably both polynomials are
// expected to have the same number of feature dims already; confirm with callers.
void GPolynomialSingleLabel::copy(GPolynomialSingleLabel* pOther)
{
	m_featureDims = pOther->m_featureDims;

	// Reject a destination that is too small to hold pOther's lattice.
	// (This comparison used to be ">=", which threw in exactly the valid cases
	// and made the zero-fill below unreachable.)
	if(controlPointCount() < pOther->controlPointCount())
		ThrowError("this polynomial must have at least as many control points as pOther");

	// A larger destination has positions that pOther never writes; clear them
	if(controlPointCount() > pOther->controlPointCount())
		GVec::setAll(m_pCoefficients, 0.0, m_nCoefficients);

	// Walk pOther's lattice and copy each coefficient to the same coordinates
	// here. Each polynomial maps the coordinates to an index with its own calcIndex.
	GTEMPBUF(size_t, pCoords, m_featureDims);
	GPolynomialLatticeIterator iter(pCoords, m_featureDims, pOther->m_nControlPoints, (size_t)-1);
	while(true)
	{
		m_pCoefficients[calcIndex(pCoords)] = pOther->m_pCoefficients[pOther->calcIndex(pCoords)];
		if(!iter.Advance())
			break;
	}
}
// (Warning: this method relies on the order in which GPolynomialLatticeIterator // visits coefficients in the lattice) double GPolynomialSingleLabel::predict(const double* pIn) { if(m_featureDims == 0) ThrowError("init has not been called"); GTEMPBUF(size_t, pCoords, m_featureDims); GPolynomialLatticeIterator iter(pCoords, m_featureDims, m_nControlPoints, (size_t)-1); double dSum = 0; double dVar; for(size_t nCoeff = m_nCoefficients - 1; nCoeff < m_nCoefficients; nCoeff--) { dVar = 1; for(size_t n = 0; n < m_featureDims; n++) { for(size_t i = pCoords[n]; i > 0; i--) dVar *= pIn[n]; } dVar *= m_pCoefficients[nCoeff]; dSum += dVar; iter.Advance(); } return dSum; }
// virtual void GNeighborTransducer::transduce(GData* pDataLabeled, GData* pDataUnlabeled, int labelDims) { if(labelDims != 1) ThrowError("Only 1 nominal label is supported"); if(!pDataLabeled->relation()->areNominal(pDataLabeled->relation()->size() - 1, 1)) ThrowError("Only nominal labels are supported"); if(!pDataLabeled->relation()->areContinuous(0, pDataLabeled->relation()->size() - 1)) ThrowError("Only continuous features are supported"); if(pDataLabeled->cols() != pDataUnlabeled->cols()) ThrowError("relations don't match"); // Make a dataset containing all rows GData dataAll(pDataLabeled->relation()); dataAll.reserve(pDataLabeled->rows() + pDataUnlabeled->rows()); GReleaseDataHolder hDataAll(&dataAll); for(size_t i = 0; i < pDataUnlabeled->rows(); i++) dataAll.takeRow(pDataUnlabeled->row(i)); for(size_t i = 0; i < pDataLabeled->rows(); i++) dataAll.takeRow(pDataLabeled->row(i)); int featureDims = pDataLabeled->cols() - labelDims; sp_relation pRelInputs = new GUniformRelation(featureDims, 0); dataAll.setRelation(pRelInputs); // Find friends GNeighborFinder* pNF; if(m_intrinsicDims == 0) pNF = new GNeighborFinderCacheWrapper(new GKdTree(&dataAll, 0, m_friendCount, NULL, true), true); else pNF = new GManifoldNeighborFinder( &dataAll, m_friendCount, // littleK m_friendCount * 4, // bigK m_intrinsicDims, // intrinsicDims m_alpha, // alpha m_beta, // beta false, // prune? 
m_pRand); Holder<GNeighborFinder> hNF(pNF); GTEMPBUF(size_t, neighbors, m_friendCount); int labelValues = pDataLabeled->relation()->valueCount(featureDims); GTEMPBUF(double, tallys, labelValues); // Label the unlabeled patterns GBitTable labeled(pDataUnlabeled->rows()); GData labelList(3); // pattern index, most likely label, confidence labelList.newRows(pDataUnlabeled->rows()); for(size_t i = 0; i < pDataUnlabeled->rows(); i++) labelList.row(i)[0] = i; while(labelList.rows() > 0) { // Compute the most likely label and the confidence for each pattern for(size_t i = 0; i < labelList.rows(); i++) { // Find the most common label double* pRow = labelList.row(i); size_t index = (size_t)pRow[0]; pNF->neighbors(neighbors, index); GVec::setAll(tallys, 0.0, labelValues); for(int j = 0; j < m_friendCount; j++) { if(neighbors[j] >= dataAll.rows()) continue; double* pFriend = dataAll.row(neighbors[j]); if(neighbors[j] >= pDataUnlabeled->rows()) { if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues) tallys[(int)pFriend[featureDims]] += 1.0; } else if(labeled.bit(neighbors[j])) { if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues) tallys[(int)pFriend[featureDims]] += 0.6; } } int label = GVec::indexOfMax(tallys, labelValues, m_pRand); double conf = tallys[label]; // Penalize for dissenting votes for(int j = 0; j < m_friendCount; j++) { if(neighbors[j] >= dataAll.rows()) continue; double* pFriend = dataAll.row(neighbors[j]); if(neighbors[j] >= pDataUnlabeled->rows()) { if((int)pFriend[featureDims] != label) conf *= 0.5; } else if(labeled.bit(neighbors[j])) { if((int)pFriend[featureDims] != label) conf *= 0.8; } } pRow[1] = label; pRow[2] = conf; } labelList.sort(2); // Assign the labels to the patterns we are most confident about size_t maxCount = MAX((size_t)5, pDataLabeled->rows() / 5); size_t count = 0; for(size_t i = labelList.rows() - 1; i < labelList.rows(); i--) { double* pRow = labelList.row(i); size_t index = 
(size_t)pRow[0]; int label = (int)pRow[1]; pDataUnlabeled->row(index)[featureDims] = label; labeled.set(index); labelList.deleteRow(i); if(count >= maxCount) break; count++; } } }