//--------------------------------------------------------------------------- // Deregister this parition from the concept. //--------------------------------------------------------------------------- bool CTDPartition::deregisterPartition() { CTDRecord* pFirstRec = NULL; if ( getNumRecords()== 0){ // Get the first record of the generalized records. pFirstRec = getGenRecords()->GetAt(0); } else { // Get the first record of the partition. pFirstRec = getRecord(0); if (!pFirstRec) { ASSERT(false); return false; } } int a = 0; CTDPartAttrib* pPartAttrib = NULL; CTDConcept* pCurrentConcept = NULL; for (POSITION pos = m_partAttribs.GetHeadPosition(); pos != NULL; ++a) { // Find the current concept of this attribute. pPartAttrib = m_partAttribs.GetNext(pos); pCurrentConcept = pFirstRec->getValue(a)->getCurrentConcept(); if (!pCurrentConcept) { ASSERT(false); return false; } pCurrentConcept->deregisterPartition(pPartAttrib->m_relatedPos); pPartAttrib->m_relatedPos = NULL; } return true; }
//--------------------------------------------------------------------------- // Register this parition from the concept for test data. //--------------------------------------------------------------------------- bool CTDPartition::testRegisterPartition() { // Get the first record of the partition. CTDRecord* pFirstRec = getRecord(0); if (!pFirstRec) { ASSERT(false); return false; } int a = 0; CTDPartAttrib* pPartAttrib = NULL; CTDConcept* pCurrentConcept = NULL; for (POSITION pos = m_partAttribs.GetHeadPosition(); pos != NULL; ++a) { // Find the current concept of this attribute. pPartAttrib = m_partAttribs.GetNext(pos); pCurrentConcept = pFirstRec->getValue(a)->getCurrentConcept(); if (!pCurrentConcept) { ASSERT(false); return false; } pPartAttrib->m_relatedPos = pCurrentConcept->testRegisterPartition(this); } return true; }
//--------------------------------------------------------------------------- // Count the number of distortions. // Each time a record is generalized from a child value to a parent value, // we charge 1 unit of distortion. So if 100 records are involved in the // generalization, we charge 100 unit. //--------------------------------------------------------------------------- bool CTDEvalMgr::countNumDistortions(int& catDistortion, float& contDistortion) { cout << _T("Counting number of distortions...") << endl; catDistortion = 0; contDistortion = 0.0f; int nRecs = 0, nValues = 0; CTDRecord* pRec = NULL; CTDAttrib* pAttrib = NULL; CTDPartition* pPartition = NULL; CTDValue* pValue = NULL; CTDConcept* pCurrentConcept = NULL; CTDConcept* pRawConcept = NULL; CTDPartitions* pLeafPartitions = m_pPartitioner->getLeafPartitions(); // For each partition. for (POSITION leafPos = pLeafPartitions->GetHeadPosition(); leafPos != NULL;) { pPartition = pLeafPartitions->GetNext(leafPos); nRecs = pPartition->getNumRecords(); // For each record. for (int r = 0; r < nRecs; ++r) { pRec = pPartition->getRecord(r); nValues = pRec->getNumValues(); // For each value. for (int v = 0; v < nValues; ++v) { pAttrib = m_pAttribMgr->getAttribute(v); if (!pAttrib->m_bVirtualAttrib) continue; pValue = pRec->getValue(v); pCurrentConcept = pValue->getCurrentConcept(); if (pAttrib->isContinuous()) { CTDContConcept* pContConcept = (CTDContConcept*) pCurrentConcept; CTDContConcept* pRoot = (CTDContConcept*) pAttrib->getConceptRoot(); contDistortion += (pContConcept->m_upperBound - pContConcept->m_lowerBound) / (pRoot->m_upperBound - pRoot->m_lowerBound); } else { pRawConcept = ((CTDStringValue*) pValue)->getRawConcept(); #if defined(_TD_SCORE_FUNTION_TRANSACTION) // In case of transaction data, count a distortion only if suppressing "1". if (pRawConcept->m_conceptValue.CompareNoCase(TD_TRANSACTION_ITEM_PRESENT) != 0) continue; #endif if (pRawConcept->m_depth < 0 || pCurrentConcept->m_depth < 0) { cout << _T("CSAEvalMgr::countNumDistortions: Negative depth.") << endl; ASSERT(false); return false; } catDistortion += pRawConcept->m_depth - pCurrentConcept->m_depth; } } } } cout << _T("Counting number of distortions succeeded.") << endl; return true; }
//--------------------------------------------------------------------------- // Distribute records from parent paritition to child partitions. //--------------------------------------------------------------------------- bool CTDPartitioner::testDistributeRecords(CTDPartition* pParentPartition, CTDAttrib* pSplitAttrib, CTDConcept* pSplitConcept, CTDPartitions& childPartitions) { childPartitions.RemoveAll(); // Construct a partition for each child concept. for (int childIdx = 0; childIdx < pSplitConcept->getNumChildConcepts(); ++childIdx) childPartitions.AddTail(new CTDPartition( gTestPartitionIndex++, m_pAttribMgr->getAttributes())); // Scan through each record in the parent partition and // add records to the corresponding child partition based // on the child concept. CTDRecord* pRec = NULL; CTDValue* pSplitValue = NULL; POSITION childPartitionPos = NULL; int childConceptIdx = -1; int splitIdx = pSplitAttrib->m_attribIdx; int nRecs = pParentPartition->getNumRecords(); ASSERT(nRecs > 0); for (int r = 0; r < nRecs; ++r) { pRec = pParentPartition->getRecord(r); pSplitValue = pRec->getValue(splitIdx); // Lower the concept by one level. if (!pSplitValue->lowerCurrentConcept()) { cerr << _T("CTDPartition: Should not specialize on this concept."); childPartitions.cleanup(); ASSERT(false); return false; } // Get the child concept of the current concept in this record. childConceptIdx = pSplitValue->getCurrentConcept()->m_childIdx; ASSERT(childConceptIdx != -1); childPartitionPos = childPartitions.FindIndex(childConceptIdx); ASSERT(childPartitionPos); // Add the record to this child partition. if (!childPartitions.GetAt(childPartitionPos)->addRecord(pRec)) { childPartitions.cleanup(); ASSERT(false); return false; } } // Delete empty child partitions. childPartitions.deleteEmptyPartitions(); return true; }
//--------------------------------------------------------------------------- //--------------------------------------------------------------------------- bool CTDPartition::constructSupportMatrix(double epsilon) { CTDRecord* pFirstRec = NULL; if ( getNumRecords()== 0){ // Get the first record of the generalized records. pFirstRec = getGenRecords()->GetAt(0); } else { // Get the first record of the partition. pFirstRec = getRecord(0); if (!pFirstRec) { ASSERT(false); return false; } } int a = 0; CTDPartAttrib* pPartAttrib = NULL; CTDConcept* pCurrentConcept = NULL; for (POSITION pos = m_partAttribs.GetHeadPosition(); pos != NULL; ++a) { pPartAttrib = m_partAttribs.GetNext(pos); if (!pPartAttrib->m_bCandidate) // pPartAttrib->m_bCandidate is true by default. continue; // Find the current concept of this attribute. pCurrentConcept = pFirstRec->getValue(a)->getCurrentConcept(); // Need to find the split point. We also turn it true when we read the configuration file. if (pPartAttrib->getActualAttrib()->m_bVirtualAttrib) { if (!pCurrentConcept->divideConcept(epsilon, m_nClasses)) { ASSERT(false); return false; } } // If this attribute does not have child concepts, this cannot be candidate. if (pCurrentConcept->getNumChildConcepts() == 0) { pCurrentConcept->m_bCutCandidate = false; // "true" by default. pPartAttrib->m_bCandidate = false; continue; } // Construct the support matrix. if (!pPartAttrib->initSupportMatrix(pCurrentConcept, m_nClasses)) { ASSERT(false); return false; } } // initializing the noisy class sum count m_classNoisySums.SetSize(m_nClasses); for (int j = 0; j < m_nClasses; ++j) m_classNoisySums.SetAt(j, 0); // Compute the support matrix CTDConcept* pClassConcept = NULL; CTDConcept* pLowerConcept = NULL; CTDMDIntArray* pSupMatrix = NULL; CTDIntArray* pSupSums = NULL; CTDIntArray* pClassSums = NULL; CTDRecord* pRec = NULL; int nRecs = getNumRecords(); int classIdx = m_partAttribs.GetCount(); for (int r = 0; r < nRecs; ++r) { pRec = getRecord(r); // Get the class concept. pClassConcept = pRec->getValue(classIdx)->getCurrentConcept(); ++m_classNoisySums[pClassConcept->m_childIdx]; // Compute support counts for each attribute int aIdx = 0; for (POSITION pos = m_partAttribs.GetHeadPosition(); pos != NULL; ++aIdx) { // The partition attribute. pPartAttrib = m_partAttribs.GetNext(pos); if (!pPartAttrib->m_bCandidate) continue; // Get the lower concept value pLowerConcept = pRec->getValue(aIdx)->getLowerConcept(); if (!pLowerConcept) { cerr << _T("No more child concepts. This should not be a candidate.") << endl; ASSERT(false); return false; } // Construct the support matrix. pSupMatrix = pPartAttrib->getSupportMatrix(); if (!pSupMatrix) { ASSERT(false); return false; } ++((*pSupMatrix)[pLowerConcept->m_childIdx][pClassConcept->m_childIdx]); // Compute the support sum of this matrix. pSupSums = pPartAttrib->getSupportSums(); if (!pSupSums) { ASSERT(false); return false; } ++((*pSupSums)[pLowerConcept->m_childIdx]); // Compute the class sum of this matrix. pClassSums = pPartAttrib->getClassSums(); if (!pClassSums) { ASSERT(false); return false; } ++((*pClassSums)[pClassConcept->m_childIdx]); } } return true; }