//--------------------------------------------------------------------------- // spliting records among test partitions like the previous function //--------------------------------------------------------------------------- bool CTDPartitioner::splitTestPartitions(CTDAttrib* pSplitAttrib, CTDConcept* pSplitConcept) { ASSERT(pSplitAttrib && pSplitConcept); // For each partition CTDPartitions childPartitions, allChildPartitions; CTDPartition* pParentPartition = NULL; CTDPartition* pChildPartition = NULL; CTDPartitions* pRelParts = pSplitConcept->getTestRelatedPartitions(); for (POSITION partPos = pRelParts->GetHeadPosition(); partPos != NULL;) { pParentPartition = pRelParts->GetNext(partPos); #ifdef _DEBUG_PRT_INFO cout << _T("----------------------[Splitting Parent Partition]------------------------") << endl; cout << *pParentPartition; #endif // Deregister this parent partition from the related concepts. if (!pParentPartition->testDeregisterPartition()) return false; // Distribute records from parent paritition to child partitions. if (!testDistributeRecords(pParentPartition, pSplitAttrib, pSplitConcept, childPartitions)) return false; for (POSITION childPos = childPartitions.GetHeadPosition(); childPos != NULL;) { pChildPartition = childPartitions.GetNext(childPos); // Register this child partition to the related concepts. if (!pChildPartition->testRegisterPartition()) return false; // Add child partitions to leaf partitions. pChildPartition->m_leafPos = m_testLeafPartitions.AddTail(pChildPartition); //cout << _T("# of leaf partitions: ") << m_leafPartitions.GetCount() << endl; #ifdef _DEBUG_PRT_INFO cout << _T("------------------------[Splitted Child Partition]------------------------") << endl; cout << *pChildPartition; #endif } // Remove parent partition from leaf partitions. m_testLeafPartitions.RemoveAt(pParentPartition->m_leafPos); delete pParentPartition; pParentPartition = NULL; } return true; }
//--------------------------------------------------------------------------- // The main algorithm. //--------------------------------------------------------------------------- bool CTDPartitioner::transformData() { cout << _T("Partitioning data...") << endl; // Initialize the first partition. CTDPartition* pRootPartition = initRootPartition(); if (!pRootPartition) return false; // Initialize the generalized records of the first partition. if(!pRootPartition->initGenRecords(m_pAttribMgr->getAttributes())){ delete pRootPartition; return false; } // We maintain a separate tree structure for test data to perform the same "cut" (genearalization) CTDPartition* pTestRootPartition = initTestRootPartition(); if (!pTestRootPartition) return false; // initialize budget for exponential mechanism if(!initializeBudget()){ ASSERT(false); return false; } // Register this root partition to the related concepts. if (!pRootPartition->registerPartition()) { delete pRootPartition; return false; } // Register this root partition for test data if (!pTestRootPartition->testRegisterPartition()) { delete pTestRootPartition; return false; } // Adjust budget for determining the split point for all continuous attributes m_pBudget = m_pBudget - (m_workingBudget * m_pAttribMgr->getNumConAttribs()); // Construct raw counts of the partition. if (!pRootPartition->constructSupportMatrix(m_workingBudget)) { delete pRootPartition; return false; } // Compute score (e.g. infoGain or Max) of each concept in the cut. if (!m_pAttribMgr->computeScore()) { delete pRootPartition; return false; } // Add root partition to leaf partitions. m_leafPartitions.cleanup(); pRootPartition->m_leafPos = m_leafPartitions.AddTail(pRootPartition); pRootPartition = NULL; // Add testRoot partition to testLeaf partitions. m_testLeafPartitions.cleanup(); pTestRootPartition->m_leafPos = m_testLeafPartitions.AddTail(pTestRootPartition); pTestRootPartition = NULL; // Select an attribute to specialize. int splitCounter = 0; CTDAttrib* pSelectedAttrib = NULL; CTDConcept* pSelectedConcept = NULL; while (splitCounter < m_nSpecialization) { #ifdef _DEBUG_PRT_INFO cout << endl; cout << _T("* * * * * [Split Counter: ") << splitCounter << _T("] * * * * *") << endl; #endif // Adjust budget for picking winner attribube. m_pBudget = m_pBudget - m_workingBudget; // Select an concept for specialization if(!m_pAttribMgr->pickSpecializeConcept(pSelectedAttrib, pSelectedConcept, m_workingBudget)){ m_leafPartitions.cleanup(); return false; } // Adjust budget for determining the splitting point for continuous attribute if the winner is continuous attribute if(pSelectedAttrib->isContinuous()){ m_pBudget = m_pBudget - m_workingBudget; } // Split the related partitions based on the selected concept. if (!splitPartitions(pSelectedAttrib, pSelectedConcept)) { m_leafPartitions.cleanup(); return false; } // Split the related partitions for test data if (!splitTestPartitions(pSelectedAttrib, pSelectedConcept)) { m_testLeafPartitions.cleanup(); return false; } // Compute Score of each concept in the cut. if (!m_pAttribMgr->computeScore()) { m_leafPartitions.cleanup(); return false; } ++splitCounter; } cout << _T("Partitioning data succeeded.") << endl; return true; }