Beispiel #1
0
//---------------------------------------------------------------------------
// Splits every partition related to pSplitConcept into child partitions.
//
// For each related (parent) partition: it is deregistered, its records are
// handed to distributeRecords() to build the child partitions, each child is
// registered and appended to m_leafPartitions, and the parent is removed from
// m_leafPartitions and deleted. After all parents have been processed, the
// support matrix of every newly created child is constructed using
// m_workingBudget.
//
// pSplitAttrib  - attribute being specialized (must not be NULL).
// pSplitConcept - concept being specialized (must not be NULL).
// Returns true on success; false as soon as any step reports failure.
//---------------------------------------------------------------------------
bool CTDPartitioner::splitPartitions(CTDAttrib* pSplitAttrib, CTDConcept* pSplitConcept)
{
    ASSERT(pSplitAttrib && pSplitConcept);

    // childPartitions is deliberately kept at function scope and handed to
    // distributeRecords() on every pass, exactly as before.
    CTDPartitions childPartitions, allChildPartitions;
    CTDPartitions* pRelParts = pSplitConcept->getRelatedPartitions();

    POSITION parentPos = pRelParts->GetHeadPosition();
    while (parentPos != NULL) {
        // GetNext() advances parentPos before the parent is touched below.
        // NOTE(review): presumably deregisterPartition() unlinks the parent
        // from this very list, which is why the cursor is moved first - confirm.
        CTDPartition* pParent = pRelParts->GetNext(parentPos);

#ifdef _DEBUG_PRT_INFO
        cout << _T("----------------------[Splitting Parent Partition]------------------------") << endl;
        cout << *pParent;
#endif
        // Detach the parent from the related concepts.
        if (!pParent->deregisterPartition()) {
            return false;
        }

        // Hand the parent's records over to the child partitions.
        if (!distributeRecords(pParent, pSplitAttrib, pSplitConcept, childPartitions)) {
            return false;
        }

        POSITION childPos = childPartitions.GetHeadPosition();
        while (childPos != NULL) {
            CTDPartition* pChild = childPartitions.GetNext(childPos);

            // Attach the child to the related concepts.
            if (!pChild->registerPartition()) {
                return false;
            }

            // The child becomes a leaf; remember where it sits in the leaf list.
            pChild->m_leafPos = m_leafPartitions.AddTail(pChild);
#ifdef _DEBUG_PRT_INFO
            cout << _T("------------------------[Splitted Child Partition]------------------------") << endl;
            cout << *pChild;
#endif
        }

        // The parent is no longer a leaf: unlink it and release it.
        m_leafPartitions.RemoveAt(pParent->m_leafPos);
        delete pParent;

        // Accumulate the children produced by this parent.
        allChildPartitions.AddTail(&childPartitions);
    }

    // Build the support matrix (raw counts) of every child created by this split.
    POSITION newChildPos = allChildPartitions.GetHeadPosition();
    while (newChildPos != NULL) {
        CTDPartition* pNewChild = allChildPartitions.GetNext(newChildPos);
        if (!pNewChild->constructSupportMatrix(m_workingBudget)) {
            ASSERT(false);
            return false;
        }
    }
    return true;
}
Beispiel #2
0
//---------------------------------------------------------------------------
// The main algorithm.
//
// Builds the root partition for the training data and a parallel root
// partition for the test data, registers both, constructs the root's support
// matrix, computes the concept scores, and then performs m_nSpecialization
// rounds of: pick a concept, split the related partitions (training and
// test), recompute scores.
//
// Budget accounting visible here: m_pBudget is reduced up front by
// m_workingBudget for every continuous attribute, by m_workingBudget once per
// round for picking the winner, and by m_workingBudget once more in a round
// whose selected attribute is continuous.
//
// Returns true when all rounds complete; false on the first failure. On
// failure, both root partitions (before they are handed to the leaf lists)
// or both leaf lists (afterwards) are released so that nothing is leaked.
//---------------------------------------------------------------------------
bool CTDPartitioner::transformData()
{
    cout << _T("Partitioning data...") << endl;

    // Initialize the first partition.
    CTDPartition* pRootPartition = initRootPartition();
    if (!pRootPartition)
        return false;

    // Initialize the generalized records of the first partition.
    if (!pRootPartition->initGenRecords(m_pAttribMgr->getAttributes())) {
        delete pRootPartition;
        return false;
    }

    // We maintain a separate tree structure for test data to perform the same "cut" (generalization).
    CTDPartition* pTestRootPartition = initTestRootPartition();
    if (!pTestRootPartition) {
        // The training root is still owned by this function; release it.
        delete pRootPartition;
        return false;
    }

    // From here on both roots are owned locally until they are appended to
    // the leaf lists, so every failure path must release both of them.
    // NOTE(review): as in the original code, a partition that was already
    // registered is deleted without being deregistered first - confirm that
    // the destructor (or the caller) copes with that.

    // Initialize budget for exponential mechanism.
    if (!initializeBudget()) {
        ASSERT(false);
        delete pRootPartition;
        delete pTestRootPartition;
        return false;
    }

    // Register this root partition to the related concepts.
    if (!pRootPartition->registerPartition()) {
        delete pRootPartition;
        delete pTestRootPartition;
        return false;
    }

    // Register this root partition for test data.
    if (!pTestRootPartition->testRegisterPartition()) {
        delete pRootPartition;
        delete pTestRootPartition;
        return false;
    }

    // Adjust budget for determining the split point for all continuous attributes.
    m_pBudget -= m_workingBudget * m_pAttribMgr->getNumConAttribs();

    // Construct raw counts of the partition.
    if (!pRootPartition->constructSupportMatrix(m_workingBudget)) {
        delete pRootPartition;
        delete pTestRootPartition;
        return false;
    }

    // Compute score (e.g. infoGain or Max) of each concept in the cut.
    if (!m_pAttribMgr->computeScore()) {
        delete pRootPartition;
        delete pTestRootPartition;
        return false;
    }

    // Add root partition to leaf partitions; the list is the owner from now on.
    m_leafPartitions.cleanup();
    pRootPartition->m_leafPos = m_leafPartitions.AddTail(pRootPartition);
    pRootPartition = NULL;

    // Add test root partition to test leaf partitions; same ownership hand-over.
    m_testLeafPartitions.cleanup();
    pTestRootPartition->m_leafPos = m_testLeafPartitions.AddTail(pTestRootPartition);
    pTestRootPartition = NULL;

    // Select an attribute to specialize.
    int splitCounter = 0;
    CTDAttrib* pSelectedAttrib = NULL;
    CTDConcept* pSelectedConcept = NULL;
    while (splitCounter < m_nSpecialization) {

#ifdef _DEBUG_PRT_INFO
        cout << endl;
        cout << _T("* * * * * [Split Counter: ") << splitCounter << _T("] * * * * *") << endl;
#endif

        // Adjust budget for picking the winner attribute.
        m_pBudget -= m_workingBudget;

        // Select a concept for specialization. pSelectedAttrib and
        // pSelectedConcept are filled in by the callee (they are NULL before
        // the first call and are dereferenced right after it).
        if (!m_pAttribMgr->pickSpecializeConcept(pSelectedAttrib, pSelectedConcept, m_workingBudget)) {
            m_leafPartitions.cleanup();
            m_testLeafPartitions.cleanup();
            return false;
        }

        // Adjust budget for determining the splitting point if the winner is a continuous attribute.
        if (pSelectedAttrib->isContinuous()) {
            m_pBudget -= m_workingBudget;
        }

        // Split the related partitions based on the selected concept.
        if (!splitPartitions(pSelectedAttrib, pSelectedConcept)) {
            m_leafPartitions.cleanup();
            m_testLeafPartitions.cleanup();
            return false;
        }

        // Split the related partitions for test data.
        if (!splitTestPartitions(pSelectedAttrib, pSelectedConcept)) {
            m_leafPartitions.cleanup();
            m_testLeafPartitions.cleanup();
            return false;
        }

        // Compute score of each concept in the cut.
        if (!m_pAttribMgr->computeScore()) {
            m_leafPartitions.cleanup();
            m_testLeafPartitions.cleanup();
            return false;
        }

        ++splitCounter;
    }

    cout << _T("Partitioning data succeeded.") << endl;

    return true;
}