Beispiel #1
0
//---------------------------------------------------------------------------
// Measure how much the records in the leaf partitions have been generalized.
//
// Every value of every record in every leaf partition is visited. Attributes
// whose m_bVirtualAttrib flag is not set are skipped. For the remaining ones:
//   - Categorical value: the number of taxonomy levels between the raw
//     concept and the current (generalized) concept, i.e.
//     raw depth - current depth, is added to catDistortion. Generalizing one
//     record by one level therefore costs 1 unit, and 100 records cost 100.
//   - Continuous value: the width of the current interval divided by the
//     width of the attribute's root interval is added to contDistortion.
//
// catDistortion  [out] reset to 0 on entry, then accumulated.
// contDistortion [out] reset to 0 on entry, then accumulated.
//
// Returns false (after asserting) if a negative concept depth is found; the
// outputs then hold the partial sums accumulated so far. Returns true
// otherwise.
//---------------------------------------------------------------------------
bool CTDEvalMgr::countNumDistortions(int& catDistortion, float& contDistortion)
{
    cout << _T("Counting number of distortions...") << endl;
    catDistortion = 0;
    contDistortion = 0.0f;
    CTDPartitions* pLeafPartitions = m_pPartitioner->getLeafPartitions();

    // For each partition.
    for (POSITION leafPos = pLeafPartitions->GetHeadPosition(); leafPos != NULL;) {
        CTDPartition* pPartition = pLeafPartitions->GetNext(leafPos);
        const int nRecs = pPartition->getNumRecords();

        // For each record.
        for (int r = 0; r < nRecs; ++r) {
            CTDRecord* pRec = pPartition->getRecord(r);
            const int nValues = pRec->getNumValues();

            // For each value. The value index doubles as the attribute index.
            for (int v = 0; v < nValues; ++v) {
                CTDAttrib* pAttrib = m_pAttribMgr->getAttribute(v);
                // NOTE(review): only attributes flagged m_bVirtualAttrib are
                // counted, exactly as before; confirm this is the intended set.
                if (!pAttrib->m_bVirtualAttrib)
                    continue;

                CTDValue* pValue = pRec->getValue(v);
                CTDConcept* pCurrentConcept = pValue->getCurrentConcept();
                if (pAttrib->isContinuous()) {
                    CTDContConcept* pContConcept = (CTDContConcept*) pCurrentConcept;
                    CTDContConcept* pRoot = (CTDContConcept*) pAttrib->getConceptRoot();

                    // A zero-width root interval would divide by zero and
                    // poison the running sum with NaN/inf. Such an attribute
                    // has no room to be generalized, so it adds no distortion.
                    if (pRoot->m_upperBound == pRoot->m_lowerBound)
                        continue;

                    contDistortion += (pContConcept->m_upperBound - pContConcept->m_lowerBound) / (pRoot->m_upperBound - pRoot->m_lowerBound);
                }
                else {
                    CTDConcept* pRawConcept = ((CTDStringValue*) pValue)->getRawConcept();
#if defined(_TD_SCORE_FUNTION_TRANSACTION)
                    // In case of transaction data, count a distortion only if suppressing "1".
                    if (pRawConcept->m_conceptValue.CompareNoCase(TD_TRANSACTION_ITEM_PRESENT) != 0)
                        continue;
#endif
                    if (pRawConcept->m_depth < 0 || pCurrentConcept->m_depth < 0) {
                        cout << _T("CTDEvalMgr::countNumDistortions: Negative depth.") << endl;
                        ASSERT(false);
                        return false;
                    }
                    catDistortion += pRawConcept->m_depth - pCurrentConcept->m_depth;
                }
            }
        }
    }
    cout << _T("Counting number of distortions succeeded.") << endl;
    return true;
}
Beispiel #2
0
//---------------------------------------------------------------------------
// Rebuild m_genRecords with one generalized record per class.
//
// Any existing generalized records are discarded first. For each class index
// in [0, m_nClasses) a new record is created with one value per attribute in
// pAttribs: a CTDNumericValue(-1.0) for continuous attributes, otherwise a
// CTDStringValue. The last attribute is treated as the class attribute and
// receives the class value for that index; every other attribute's value has
// its current concept initialized to the root concept.
//
// pAttribs  attribute list; the last entry is taken to be the class attribute.
//
// Returns true when exactly m_nClasses records were created. On any failure
// the record under construction is freed and false is returned; records
// already stored in m_genRecords remain there.
//---------------------------------------------------------------------------
bool CTDPartition::initGenRecords( CTDAttribs* pAttribs)
{
	m_genRecords.cleanup();
	const int nAttribs = pAttribs->GetSize();

	for (int classInd = 0; classInd < m_nClasses; ++classInd) {

		CTDRecord* pNewRecord = new CTDRecord();

		for (int attribID = 0; attribID < nAttribs; ++attribID) {

			CTDAttrib* pAttrib = pAttribs->GetAt(attribID);
			CTDValue* pNewValue = NULL;

			if (pAttrib->isContinuous())
				pNewValue = new CTDNumericValue(-1.0);
			else
				pNewValue = new CTDStringValue();

			if (!pNewValue) {
				ASSERT(false);
				delete pNewRecord;
				return false;
			}

			bool bInitialized = false;
			if (attribID == nAttribs - 1) {
				// Class attribute
				bInitialized = pNewValue->assignGenClassValue(pAttrib, classInd);
			}
			else {
				// Ordinary attribute
				// Initialize the current concept to the root concept.
				bInitialized = pNewValue->initConceptToRoot(pAttrib);
			}

			if (!bInitialized) {
				// The value was never handed to the record, so it is still
				// ours to free, together with the half-built record.
				delete pNewValue;
				delete pNewRecord;
				return false;
			}

			// Add the value to the record.
			if (!pNewRecord->addValue(pNewValue)) {
				// NOTE(review): assumes a failed addValue() did not take
				// ownership of the value - confirm against CTDRecord.
				delete pNewValue;
				delete pNewRecord;
				return false;
			}
		}

		// The record's ID is the index it is stored at in m_genRecords.
		pNewRecord->setRecordID(m_genRecords.Add(pNewRecord));
	}

	if (m_genRecords.GetSize() != m_nClasses) {
		cerr << _T("CTDPartition::initGenRecords: Number of generalized records is not correct.") << endl;
		return false;
	}

	return true;
}
Beispiel #3
0
//---------------------------------------------------------------------------
// The main algorithm.
//
// Builds the root partition for the training data and a parallel root
// partition for the test data, registers both, constructs the support matrix
// and the initial scores, and then performs up to m_nSpecialization
// specializations. Each iteration picks a concept, splits the affected
// training partitions and applies the same split to the test partitions.
//
// m_pBudget is decreased by m_workingBudget for each continuous attribute up
// front, once for every concept selection, and once more whenever the
// selected attribute is continuous.
//
// Returns true when all specializations succeeded, false on the first
// failure. Until the root partitions have been handed over to
// m_leafPartitions / m_testLeafPartitions they are owned by this function and
// are both freed on every failure path.
//---------------------------------------------------------------------------
bool CTDPartitioner::transformData()
{
    cout << _T("Partitioning data...") << endl;

    // Initialize the first partition.
    CTDPartition* pRootPartition = initRootPartition();
    if (!pRootPartition)
        return false;

    // Initialize the generalized records of the first partition.
    if (!pRootPartition->initGenRecords(m_pAttribMgr->getAttributes())) {
        delete pRootPartition;
        return false;
    }

    // We maintain a separate tree structure for test data to perform the same "cut" (generalization).
    CTDPartition* pTestRootPartition = initTestRootPartition();
    if (!pTestRootPartition) {
        delete pRootPartition;
        return false;
    }

    // Initialize budget for exponential mechanism.
    if (!initializeBudget()) {
        ASSERT(false);
        delete pTestRootPartition;
        delete pRootPartition;
        return false;
    }

    // Register this root partition to the related concepts.
    if (!pRootPartition->registerPartition()) {
        delete pTestRootPartition;
        delete pRootPartition;
        return false;
    }

    // Register this root partition for test data.
    if (!pTestRootPartition->testRegisterPartition()) {
        delete pTestRootPartition;
        delete pRootPartition;
        return false;
    }

    // Adjust budget for determining the split point for all continuous attributes.
    m_pBudget = m_pBudget - (m_workingBudget * m_pAttribMgr->getNumConAttribs());

    // Construct raw counts of the partition.
    if (!pRootPartition->constructSupportMatrix(m_workingBudget)) {
        delete pTestRootPartition;
        delete pRootPartition;
        return false;
    }

    // Compute score (e.g. infoGain or Max) of each concept in the cut.
    if (!m_pAttribMgr->computeScore()) {
        delete pTestRootPartition;
        delete pRootPartition;
        return false;
    }

    // Add root partition to leaf partitions. From here on the list holds the
    // pointer, so the local is cleared.
    m_leafPartitions.cleanup();
    pRootPartition->m_leafPos = m_leafPartitions.AddTail(pRootPartition);
    pRootPartition = NULL;

    // Add testRoot partition to testLeaf partitions.
    m_testLeafPartitions.cleanup();
    pTestRootPartition->m_leafPos = m_testLeafPartitions.AddTail(pTestRootPartition);
    pTestRootPartition = NULL;

    // Select an attribute to specialize.
    // On a failure inside the loop only the list cleaned by the original code
    // for that step is cleaned; the other list is left untouched, as before.
    int splitCounter = 0;
    CTDAttrib* pSelectedAttrib = NULL;
    CTDConcept* pSelectedConcept = NULL;
    while (splitCounter < m_nSpecialization) {

        #ifdef _DEBUG_PRT_INFO
            cout << endl;
            cout << _T("* * * * * [Split Counter: ") << splitCounter << _T("] * * * * *") << endl;
        #endif

        // Adjust budget for picking winner attribute.
        m_pBudget = m_pBudget - m_workingBudget;

        // Select a concept for specialization.
        if (!m_pAttribMgr->pickSpecializeConcept(pSelectedAttrib, pSelectedConcept, m_workingBudget)) {
            m_leafPartitions.cleanup();
            return false;
        }

        // The attribute is dereferenced just below; treat a reported success
        // without a selected attribute as a failed selection, not a crash.
        if (!pSelectedAttrib) {
            ASSERT(false);
            m_leafPartitions.cleanup();
            return false;
        }

        // Adjust budget for determining the splitting point for continuous attribute if the winner is continuous attribute.
        if (pSelectedAttrib->isContinuous()) {
            m_pBudget = m_pBudget - m_workingBudget;
        }

        // Split the related partitions based on the selected concept.
        if (!splitPartitions(pSelectedAttrib, pSelectedConcept)) {
            m_leafPartitions.cleanup();
            return false;
        }

        // Split the related partitions for test data.
        if (!splitTestPartitions(pSelectedAttrib, pSelectedConcept)) {
            m_testLeafPartitions.cleanup();
            return false;
        }

        // Compute Score of each concept in the cut.
        if (!m_pAttribMgr->computeScore()) {
            m_leafPartitions.cleanup();
            return false;
        }

        ++splitCounter;
    }

    cout << _T("Partitioning data succeeded.") << endl;

    return true;
}