// Copies the definition matrix of the node named `childName` into a 2-D array.
//
// The flat matrix is cut into consecutive runs of GetNumberOfOutcomes()
// elements; run r becomes row r of the result, so the result has
// GetSize() / GetNumberOfOutcomes() rows and GetNumberOfOutcomes() columns.
// NOTE(review): presumably each row is the child's distribution for one parent
// configuration — that depends on the library's matrix layout; confirm.
//
// Returns an empty array when the node cannot be found (a message is printed)
// or when the node reports no outcomes.
vf2D getCPTArray(DSL_network &network, string &childName) {
    // FindNode signals "not found" with a negative handle; do not dereference
    // whatever GetNode would return for it.
    const int childIdx = network.FindNode(childName.c_str());
    DSL_node *node = 0;
    if (childIdx >= 0) {
        node = network.GetNode(childIdx);
    }
    if (!node) {
        cout << "Cannot find node " << childName << " in the network." << endl;
        return vf2D();
    }

    DSL_nodeDefinition *def = node->Definition();
    const DSL_Dmatrix &cpt = *def->GetMatrix();

    const int colSize = def->GetNumberOfOutcomes();
    if (colSize <= 0) {
        return vf2D();  // nothing to split; also avoids dividing by zero below
    }
    const int rowSize = cpt.GetSize() / colSize;

    vf2D cpt2(rowSize, vf1D(colSize, 0.0));

    // Iterate only over complete rows so a trailing partial run (size not a
    // multiple of colSize) can never write past the last row.
    int elemIdx = 0;
    for (int rowIdx = 0; rowIdx < rowSize; ++rowIdx) {
        for (int colIdx = 0; colIdx < colSize; ++colIdx) {
            cpt2[rowIdx][colIdx] = cpt[elemIdx++];
        }
    }

    return cpt2;
}
// Computes one weight per row of the child's definition matrix.
//
// A "row" is a run of GetNumberOfOutcomes() consecutive matrix elements. For
// the first element of each row the flat index is converted to coordinates,
// and the weight is the product, over all parents, of the parent's own matrix
// entry at the flat index given by that parent's coordinate.
// NOTE(review): indexing the parent's matrix directly by its state index
// yields the parent's prior probability only if the parent has no parents of
// its own — confirm that this holds for the networks used.
//
// Returns an empty vector when the node cannot be found (a message is
// printed) or when the node reports no outcomes.
vf1D getWeights(DSL_network &network, string &childName) {
    // FindNode signals "not found" with a negative handle.
    const int childIdx = network.FindNode(childName.c_str());
    DSL_node *node = 0;
    if (childIdx >= 0) {
        node = network.GetNode(childIdx);
    }
    if (!node) {
        cout << "Cannot find node " << childName << " in the network." << endl;
        return vf1D();
    }

    DSL_nodeDefinition *def = node->Definition();
    const DSL_Dmatrix &cpt = *def->GetMatrix();
    const DSL_intArray &parents = network.GetParents(node->Handle());
    const int parentCount = parents.NumItems();

    const int colSize = def->GetNumberOfOutcomes();
    if (colSize <= 0) {
        // A zero stride would divide by zero and never advance the loop.
        return vf1D();
    }
    const int rowSize = cpt.GetSize() / colSize;
    vf1D weights(rowSize, 0.0);

    DSL_intArray coords;
    // Bounded by rowSize so weights[] is never written out of range, even if
    // the matrix size is not an exact multiple of colSize.
    for (int rowIdx = 0; rowIdx < rowSize; ++rowIdx) {
        cpt.IndexToCoordinates(rowIdx * colSize, coords);

        double mult = 1.0;
        for (int parentIdx = 0; parentIdx < parentCount; ++parentIdx) {
            DSL_node *parentNode = network.GetNode(parents[parentIdx]);
            const DSL_Dmatrix &parent_cpt = *parentNode->Definition()->GetMatrix();
            mult *= parent_cpt[coords[parentIdx]];
        }
        weights[rowIdx] = mult;
    }

    return weights;
}
// Command-line driver.
//
// Usage: <program> <data_file> <network_file> <mode>
//   EM     - learn parameters with LearnParamsEM
//   Smile  - as EM, then change the child node's type to DSL_NOISY_MAX
//   Naive  - learn parameters with LearnParamsNaive
//   Gauss  - learn parameters with LearnParamsGaussJordan
//   NvGJ   - run all three learners and print, for each, the weighted
//            EuclidianDistance between its child CPT and the CPT of the
//            network as read from <network_file>
// The child node name is fixed to "C1".
//
// Returns 0 on success, 1 when arguments are missing or the mode is unknown.
int main(int argc, char* argv[]) {
    ios_base::sync_with_stdio(false);

    // argv[1..3] are all required; reading them unchecked is undefined
    // behaviour when the program is started with fewer arguments.
    if (argc < 4) {
        cout << "Usage: " << (argc > 0 ? argv[0] : "program")
             << " <data_file> <network_file> <EM|Smile|Naive|Gauss|NvGJ>" << endl;
        return 1;
    }

    // Kept non-const: the helpers called below take (or may take) their
    // string arguments by non-const reference.
    string data_infile(argv[1]);
    string network_infile(argv[2]);
    string child_name("C1");
    const string mode(argv[3]);

    if (mode == "EM" || mode == "Smile") {
        DSL_network net = LearnParamsEM(data_infile, network_infile, child_name);

        if (mode == "Smile") {
            const int childIdx = net.FindNode(child_name.c_str());
            DSL_node* childNode = (childIdx >= 0) ? net.GetNode(childIdx) : 0;
            if (childNode) {
                childNode->ChangeType(DSL_NOISY_MAX);
            }
        }

        //printCPT(childNode);

    } else if (mode == "Naive") {
        DSL_network net = LearnParamsNaive(data_infile, network_infile, child_name);

    } else if (mode == "Gauss") {
        DSL_network net = LearnParamsGaussJordan(data_infile, network_infile, child_name);

    } else if (mode == "NvGJ") {
        DSL_network netGJ = LearnParamsGaussJordan(data_infile, network_infile, child_name);
        DSL_network netNaive = LearnParamsNaive(data_infile, network_infile, child_name);
        DSL_network netEM = LearnParamsEM(data_infile, network_infile, child_name);

        // Reference network: the file as stored, before any learning.
        DSL_network netOriginal = OpenNetwork(network_infile);

        vf1D weights = getWeights(netOriginal, child_name);

        vf2D cptGJ = getCPTArray(netGJ, child_name);
        vf2D cptNaive = getCPTArray(netNaive, child_name);
        vf2D cptEM = getCPTArray(netEM, child_name);
        vf2D cptOriginal = getCPTArray(netOriginal, child_name);

        cout << "Gauss:" << EuclidianDistance( cptOriginal, cptGJ, weights) << endl;
        cout << "Naive:" << EuclidianDistance( cptOriginal, cptNaive, weights) << endl;
        cout << "EM:" << EuclidianDistance( cptOriginal, cptEM, weights) << endl;

    } else {
        // Previously an unrecognised mode did nothing and reported success.
        cout << "Unknown mode: " << mode << endl;
        return 1;
    }

    return 0;
}
示例#4
0
// Loads the network stored in `inname`, builds the CPT map of its
// "LungCancer" node via get_cptmap, and prints:
//   - the key map {Smoker, Genetic, CoalWorker, BadDiet} -> "True" (printMap),
//   - the number of entries in the CPT map,
//   - the CPT map value for that key map and the outcome "True".
//
// `n` is not used by the current implementation.
// If the network cannot be loaded or has no "LungCancer" node, a message is
// printed and the function returns without printing anything else.
void generateDatafile(int n, string inname) {
	DSL_network theNet;
	theNet.ReadFile(inname.c_str());

	// A failed read leaves the network without the node, so the negative
	// handle check below covers both an unreadable file and a missing node.
	const int cancer = theNet.FindNode("LungCancer");
	DSL_node *node = (cancer >= 0) ? theNet.GetNode(cancer) : 0;
	if (!node) {
		cout << "Cannot find node LungCancer in " << inname << endl;
		return;
	}

	cptMap cpt1 = get_cptmap(node);

	keyMap km1;
	km1[string("Smoker")] = string("True");
	km1[string("Genetic")] = string("True");
	km1[string("CoalWorker")] = string("True");
	km1[string("BadDiet")] = string("True");

	printMap(km1);
	cout << "SIZE: "<< cpt1.size() << endl;
	cout << cpt1[km1]["True"];
}
// Learns the parameters of a network from a data file with DSL_em.
//
// Steps: read the data set, read the network (XDSL format), change the node
// named `child_name` to type DSL_CPT, match data set columns to network nodes,
// then run EM with uniformized and randomized parameters, seed 0 and an
// equivalent sample size of 1.
//
// data_infile    - path of the data set file
// network_infile - path of the network file
// child_name     - identifier of the node whose type is changed to DSL_CPT
//
// Returns the network after learning. Any failure prints a message and
// terminates the process with exit(1).
DSL_network LearnParamsEM(string data_infile, string network_infile, string child_name) {
    DSL_dataset ds;
    if (ds.ReadFile(data_infile.c_str()) != DSL_OKAY) {
        cout << "Cannot read data file... exiting." << endl;
        exit(1);
    }

    DSL_network originalNet;
    if (originalNet.ReadFile(network_infile.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
        cout << "Cannot read network... exiting." << endl;
        exit(1);
    }

    // FindNode signals "not found" with a negative handle; dereferencing the
    // result of GetNode for such a handle would crash.
    const int childIdx = originalNet.FindNode(child_name.c_str());
    DSL_node *childNode = (childIdx >= 0) ? originalNet.GetNode(childIdx) : 0;
    if (!childNode) {
        cout << "Cannot find child node... exiting." << endl;
        exit(1);
    }
    childNode->ChangeType(DSL_CPT);

    vector<DSL_datasetMatch> matches;
    string err;
    if (ds.MatchNetwork(originalNet, matches, err) != DSL_OKAY) {
        cout << "Cannot match network... exiting." << endl;
        if (!err.empty()) {
            // Surface the library's own explanation instead of dropping it.
            cout << err << endl;
        }
        exit(1);
    }

    DSL_em em;
    em.SetUniformizeParameters(true);
    em.SetRandomizeParameters(true);
    em.SetSeed(0);  // fixed seed
    em.SetEquivalentSampleSize(1);

    if (em.Learn(ds, originalNet, matches) != DSL_OKAY) {
        cout << "Cannot learn parameters... exiting." << endl;
        exit(1);
    }

    return originalNet;
}
    LearningInfo(string data_infile, string network_infile, string child_name) {

        if (dataSet.ReadFile(data_infile.c_str()) != DSL_OKAY) {
            cout << "Cannot read data file... exiting." << endl;
            exit(1);
        }

        if (originalNet.ReadFile(network_infile.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
            cout << "Cannot read network... exiting." << endl;
            exit(1);
        }

        string err;
        if (dataSet.MatchNetwork(originalNet, matches, err) != DSL_OKAY) {
            cout << "Cannot match network... exiting." << endl;
            exit(1);
        }

        for(unsigned int i=0 ; i < matches.size() ; ++i) {
            matchNetToData[matches[i].node] = matches[i].column;
            matchDataToNet[matches[i].column] = matches[i].node;
        }

        childIdx = originalNet.FindNode(child_name.c_str());
        childNode = originalNet.GetNode(childIdx);

        if (childNode->Definition()->GetType() != (DSL_CHANCE | DSL_DISCRETE | DSL_NOISY_MAX) ) {
            cout << "Child should be a NoisyMAX... exiting" << endl;
            // ewentualnie zmienic na noisy-max ręcznie
            exit(1);
        }

        childMAXDefinition = new DSL_noisyMAX(*(childNode->Definition()));

        DSL_intArray &parents = originalNet.GetParents(childNode->Handle());
        numberOfParents = parents.NumItems();
        parentIndices = vector<int>(numberOfParents, 0);
        for(int i=0; i<numberOfParents; ++i)
            parentIndices[i] = parents[i];

        childDimension = childNode->Definition()->GetNumberOfOutcomes();
        parentDimensions = vector<int>(numberOfParents, 0);
        sumParentDimensions = 0;

        parentOutcomesStrengths = vector<DSL_intArray>(numberOfParents);
        minimalNumberOfParameters = 1; // minimal number of unique parameters to calculate (count leak right away)

        for(int parentIdx = 0 ; parentIdx < numberOfParents ; ++parentIdx) {
            DSL_node *parentNode = originalNet.GetNode(parentIndices[parentIdx]);
            sumParentDimensions += (parentDimensions[parentIdx] = parentNode->Definition()->GetNumberOfOutcomes()); //parent dimension is equal to the number of outcomes
            parentOutcomesStrengths[parentIdx] = childMAXDefinition->GetParentOutcomeStrengths(parentIdx);
            //for (int stateIdx=0 ; stateIdx < parentDimensions[parentIdx] ; ++stateIdx)
            //	cout << parentOutcomesStrengths[parentIdx][stateIdx] << " ";
            //cout << endl;
            minimalNumberOfParameters += parentDimensions[parentIdx] - 1; // (each parent dimension reduced by one) because we don't count distinguished states of parents
            distinguishedStates[parentIdx] = parentOutcomesStrengths[parentIdx][parentDimensions[parentIdx] - 1];
        }

        int sumOffset = 0;
        parameterRowOffset = vi1D(numberOfParents + 1, 0); // +1 so we know the offset for LEAK column
        for(int parentIdx = 0; parentIdx < numberOfParents ; ++parentIdx) {
            parameterRowOffset[parentIdx] = sumOffset;
            sumOffset += parentDimensions[parentIdx] - 1;
        }
        parameterRowOffset[numberOfParents] = sumOffset;

        parametersRowLength = minimalNumberOfParameters;
        minimalNumberOfParameters *= (childDimension - 1); // number of unique rows, last row is always 1.0 - sum

        //	DEBUG(minimalNumberOfParameters);
        //	DEBUG(childDimension);
        //	DEBUGV(parentDimensions);
        //	DEBUGV(parameterRowOffset);


        //for(int j=0; j< 7 ; ++j) {
        //	DSL_datasetVarInfo vi = ds.GetVariableInfo(j);
        //	cout << "discreete:" << vi.discrete << " id:" << vi.id << endl << " missingInt:" << vi.missingInt << " mF:" << vi.missingFloat << "snames:"<< endl;
        //	for(int i=0;i<vi.stateNames.size(); ++i)
        //		cout << vi.stateNames[i]<< " ";
        //	cout <<endl;
        //}

        //for(int i = 0; i < ds.GetNumberOfRecords(); ++i) {

        //	vector<int> row(ds.GetNumberOfVariables(), 0);
        //	int sum_ones = 0;

        //	for(int j = 0; j < ds.GetNumberOfVariables(); ++j) {
        //		sum_ones += (row[j] = ds.GetInt(j,i));
        //	}
        //}
        //vector<int> rd = ds.GetIntData(0);
        //cout <<"RDSize:"<<rd.size()<< endl;
        //for(int i=0;i<rd.size();++i) {
        //	cout << vi.stateNames[rd[i]] << endl;
        //}
        //
    }
示例#7
0
// Creates one tabular CPD per DSL node and attaches it to the PNL network.
//
// For node i (looked up in dslNet through theIds[i]) the CPD domain is the
// node's DSL parents, in DSL order, followed by i itself. The node's
// definition matrix is copied element by element from double to float; every
// run of <number of states of node i> consecutive values is rescaled to sum
// to 1 when the float sum is not exactly 1.
// NOTE(review): parent handles returned by DSL are used directly as PNL node
// numbers — this assumes both libraries number the nodes identically; confirm.
//
// dslNet  - source network
// pnlBNet - destination network; its factors are (re)allocated here
//
// On any failure a message is written to std::cout and the function returns
// early, leaving the factors created so far attached.
void DSLPNLConverter::CreateFactors(DSL_network& dslNet, CBNet* pnlBNet)
{
    int i,j,k;
    
    // Read number of nodes in the net
    const int numberOfNodes = dslNet.GetNumberOfNodes();
    
    // This is a way PNL likes it	
    pnlBNet->AllocFactors();
    
    for (i=0;i<numberOfNodes;i++)
    {
        // Resolve the node once; a negative handle means it was not found.
        const int nodeHandle = dslNet.FindNode(theIds[i]);
        if (nodeHandle < 0)
        {
            std::cout << "We got a problem with finding node " << i << std::endl;
            return;
        }
        
        // Read CPT from SMILE. The pointer starts out NULL so that a failed
        // GetDefinition is detected instead of reading an indeterminate value.
        DSL_Dmatrix* dslMatrix = NULL;
        dslNet.GetNode(nodeHandle)->Definition()->GetDefinition(&dslMatrix);
        if (dslMatrix==NULL)
        {
            std::cout << "We got a problem with reading CPT of node " << i << std::endl;
            return;
        }
        
        // Get parents of the ith node 
        // IMPORTANT -- we should preserve order from DSL_network, since
        // probabilities will be according DSL ordering
        DSL_intArray dslParents;
        dslParents = dslNet.GetParents(nodeHandle);
        
        // establish sizes and allocate memory
        const int numberOfNodesInDomain = dslParents.NumItems() + 1;
        int* domain = new int[numberOfNodesInDomain];
        CNodeType** nodeTypes = new CNodeType*[numberOfNodesInDomain];
        
        // establish members of the domain: parents first, the node itself last
        for (j=0;j<numberOfNodesInDomain-1;j++)
            domain[j] = dslParents[j];
        
        domain[numberOfNodesInDomain-1] = i;
        
        // Fill up node types
        for (j=0;j<numberOfNodesInDomain;j++)
            nodeTypes[j] = const_cast <CNodeType*> (pnlBNet->GetNodeType(domain[j]));
        
        // Here we convert 'copy' numbers from SMILE to PNL
        // The painful part is convert double to float.
        // Additionally we check if after conversion they sum-up to 1
        const int numberOfMyStates = nodeTypes[numberOfNodesInDomain-1]->GetNodeSize();
        if (numberOfMyStates<=0)
        {
            // Would divide by zero below.
            std::cout << "We got a problem with the size of node " << i << std::endl;
            delete[] nodeTypes;
            delete[] domain;
            return;
        }
        
        // Alloc space for CPT
        const int sizeOfCPT = dslMatrix->GetSize();
        float* flatCPT = new float[sizeOfCPT];
        
        const int iterations = sizeOfCPT/numberOfMyStates;
        for (j=0;j<iterations;j++)
        {
            float* column = flatCPT + j*numberOfMyStates;
            float sum = 0.0f;
            for (k=0;k<numberOfMyStates;k++)
            {
                column[k] = static_cast <float> (dslMatrix->Subscript(j*numberOfMyStates+k));
                sum += column[k];
            }
            // Rescale only when needed; an all-zero run is left untouched
            // because dividing by a zero sum would produce NaN.
            if (sum!=1.0f && sum!=0.0f)
            {
                for (k=0;k<numberOfMyStates;k++)
                    column[k] /= sum;      
            }
        }
        
#ifdef DSLPNL_DEBUG
        std::cerr << "Node "<< i << " domain : ";
        for (j=0;j<numberOfNodesInDomain;j++)
        {
            std::cerr << domain[j] << " ";
        }
        std::cerr <<  std::endl;
        for (j=0;j<sizeOfCPT;j++)
            std::cerr << flatCPT[j] << " ";
        std::cerr <<  std::endl;
#endif
        
        CModelDomain* pMD = pnlBNet->GetModelDomain();
        CCPD* pnlCPD = CTabularCPD::Create(domain, numberOfNodesInDomain, pMD, flatCPT);
        
        // The temporary buffers are released on the failure path as well as
        // the success path.
        // NOTE(review): freeing domain/flatCPT right after Create/AttachFactor
        // (as the original did on success) assumes PNL copies them — confirm.
        if (pnlCPD==NULL)
        {
            std::cout << "We got a problem with creating CPD" << std::endl;
            delete[] nodeTypes;
            delete[] domain;
            delete[] flatCPT;
            return;
        }
        pnlBNet->AttachFactor(pnlCPD);
        
        delete[] nodeTypes;
        delete[] domain;
        delete[] flatCPT; 
    }
}
// Learns the parameters of the network stored in `theNet` from the data set
// stored in `trainingSet` using DSL_em, and writes the learned network next to
// the input with "res_" prefixed to the file name.
//
// Data set columns are matched to nodes by identifier:
//   - "<nodeId>_<number>": for each '_' from left to right, the text before it
//     is tried as a node identifier; on the first hit the column is matched to
//     that node with slice = atoi(text after that '_');
//   - otherwise the whole column identifier is tried as a node identifier and
//     matched with slice 0.
// Columns that match no node are ignored. The integer codes of every matched
// column are then rewritten from data set state indices to the node's outcome
// indices, by comparing state names.
//
// theNet      - path of the network file (XDSL format)
// trainingSet - path of the data set file
//
// Returns the path the learned network was written to. Any failure prints a
// message and terminates the process with exit(1).
string dynamicEMTraining(string theNet, string trainingSet){
    // open the data set:
    DSL_dataset ds;
    if (ds.ReadFile(trainingSet.c_str()) != DSL_OKAY) {
        cout << "Cannot read data file... exiting." << endl;
        exit(1);
    }

    // open the network:
    DSL_network net;
    if (net.ReadFile(theNet.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
        cout << "Cannot read network... exiting." << endl;
        exit(1);
    }

    // match the data set and the network (variables):
    const int variableCount = ds.GetNumberOfVariables();
    vector<DSL_datasetMatch> dsMap(variableCount);
    int varCnt = 0;  // the number of variables occurring both in the data set and the network
    for (int i = 0; i < variableCount; i++) {
        const string id = ds.GetId(i);

        // Try every "<prefix>_<rest>" split, shortest prefix first. Using
        // std::string for the prefix means nothing is leaked when a prefix
        // does not name a node.
        bool done = false;
        for (string::size_type sep = id.find('_');
             sep != string::npos && !done;
             sep = id.find('_', sep + 1)) {
            const string nodeId = id.substr(0, sep);
            const int nodeHdl = net.FindNode(nodeId.c_str());
            if (nodeHdl >= 0) {
                dsMap[varCnt].node   = nodeHdl;
                dsMap[varCnt].slice  = atoi(id.c_str() + sep + 1);
                dsMap[varCnt].column = i;
                varCnt++;
                done = true;
            }
        }

        if (!done) {
            const int nodeHdl = net.FindNode(id.c_str());
            if (nodeHdl >= 0) {
                dsMap[varCnt].node   = nodeHdl;
                dsMap[varCnt].slice  = 0;
                dsMap[varCnt].column = i;
                varCnt++;
            }
        }
    }
    dsMap.resize(varCnt);

    // match the data set and the network (states):
    const int recordCount = ds.GetNumberOfRecords();
    for (int i = 0; i < (int) dsMap.size(); i++) {
        const int nodeHdl = dsMap[i].node;
        const int colIdx = dsMap[i].column;

        DSL_idArray* ids = net.GetNode(nodeHdl)->Definition()->GetOutcomesNames();
        const DSL_datasetVarInfo &varInfo = ds.GetVariableInfo(colIdx);
        const vector<string> &stateNames = varInfo.stateNames;

        // stateToOutcome[j] = index of the node outcome named like data set
        // state j, or -1 when no outcome has that name.
        vector<int> stateToOutcome(stateNames.size(), -1);
        for (int j = 0; j < (int) stateNames.size(); j++) {
            for (int k = 0; k < ids->NumItems(); k++) {
                const char* outcomeId = (*ids)[k];
                if (stateNames[j] == outcomeId) {
                    stateToOutcome[j] = k;
                }
            }
        }

        for (int k = 0; k < recordCount; k++) {
            const int state = ds.GetInt(colIdx, k);
            // Negative values are left as they are (as before); values beyond
            // the known state names are also left alone rather than used as an
            // out-of-range index.
            if (state >= 0 && state < (int) stateToOutcome.size()) {
                ds.SetInt(colIdx, k, stateToOutcome[state]);
            }
        }
    }

    // learn parameters:
    DSL_em em;
    if (em.Learn(ds, net, dsMap) != DSL_OKAY) {
        cout << "Cannot learn parameters... exiting." << endl;
        exit(1);
    }

    // Put "res_" in front of the file name: right after the last '/', or at
    // the very start when the path has no directory part.
    const string::size_type slash = theNet.find_last_of('/');
    theNet.insert(slash == string::npos ? 0 : slash + 1, "res_");

    if (net.WriteFile(theNet.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
        cout << "Cannot write network... exiting." << endl;
        exit(1);
    }

    return theNet;
}