コード例 #1
0
/**
 * Learns network parameters from a data file with SMILE's EM implementation.
 *
 * Steps: read the data set, read the network (XDSL format), convert the node
 * named `child_name` to the DSL_CPT type, match data columns to network
 * nodes, and run DSL_em::Learn on the result.
 *
 * Any failure prints a message to stdout and terminates the process with
 * exit status 1; the function therefore only returns on success.
 *
 * @param data_infile     path of the data file passed to DSL_dataset::ReadFile
 * @param network_infile  path of the XDSL network file
 * @param child_name      identifier of the node whose type is changed to
 *                        DSL_CPT before learning
 * @return the network object that was handed to DSL_em::Learn
 */
DSL_network LearnParamsEM(string data_infile, string network_infile, string child_name) {
    DSL_dataset ds;
    if (ds.ReadFile(data_infile.c_str()) != DSL_OKAY) {
        cout << "Cannot read data file... exiting." << endl;
        exit(1);
    }

    DSL_network originalNet;
    if (originalNet.ReadFile(network_infile.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
        cout << "Cannot read network... exiting." << endl;
        exit(1);
    }

    // FindNode reports an unknown identifier with a negative value; using that
    // value as a handle would make GetNode() hand back an unusable pointer.
    const int childIdx = originalNet.FindNode(child_name.c_str());
    if (childIdx < 0) {
        cout << "Cannot find node '" << child_name << "' in network... exiting." << endl;
        exit(1);
    }
    originalNet.GetNode(childIdx)->ChangeType(DSL_CPT);

    vector<DSL_datasetMatch> matches;
    string err;  // filled in by MatchNetwork; not reported further here
    if (ds.MatchNetwork(originalNet, matches, err) != DSL_OKAY) {
        cout << "Cannot match network... exiting." << endl;
        exit(1);
    }

    DSL_em em;
    em.SetUniformizeParameters(true);
    em.SetRandomizeParameters(true);
    em.SetSeed(0);
    em.SetEquivalentSampleSize(1);

    if (em.Learn(ds, originalNet, matches) != DSL_OKAY) {
        cout << "Cannot learn parameters... exiting." << endl;
        exit(1);
    }

    return originalNet;
}
コード例 #2
0
    /**
     * Loads a data set and a network and precomputes the bookkeeping needed
     * to learn the parameters of one Noisy-MAX child node.
     *
     * The constructor:
     *  - reads the data file and the XDSL network and matches them;
     *  - records the node<->column correspondence in both directions;
     *  - locates the child node and requires its definition type to be
     *    exactly (DSL_CHANCE | DSL_DISCRETE | DSL_NOISY_MAX);
     *  - caches the parent handles, the parent/child outcome counts and the
     *    parents' outcome-strength arrays;
     *  - computes per-parent offsets into a parameter row and the number of
     *    unique parameters.
     *
     * Any failure prints a message to stdout and terminates the process with
     * exit status 1.
     *
     * @param data_infile     path of the data file
     * @param network_infile  path of the XDSL network file
     * @param child_name      identifier of the Noisy-MAX child node
     */
    LearningInfo(string data_infile, string network_infile, string child_name) {

        if (dataSet.ReadFile(data_infile.c_str()) != DSL_OKAY) {
            cout << "Cannot read data file... exiting." << endl;
            exit(1);
        }

        if (originalNet.ReadFile(network_infile.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
            cout << "Cannot read network... exiting." << endl;
            exit(1);
        }

        string err;
        if (dataSet.MatchNetwork(originalNet, matches, err) != DSL_OKAY) {
            cout << "Cannot match network... exiting." << endl;
            exit(1);
        }

        // Keep the node<->column correspondence available in both directions.
        for(unsigned int i=0 ; i < matches.size() ; ++i) {
            matchNetToData[matches[i].node] = matches[i].column;
            matchDataToNet[matches[i].column] = matches[i].node;
        }

        // FindNode reports an unknown identifier with a negative value; bail
        // out before that value is used as a handle and dereferenced below.
        childIdx = originalNet.FindNode(child_name.c_str());
        if (childIdx < 0) {
            cout << "Cannot find child node... exiting." << endl;
            exit(1);
        }
        childNode = originalNet.GetNode(childIdx);

        if (childNode->Definition()->GetType() != (DSL_CHANCE | DSL_DISCRETE | DSL_NOISY_MAX) ) {
            cout << "Child should be a NoisyMAX... exiting" << endl;
            // alternatively, the node could be changed to noisy-max manually
            exit(1);
        }

        // NOTE(review): allocated with new; confirm the class releases it.
        childMAXDefinition = new DSL_noisyMAX(*(childNode->Definition()));

        DSL_intArray &parents = originalNet.GetParents(childNode->Handle());
        numberOfParents = parents.NumItems();
        parentIndices = vector<int>(numberOfParents, 0);
        for(int i=0; i<numberOfParents; ++i)
            parentIndices[i] = parents[i];

        childDimension = childNode->Definition()->GetNumberOfOutcomes();
        parentDimensions = vector<int>(numberOfParents, 0);
        sumParentDimensions = 0;

        parentOutcomesStrengths = vector<DSL_intArray>(numberOfParents);
        minimalNumberOfParameters = 1; // minimal number of unique parameters to calculate (count leak right away)

        for(int parentIdx = 0 ; parentIdx < numberOfParents ; ++parentIdx) {
            DSL_node *parentNode = originalNet.GetNode(parentIndices[parentIdx]);
            // a parent's dimension is its number of outcomes
            parentDimensions[parentIdx] = parentNode->Definition()->GetNumberOfOutcomes();
            sumParentDimensions += parentDimensions[parentIdx];
            parentOutcomesStrengths[parentIdx] = childMAXDefinition->GetParentOutcomeStrengths(parentIdx);
            // each parent contributes (dimension - 1) parameters because the
            // distinguished state of a parent is not counted
            minimalNumberOfParameters += parentDimensions[parentIdx] - 1;
            // the distinguished state is taken from the last entry of the strengths array
            // NOTE(review): distinguishedStates is not sized in this constructor —
            // confirm it is a map or is pre-sized elsewhere.
            distinguishedStates[parentIdx] = parentOutcomesStrengths[parentIdx][parentDimensions[parentIdx] - 1];
        }

        // Offset of every parent's first parameter inside one parameter row.
        int sumOffset = 0;
        parameterRowOffset = vi1D(numberOfParents + 1, 0); // +1 so we know the offset for LEAK column
        for(int parentIdx = 0; parentIdx < numberOfParents ; ++parentIdx) {
            parameterRowOffset[parentIdx] = sumOffset;
            sumOffset += parentDimensions[parentIdx] - 1;
        }
        parameterRowOffset[numberOfParents] = sumOffset;

        parametersRowLength = minimalNumberOfParameters;
        minimalNumberOfParameters *= (childDimension - 1); // number of unique rows, last row is always 1.0 - sum
    }
string dynamicEMTraining(string theNet, string trainingSet){ 
 // open the data set:
	 DSL_dataset ds;
	 if (ds.ReadFile(trainingSet.c_str()) != DSL_OKAY) {
		cout << "Cannot read data file... exiting." << endl;
		exit(1);
	 }
	 
	 // open the network:
	 DSL_network net;
	 if (net.ReadFile(theNet.c_str(), DSL_XDSL_FORMAT) != DSL_OKAY) {
		cout << "Cannot read network... exiting." << endl;
		exit(1);
	 }
	 
	 // match the data set and the network (variables):
	 vector<DSL_datasetMatch> dsMap(ds.GetNumberOfVariables());
	 int varCnt = 0;  // the number of variables occuring both in the data set and the network
	 for (int i = 0; i < ds.GetNumberOfVariables(); i++) {
		string id = ds.GetId(i);
		const char* idc = id.c_str();
		
		bool done = false;
		for (int j = 0; j < (int) strlen(idc) && !done; j++) {
		   if (idc[j] == '_') {
			  char* nodeId = (char*) malloc((j+1) * sizeof(char));
			  strncpy(nodeId, idc, j);
			  nodeId[j] = '\0';
			  
			  int nodeHdl = net.FindNode(nodeId);
			  if (nodeHdl >= 0) {
				 DSL_intArray orders;
				 net.GetTemporalOrders(nodeHdl, orders);
				 
				 dsMap[varCnt].node   = nodeHdl;
				 dsMap[varCnt].slice  = atoi(idc + j + 1);
				 dsMap[varCnt].column = i;
				 varCnt++;
				 
				 free(nodeId);
				 done = true;
			  }
		   }
		}
		if (!done) {
		   int nodeHdl = net.FindNode(idc);
		   if (nodeHdl >= 0) {
			  dsMap[varCnt].node   = nodeHdl;
			  dsMap[varCnt].slice  = 0;
			  dsMap[varCnt].column = i;
			  varCnt++;
		   }
		}
	 }
	 dsMap.resize(varCnt);
	 
	 // match the data set and the network (states):
	 for (int i = 0; i < dsMap.size(); i++) {
		DSL_datasetMatch &m = dsMap[i];
		int nodeHdl = m.node;
		int colIdx = m.column;
		
		DSL_idArray* ids = net.GetNode(nodeHdl)->Definition()->GetOutcomesNames();
		const DSL_datasetVarInfo &varInfo = ds.GetVariableInfo(colIdx);
		const vector<string> &stateNames = varInfo.stateNames;
		vector<int> map(stateNames.size(), -1);
		for (int j = 0; j < (int) stateNames.size(); j++) {
		   const char* id = stateNames[j].c_str();
		   for (int k = 0; k < ids->NumItems(); k++) {
			  char* tmpid = (*ids)[k];
			  if (!strcmp(id, tmpid)) {
				 map[j] = k;
			  }
		   }
		}
		for (int k = 0; k < ds.GetNumberOfRecords(); k++) {
		   if (ds.GetInt(colIdx, k) >= 0) {
			  ds.SetInt(colIdx, k, map[ds.GetInt(colIdx, k)]);
		   }
		}
	}
	
	// learn parameters:
	DSL_em em;
	if (em.Learn(ds, net, dsMap) != DSL_OKAY) {
		cout << "Cannot learn parameters... exiting." << endl;
		exit(1);
	}
	
	net.WriteFile((theNet.insert(theNet.find_last_of("/") + 1, "res_")).c_str(), DSL_XDSL_FORMAT);
	
	return theNet;
}