/**
 * Adds a batch of documents to the model and initialises their topic assignments.
 *
 * For each name/content pair the tokens are registered in `dictionary` and a new Text is
 * appended to `texts`. The per-type bookkeeping (numTypes, typeTotals, typeTopicCounts,
 * maxTypeCount) is then refreshed from the dictionary, every token position of the documents
 * added by this call receives a topic drawn uniformly from [0, numTopics-1] using an RNG
 * seeded with randomSeed, and finally buildInitialTypeTopicCounts() and
 * initializeHistograms() are called.
 *
 * @param names    one name per document
 * @param contents token list per document; contents[i] is paired with names[i]
 *
 * Any exception raised while loading is caught here and reported on std::cerr as
 * "Can't load documents"; nothing propagates to the caller.
 */
void TopicModel::addDocuments(std::vector<std::string>& names, std::vector<std::vector<std::string>>& contents)
{
    try
    {
        const size_t docs = names.size();
        if (contents.size() < docs)
        {
            // contents[ind] below would be read past the end; refuse the batch instead.
            std::cerr << "Can't load documents" << std::endl;
            return;
        }

        // Documents appended by this call start at this index in `texts`.
        const size_t firstNewText = texts.size();
        for (size_t ind = 0; ind < docs; ++ind)
        {
            std::vector<int> features(contents[ind].size());
            dictionary.addFeatures(contents[ind], features);
            texts.push_back(new Text(names[ind], features));
        }

        numTypes = dictionary.getFeaturesNum();
        typeTotals = dictionary.getFeatureTotals();
        typeTopicCounts.resize(numTypes);
        maxTypeCount = 0;

        for (size_t type = 0; type < numTypes; ++type)
        {
            if (typeTotals[type] > maxTypeCount) { maxTypeCount = typeTotals[type]; }
            // A type cannot be spread over more topics than it has occurrences.
            typeTopicCounts[type].resize(std::min(numTopics, typeTotals[type]));
        }

        boost::random::mt19937 rng;
        rng.seed(randomSeed);
        boost::random::uniform_int_distribution<> uint_dist(0, numTopics-1);
        for (size_t ind = 0; ind < docs; ++ind)
        {
            // Bind by reference rather than copying into a local vector: with a by-value
            // local the random assignments were written to a copy and thrown away.
            // NOTE(review): this only takes effect if getFeatureTopic() returns a mutable
            // reference to the Text's own topic vector - confirm against Text.
            auto&& topics = texts[firstNewText + ind]->getFeatureTopic();
            for (int& topic : topics)
            {
                topic = uint_dist(rng);
            }
        }

        buildInitialTypeTopicCounts();
        initializeHistograms();
    }
    catch(...)
    {
        // Deliberately best-effort: report the failure and return without rethrowing.
        std::cerr << "Can't load documents" << std::endl;

    }
}
示例#2
0
/**
 * Draws one index using the supplied random engine.
 *
 * A value is drawn from uint_dist and located in intervals_ by binary search: the result is
 * the position of the first entry that is greater than the drawn value, clamped to the last
 * valid index (num_vals - 1).
 * NOTE(review): the search presumes intervals_ is sorted ascending - confirm where it is built.
 *
 * @param generator random engine used for the draw
 * @return the selected index, or -1 when intervals_ is null
 */
int MultinomialSampler::Sample(std::default_random_engine &generator)
{
	// Nothing to sample from.
	if (intervals_ == 0)
		return -1;

	const unsigned int draw = uint_dist(generator);

	// Closed-range binary search over [lo, hi]; on exit lo is the first index whose
	// interval bound exceeds the draw (or num_vals if there is none).
	int lo = 0;
	int hi = num_vals - 1;
	while (lo <= hi)
	{
		const int mid = (lo + hi) / 2;
		if (draw < intervals_[mid])
			hi = mid - 1;
		else
			lo = mid + 1;
	}

	// Ran off the end: fall back to the last index.
	if (lo >= num_vals)
		return num_vals - 1;
	return lo;
}
示例#3
0
// Exercises computeKernel on fixed, randomly generated and file-loaded inputs. In every case
// the expected result is the sum of the element-wise products of the two input arrays.
TEST_F(AsmComputeKernelTest,testKernelSimple) {
	// Runs computeKernel once per data[] slot, each call on its own thread. Every thread is
	// joined before the next one is created, so the calls execute one after another.
	auto runKernelOnAllThreads = [&]() {
		for(int i = 0;i < numberOfThreads;++i) {
			pthread_create(&(threadID[i]),nullptr,(void* (*)(void*))computeKernel,(void*)(&data[i]));
			pthread_join(threadID[i],nullptr);
		}
	};

	// Case 1: all ones, 12 elements.
	float firstArray1[] = {1,1,1,1,1,1,1,1,1,1,1,1};
	float secondArray1[] = {1,1,1,1,1,1,1,1,1,1,1,1};
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = 12;
		data[i].first = firstArray1;
		data[i].second = secondArray1;
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,12);
	}

	// Case 2: only the first three products are non-zero (1*9 + 2*8 + 3*7).
	float firstArray2[] = {1,2,3,4,5,6,7,8,9,10,11,12};
	float secondArray2[] = {9,8,7,0,0,0,0,0,0,0,0,0};
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = 12;
		data[i].first = firstArray2;
		data[i].second = secondArray2;
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,46);
	}

	// Case 3: alternating signs, 12 elements.
	float firstArray3[] = {1,2,3,4,5,6,7,8,9,10,11,12};
	float secondArray3[] = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1};
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = 12;
		data[i].first = firstArray3;
		data[i].second = secondArray3;
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,-6);
	}

	// Case 4: alternating signs, 6 elements.
	float firstArray4[] = {1,2,3,4,5,6};
	float secondArray4[] = {1,-1,1,-1,1,-1};
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = 6;
		data[i].first = firstArray4;
		data[i].second = secondArray4;
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,-3);
	}

	// Case 5: odd length (13 elements).
	float firstArray5[] = {1,2,3,4,5,6,7,8,9,10,11,12,100};
	float secondArray5[] = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1};
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = 13;
		data[i].first = firstArray5;
		data[i].second = secondArray5;
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,94);
	}

	// Case 6: five rounds of random 0/1 vectors, checked against a reference sum computed here.
	// The engine gets a fixed, defined seed so the generated inputs are the same on every run.
	std::mt19937	rng;
	const uint32_t	seed = std::mt19937::default_seed;
	rng.seed(seed);
	std::uniform_int_distribution<uint32_t> uint_dist(1,1000);
	for(int j = 0;j < 5;++j) {
		const int size = 960;//uint_dist(rng);
		float result = 0;
		float * firstArray6 = new float[size];
		float * secondArray6 = new float[size];
		for(int i = 0;i < size;++i) {
			firstArray6[i] = uint_dist(rng) % 2 == 0 ? 1 : 0;
			secondArray6[i] = uint_dist(rng) %  2== 0 ? 1 : 0;
			result += firstArray6[i]*secondArray6[i];
		}
		for(int i = 0;i < numberOfThreads;++i) {
			data[i].size = size;
			data[i].first = firstArray6;
			data[i].second = secondArray6;
		}
		runKernelOnAllThreads();
		for(int i = 0;i < numberOfThreads;++i) {
			EXPECT_FLOAT_EQ(data[i].result,result);
		}
		// Allocated with new[], so they must be released with delete[].
		delete[]	firstArray6;
		delete[]	secondArray6;
	}

	// Case 7: two rows of a matrix loaded from the test data set.
	Matrix<float> X = Matrix<float>::loadMatrix("testDataSpam500/X");
	for(int i = 0;i < numberOfThreads;++i) {
		data[i].size = X.cols();
		data[i].first = X(0);
		data[i].second = X(455);
	}
	runKernelOnAllThreads();
	for(int i = 0;i < numberOfThreads;++i) {
		ASSERT_EQ(data[i].result,9);
	}
}
示例#4
0
文件: World.cpp 项目: BrianD1991/FYP
/**
 * Gets a random number in a range (inclusive at both ends).
 * The bounds may be negative and may be given in either order.
 * @param start :integer start value of number range
 * @param end :integer end value in range
 * @return number in range
 * @exception none
 */
int World::getRnd(int start,int end){
    // uniform_int_distribution requires low <= high, so order the bounds first.
    const int low = (end < start) ? end : start;
    const int high = (end < start) ? start : end;
    // Use a signed result type: with an unsigned one, negative bounds wrap to huge values.
    std::uniform_int_distribution<int> uint_dist(low,high);
    return uint_dist(rng);
}