void TopicModel::addDocuments(std::vector<std::string>& names, std::vector<std::vector<std::string>>& contents) { try { size_t docs =names.size(); for (size_t ind = 0; ind < docs; ++ind) { std::vector<int> features(contents[ind].size()); dictionary.addFeatures(contents[ind],features); texts.push_back(new Text(names[ind], features)); } numTypes = dictionary.getFeaturesNum(); typeTotals = dictionary.getFeatureTotals(); typeTopicCounts.resize(numTypes); maxTypeCount = 0; for (size_t type = 0; type < numTypes; ++type) { if (typeTotals[type] > maxTypeCount) { maxTypeCount = typeTotals[type]; } typeTopicCounts[type].resize(std::min(numTopics, typeTotals[type])); } boost::random::mt19937 rng; rng.seed(randomSeed); boost::random::uniform_int_distribution<> uint_dist(0, numTopics-1); for (size_t ind = 0; ind < docs; ++ind) { std::vector<int> topics = texts[ind]->getFeatureTopic(); for (size_t position = 0; position < topics.size(); ++position) { int topic = uint_dist(rng); topics[position] = topic; } } buildInitialTypeTopicCounts(); initializeHistograms(); } catch(...) { std::cerr << "Can't load documents" << std::endl; } }
/**
 * Draws a value from uint_dist and maps it to an index into intervals_.
 *
 * A binary search looks for the first entry of intervals_ that is strictly
 * greater than the drawn value (this assumes intervals_ is sorted ascending,
 * presumably cumulative bucket bounds; confirm against the code that fills it).
 * If no entry is greater, the last index is returned.
 *
 * @param generator random engine fed to uint_dist
 * @return selected index in [0, num_vals-1], or -1 when intervals_ is null
 */
int MultinomialSampler::Sample(std::default_random_engine &generator) {
    // Nothing to sample from.
    if (!intervals_) {
        return -1;
    }

    const unsigned int draw = uint_dist(generator);

    int low = 0;
    int high = num_vals - 1;
    while (low <= high) {
        // low + high is non-negative inside the loop, so this equals the shift form.
        const int mid = (low + high) / 2;
        if (intervals_[mid] > draw) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }

    // The draw was not below any bound: clamp to the last bucket.
    if (low >= num_vals) {
        low = num_vals - 1;
    }
    return low;
}
/**
 * Runs computeKernel on every data[] slot and compares data[i].result with the
 * dot product of the two input arrays: five hand-computed cases, five rounds of
 * random 0/1 vectors, and two rows of the "testDataSpam500/X" matrix.
 */
TEST_F(AsmComputeKernelTest,testKernelSimple) {
    // Starts computeKernel for each slot. Every thread is joined right after it is
    // created (as in the original test), so the runs are sequential.
    auto runKernelOnEveryThread = [&]() {
        for(int i = 0;i < numberOfThreads;++i) {
            pthread_create(&(threadID[i]),nullptr,reinterpret_cast<void* (*)(void*)>(computeKernel),static_cast<void*>(&data[i]));
            pthread_join(threadID[i],nullptr);
        }
    };

    // Points every slot at the same pair of arrays, then runs the kernel.
    auto runCase = [&](int size,float* first,float* second) {
        for(int i = 0;i < numberOfThreads;++i) {
            data[i].size = size;
            data[i].first = first;
            data[i].second = second;
        }
        runKernelOnEveryThread();
    };

    // 12 * (1*1) = 12
    float firstArray1[] = {1,1,1,1,1,1,1,1,1,1,1,1};
    float secondArray1[] = {1,1,1,1,1,1,1,1,1,1,1,1};
    runCase(12,firstArray1,secondArray1);
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,12);
    }

    // 1*9 + 2*8 + 3*7 = 46
    float firstArray2[] = {1,2,3,4,5,6,7,8,9,10,11,12};
    float secondArray2[] = {9,8,7,0,0,0,0,0,0,0,0,0};
    runCase(12,firstArray2,secondArray2);
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,46);
    }

    // Alternating signs: (1-2) + (3-4) + ... + (11-12) = -6
    float firstArray3[] = {1,2,3,4,5,6,7,8,9,10,11,12};
    float secondArray3[] = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1};
    runCase(12,firstArray3,secondArray3);
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,-6);
    }

    // Shorter input of length 6: -3
    float firstArray4[] = {1,2,3,4,5,6};
    float secondArray4[] = {1,-1,1,-1,1,-1};
    runCase(6,firstArray4,secondArray4);
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,-3);
    }

    // Length 13: -6 + 100 = 94
    float firstArray5[] = {1,2,3,4,5,6,7,8,9,10,11,12,100};
    float secondArray5[] = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1};
    runCase(13,firstArray5,secondArray5);
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,94);
    }

    // Random 0/1 vectors. The seed was read uninitialised in the original
    // (undefined behaviour); a fixed value keeps the test reproducible.
    const uint32_t seed = 5489u;
    std::mt19937 rng(seed);
    std::uniform_int_distribution<uint32_t> uint_dist(1,1000);
    for(int j = 0;j < 5;++j) {
        const int size = 960;//uint_dist(rng);
        float result = 0;
        float * firstArray6 = new float[size];
        float * secondArray6 = new float[size];
        for(int i = 0;i < size;++i) {
            firstArray6[i] = uint_dist(rng) % 2 == 0 ? 1 : 0;
            secondArray6[i] = uint_dist(rng) % 2== 0 ? 1 : 0;
            result += firstArray6[i]*secondArray6[i];
        }
        runCase(size,firstArray6,secondArray6);
        // EXPECT (not ASSERT) so the arrays below are always released.
        for(int i = 0;i < numberOfThreads;++i) {
            EXPECT_FLOAT_EQ(data[i].result,result);
        }
        // Allocated with new[], so they must be released with delete[].
        delete[] firstArray6;
        delete[] secondArray6;
    }

    // Rows 0 and 455 of the loaded matrix; expected result is 9.
    Matrix<float> X = Matrix<float>::loadMatrix("testDataSpam500/X");
    for(int i = 0;i < numberOfThreads;++i) {
        data[i].size = X.cols();
        data[i].first = X(0);
        data[i].second = X(455);
    }
    runKernelOnEveryThread();
    for(int i = 0;i < numberOfThreads;++i) {
        ASSERT_EQ(data[i].result,9);
    }
}
/**
 * Gets a random number in range (inclusive)
 * @param start :integer start value of number range
 * @param end :integer end value in range (if smaller than start, the two bounds are swapped)
 * @return number in range
 * @exception none
 */
int World::getRnd(int start,int end){
    // uniform_int_distribution requires a <= b; a reversed range is undefined behaviour.
    if (end < start) {
        const int tmp = start;
        start = end;
        end = tmp;
    }
    // Result type is int rather than uint32_t: with an unsigned distribution a
    // negative bound wraps to a huge value and breaks the a <= b requirement.
    std::uniform_int_distribution<int> uint_dist(start,end);
    return uint_dist(rng);
}