Esempio n. 1
0
void ModelTrain()
{
	Vocab vocab;
	vocab.LoadVocab("l3g.txt");
	cout << "vocab Size " << vocab.VocabSize << endl;
	vector < tuple <int *, int > > src_batch, tgt_batch;
	extractBinaryfromStream("data//train_data_40k.tsv", vocab, src_batch, tgt_batch, 1, 0);

	int sampleSize = src_batch.size();
	cout << "train sample size" << sampleSize << endl;

	int iteration = 30;
	int miniBatchSize = 1024;
	int featureDim = vocab.VocabSize;
	int batchNum = sampleSize / miniBatchSize;
	int nTrial = 4;

	vector <int> shuff(sampleSize);

	RunnerBehavior rb;
	rb.RunMode = RUNMODE_TRAIN;
	rb.Device = DEVICE_GPU;
	cout<<"init cuda computation ...."<<endl;
	rb.ComputeLib = new CudaOperationManager(true, true);
	
	cout<<"init cuda computation done"<<endl;
	
	int hiddenDim1 = 128;
	int hiddenDim2 = 128;

	SparseIndexMatrixStat srcMiniBatchInfo;
	srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
	srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	SparseIndexMatrixStat tgtMiniBatchInfo;
	tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
	tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	DenseMatrixStat OutputLayer1Info;
	OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
	OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;


	DenseMatrixStat OutputLayer2Info;
	OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
	OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;


	FullyConnectedLayer srcLayer1(featureDim, hiddenDim1, &rb);
	FullyConnectedLayer srcLayer2(hiddenDim1, hiddenDim2, &rb);

	FullyConnectedLayer tgtLayer1(featureDim, hiddenDim1, &rb);
	FullyConnectedLayer tgtLayer2(hiddenDim1, hiddenDim2, &rb);

	DenseMatrixStat OutputSimInfo;
	OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
	OutputSimInfo.MAX_COL_SIZE = 1 + nTrial;
	OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
	OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;

	SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);	
	HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);

	SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
	HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);

	BiMatchData biMatchData(miniBatchSize, nTrial, rb.Device);

	SimilarityRunner similarityRunner(10, &rb);
	HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
	HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);

	probOutput.Deriv->Data->Zero();
	
	//iteration = 1;
	cout<<"start training iteration"<<endl;
	
	double train_time = 0;
	double io_time = 0;
		
	struct timeval train_start, train_end;
	struct timeval io_start, io_end;
	
	gettimeofday(&train_start, 0);
	
	for (int iter = 0; iter<iteration; iter++)
	{
		for (int i = 0; i<sampleSize; i++) shuff[i] = i;

		int shuffIdx = 0;

		float avgLoss = 0;
		for (int b = 0; b<batchNum; b++)
		{
			gettimeofday(&io_start, 0);

			srcBatch.Refresh();
			tgtBatch.Refresh();

			while (shuffIdx < sampleSize - 1 && srcBatch.RowSize < miniBatchSize && tgtBatch.RowSize < miniBatchSize)
			{
				int p = shuffIdx + rand() % (sampleSize - shuffIdx);
				int smpIdx = shuff[p];
				shuff[p] = shuff[shuffIdx];
				shuff[shuffIdx] = smpIdx;
				shuffIdx += 1;

				srcBatch.PushSample(get<0>(src_batch[smpIdx]), get<1>(src_batch[smpIdx]));
				tgtBatch.PushSample(get<0>(tgt_batch[smpIdx]), get<1>(tgt_batch[smpIdx]));
			}
			
			gettimeofday(&io_end, 0);
			
			io_time += io_end.tv_sec - io_start.tv_sec;
			
			
			//cout<<"src batch row "<< srcBatch.RowSize<<endl;
			//cout<<"src element size " <<srcBatch.ElementSize<<endl; 
			//cout<<"tgt batch row "<< tgtBatch.RowSize<<endl;
			//cout<<"tgt element size " <<tgtBatch.ElementSize<<endl; 
			
			//srcLayer1.Weight->SyncToHost(0, 100);
			//tgtLayer1.Weight->SyncToHost(0, 100);
			
			//for(int i=0;i<100;i++)
			//{
			 //   cout<<"smpIdx "<< src.Weight->HostMem[i]<<endl;
			//}
			
			
			
			//cout<<"src weight "<<srcLayer1.Weight->HostMem[0]<<endl;
			//cout<<"tgt weight "<<tgtLayer1.Weight->HostMem[0]<<endl;
			
			//for(int i = 0; i< srcBatch.ElementSize; i++)
			//{
			//	srcBatch.SampleIdx
			//}
			//if( cudaSuccess != cudaGetLastError())
			//	cout <<"error 1"<<endl;
			
			
			srcLayer1.Forward(&srcBatch, srcLayer1Data.Output);
			//if( cudaSuccess != cudaGetLastError())
			//	cout <<"fdsfasdf"<<endl;
			//srcLayer1Data.Output->Data->SyncToHost(0,100);
			//cout<<"src 1 output"<<srcLayer1Data.Output->Data->HostMem[0]<<endl;
			
			srcLayer2.Forward(srcLayer1Data.Output, srcLayer2Data.Output);

			tgtLayer1.Forward(&tgtBatch, tgtLayer1Data.Output);
			tgtLayer2.Forward(tgtLayer1Data.Output, tgtLayer2Data.Output);
			
			biMatchData.GenerateMatch(srcBatch.RowSize);
			
			//srcLayer2Data.Output->Data->SyncToHost(0, srcLayer2Data.Stat->MAX_COL_SIZE * srcBatch.RowSize);
			//tgtLayer2Data.Output->Data->SyncToHost(0, tgtLayer2Data.Stat->MAX_COL_SIZE * tgtBatch.RowSize);
			
			//cout<<"src output"<<srcLayer2Data.Output->Data->HostMem[0]<<endl;
			//cout<<"tgt output"<<tgtLayer2Data.Output->Data->HostMem[0]<<endl;
			
			similarityRunner.Forward(srcLayer2Data.Output, tgtLayer2Data.Output, &biMatchData, simOutput.Output);

			//simOutput.Output->Data->SyncToHost(0, srcBatch.RowSize * 5);
			//for(int i=0;i<srcBatch.RowSize;i++)
			//{
			//	cout<<"sim"<< simOutput.Output->Data->HostMem[i]<<endl;
			//	break;
			//}
			//break;				
			rb.ComputeLib->SoftmaxForward(simOutput.Output->Data, probOutput.Output->Data, srcBatch.RowSize, simOutput.Stat->MAX_COL_SIZE);
			/// log softmax backward.  probOutput.Deriv->Data  --> biMatchData.MatchInfo
			rb.ComputeLib->VecAdd(probOutput.Output->Data, -1, biMatchData.MatchInfo, 1, simOutput.Deriv->Data, 0, biMatchData.MatchSize);

			//rb.ComputeLib->SoftmaxBackward(probOutput.Output->Data, probOutput.Deriv->Data, simOutput.Deriv->Data, srcBatch.RowSize, probOutput.Stat->MAX_COL_SIZE);
			/// output Loss.
			float loss = 0;
			//simOutput.Output->Data->QuickWatch();
			//simOutput.Deriv->Data->QuickWatch();
			probOutput.Output->Data->SyncToHost(0, srcBatch.RowSize * probOutput.Stat->MAX_COL_SIZE); //  ->QuickWatch();
			//probOutput.Deriv->Data->QuickWatch();
			for(int i=0;i< srcBatch.RowSize; i++)
			{
				//cout<< probOutput.Output->Data->HostMem[i * probOutput.Stat->MAX_COL_SIZE]<<endl;

				loss += logf(probOutput.Output->Data->HostMem[i * probOutput.Stat->MAX_COL_SIZE] + LARGEEPS);
			}
			loss = loss / srcBatch.RowSize;
			avgLoss = b * 1.0f / (b + 1) * avgLoss + 1.0f / (b + 1) * loss;

			if((b+1) % 10 == 0) cout<<"mini batch : "<<b+1<<"\t avg loss :"<<avgLoss<<endl;
			//cout<<"current loss "<<loss<<endl;
			similarityRunner.Backward(simOutput.Deriv, srcLayer2Data.Deriv, tgtLayer2Data.Deriv);


			tgtLayer2.Backward(tgtLayer2Data.Deriv, tgtLayer2Data.Output, tgtLayer1Data.Deriv);
			tgtLayer1.Backward(tgtLayer1Data.Deriv, tgtLayer1Data.Output);

			srcLayer2.Backward(srcLayer2Data.Deriv, srcLayer2Data.Output, srcLayer1Data.Deriv);
			srcLayer1.Backward(srcLayer1Data.Deriv, srcLayer1Data.Output);

			/// update.
			tgtLayer2.Update(tgtLayer2Data.Deriv, tgtLayer1Data.Output);
			tgtLayer1.Update(tgtLayer1Data.Deriv, &tgtBatch);

			srcLayer2.Update(srcLayer2Data.Deriv, srcLayer1Data.Output);
			srcLayer1.Update(srcLayer1Data.Deriv, &srcBatch);
		}
		cout<<"iteration : "<<iter + 1<<"\t avg loss :"<<avgLoss<<endl;

	}
	
	gettimeofday(&train_end, 0);
	
	train_time = (train_end.tv_sec - train_start.tv_sec);
			
	cout<<"train overall time elipsed (sec):"<<train_time<<endl;
	cout<<"io time elipsed (sec):"<<io_time<<endl;
	cout<<"gpu time elipsed (sec):"<<train_time - io_time<<endl;
	ofstream modelWriter;
	modelWriter.open("model//dssm.v2.model", ofstream::binary);
	srcLayer1.Serialize(modelWriter);
	srcLayer2.Serialize(modelWriter);
	tgtLayer1.Serialize(modelWriter);
	tgtLayer2.Serialize(modelWriter);
	modelWriter.close();
}
Esempio n. 2
0
void ModelPredict()
{
	Vocab vocab;
	vocab.LoadVocab("l3g.txt");
	cout << "vocab Size " << vocab.VocabSize << endl;
	vector < tuple <int *, int > > src_batch, tgt_batch;
	extractBinaryfromStream("data//test_data_clean.tsv", vocab, src_batch, tgt_batch, 0, 0);

	int sampleSize = src_batch.size();
	cout << "test sample size" << sampleSize << endl;

	int miniBatchSize = 1024;
	int featureDim = vocab.VocabSize;
	int batchNum = (sampleSize - 1) / miniBatchSize + 1;

	RunnerBehavior rb;
	rb.RunMode = RUNMODE_PREDICT;
	rb.Device = DEVICE_GPU;

	rb.ComputeLib = new CudaOperationManager(true, true);
	int hiddenDim1 = 128;
	int hiddenDim2 = 128;

	SparseIndexMatrixStat srcMiniBatchInfo;
	srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
	srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	SparseIndexMatrixStat tgtMiniBatchInfo;
	tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
	tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	DenseMatrixStat OutputLayer1Info;
	OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
	OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;

	DenseMatrixStat OutputLayer2Info;
	OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
	OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;

	ifstream modelReader;
	modelReader.open("model//dssm.v2.model", ofstream::binary);
	FullyConnectedLayer srcLayer1(modelReader, &rb);
	FullyConnectedLayer srcLayer2(modelReader, &rb);
	FullyConnectedLayer tgtLayer1(modelReader, &rb);
	FullyConnectedLayer tgtLayer2(modelReader, &rb);
	modelReader.close();

	DenseMatrixStat OutputSimInfo;
	OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
	OutputSimInfo.MAX_COL_SIZE = 1;
	OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
	OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;

	SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);	
	HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);

	SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
	HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);

	BiMatchData biMatchData(miniBatchSize, 0, rb.Device);

	SimilarityRunner similarityRunner(10, &rb);
	HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
	HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);

	ofstream outfile;
	outfile.open("data//test_data.v2.result", ofstream::out);

	int smpIdx = 0;

	for (int b = 0; b<batchNum; b++)
	{
		srcBatch.Refresh();
		tgtBatch.Refresh();

		while (smpIdx < sampleSize && srcBatch.RowSize < miniBatchSize && tgtBatch.RowSize < miniBatchSize)
		{
			srcBatch.PushSample(get<0>(src_batch[smpIdx]), get<1>(src_batch[smpIdx]));
			tgtBatch.PushSample(get<0>(tgt_batch[smpIdx]), get<1>(tgt_batch[smpIdx]));
			smpIdx++;
		}

		srcLayer1.Forward(&srcBatch, srcLayer1Data.Output);
		srcLayer2.Forward(srcLayer1Data.Output, srcLayer2Data.Output);

		tgtLayer1.Forward(&tgtBatch, tgtLayer1Data.Output);
		tgtLayer2.Forward(tgtLayer1Data.Output, tgtLayer2Data.Output);

		biMatchData.GenerateMatch(srcBatch.RowSize);

		similarityRunner.Forward(srcLayer2Data.Output, tgtLayer2Data.Output, &biMatchData, simOutput.Output);

		simOutput.Output->Data->QuickWatch();

		//probOutput.Deriv->Data->QuickWatch();
		for(int i=0;i< srcBatch.RowSize; i++)
			outfile<<simOutput.Output->Data->HostMem[i]<<endl;
		//cout<<srcBatch.RowSize<<"\t"<<smpIdx<<endl;
		
		if((b+1) % 10 == 0) cout<<"mini batch : "<<b+1<<" sample number "<<smpIdx<<endl;
	}
	outfile.close();
}