void verifyThrNum::doJob(){
	try{
	de.coldDish(inputData, LEN, R, DATAFROM);
	de.coldDish(ptnData, LEN, R, PTNFROM);
	cout<<"input good"<<endl;
	}catch(int){
		cout<<"Subprocess Exception.(verifyThrNum_doJob())"<<endl;
		errorFlag = 1;
	}
	
	if(errorFlag != 1){
		for(unsigned k = 0; k< R; k++){
			for(unsigned i= 0; i< LEN; i++){
				mm(0, i)= inputData(k, i);
				mt(i, 0)= inputData(k, i);
			}
		axpy_prod(mt, mm, weight, false);
		}
		
		while(1){
			string str;
			cout<<"choose pattern 0~2 to recognize(-1 to stop):";
			getline (cin,str);
			if(str.compare("-1")==0)
				break;

			for(unsigned i= 0; i< ptnData.size2(); i++)
				pattern(0, i)= ptnData(atoi(str.c_str()), i);

			axpy_prod(pattern, weight, output);

			for(unsigned i=0; i <LEN; i++){
				if(output(0, i)>0)
					recgPtn(0, i)= 1;
				else if(output(0, i)<0)
					recgPtn(0, i)= -1;
			}
			cout<<"pattern "<<str<<": "<<endl;
			de.print2Matrix(COL, pattern);
			cout<<"recognized:"<<endl;
			de.print2Matrix(COL, recgPtn);
		}
	}
}
Exemple #2
0
/// Generates mc Wishart matrices, allowing for a singular scale matrix.
///
/// S is decomposed with lapack::gesvd; only the singular values larger than
/// 1e-9 times the largest one are kept. The retained directions, scaled by
/// functor::inv_sqrt of their singular values, form the p-by-r matrix Q that
/// is handed to wishart_1 for every sample.
///
/// @param df  forwarded unchanged to wishart_1
/// @param S   square input matrix; passed directly to lapack::gesvd
///            NOTE(review): the defensive copy of S is commented out and
///            LAPACK's gesvd normally overwrites its input - confirm that
///            callers do not rely on S after this call.
/// @param mc  number of samples to generate
/// @return    vector holding the mc matrices returned by wishart_1
ublas::vector<ublas::matrix<double> > wishart_InvA_rnd(const int df, ublas::matrix<double>& S, const int mc) {
  size_t p = S.size1();
  ublas::vector<double> D(p);
  ublas::matrix<double> P(p, p);
  ublas::matrix<double> F(p, p);
  F = ublas::zero_matrix<double>(p, p);

  // make copy of S
  // ublas::matrix<double> SS(S);

  lapack::gesvd('A', 'A', S, D, P, F);
  // svd0(S, P, D, F);
  // P = trans(P);

  //! correct for singular matrix: keep only the indices whose singular value
  //! exceeds the relative tolerance. The tolerance does not change inside the
  //! loop, so it is computed once instead of once per element.
  const double tol = norm_inf(D) * 1e-9;
  std::vector<size_t> ii;
  for (size_t i = 0; i < D.size(); ++i) {
    if (D(i) > tol)
      ii.push_back(i);
  }

  // Column selector: the retained singular-value indices.
  size_t r = ii.size();
  ublas::indirect_array<> idx(r);
  for (size_t i = 0; i < r; ++i)
    idx(i) = ii[i];

  // Row selector: every row, 0 .. p-1.
  ublas::indirect_array<> irow(p);
  for (size_t i = 0; i < irow.size(); ++i)
    irow(i) = i;

  ublas::matrix<double> Q(p, r);
  // Q = prod(project(P, irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx))));
  // rprod does not seem any faster than diagonalizing D before multiplication
  // Q = rprod(project(P, irow, idx), ublas::apply_to_all<functor::inv_sqrt<double> >(D));
  axpy_prod(project(trans(P), irow, idx), diagm(ublas::apply_to_all<functor::inv_sqrt<double> >(project(D, idx))), Q, true);

  // generate mc samples
  ublas::vector<ublas::matrix<double> > K(mc);
  for (int i = 0; i < mc; ++i)
    K(i) = wishart_1(df, Q, p, r);
  return K;
}
Exemple #3
0
/// Benchmark driver: multiplies a sparse matrix by a vector N times on the
/// OpenCL device group and N times with uBLAS axpy_prod, compares the two
/// results element-wise and prints the best (smallest) elapsed time of each.
///
/// Command line: argv[1] is the Matrix Market matrix file, argv[2] is the
/// Matrix Market vector file.
///
/// @return 0 after the benchmark has run, 1 if the two file arguments are
///         missing.
int main(int argc, char *argv[])
{
    // Both input files are required; without this check argv[1] / argv[2]
    // would be read past the end of the argument list.
    if (argc < 3)
    {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "opencl_spmv") << " <matrix file> <vector file>" << std::endl;
        return 1;
    }

    int64_t t1, t2, T1=0, T2=0;

    Kratos::OpenCL::DeviceGroup DeviceGroup(CL_DEVICE_TYPE_GPU, true);

    DeviceGroup.AddCLSearchPath("../");
    cl_uint Program = DeviceGroup.BuildProgramFromFile("opencl_spmv.cl", "-cl-fast-relaxed-math");
    cl_uint Kernel = DeviceGroup.RegisterKernel(Program, "CSR_Matrix_Vector_Multiply", WORKGROUP_SIZE);

    Kratos::CompressedMatrix A;
    Kratos::Vector X, Y1, Y2;

    Kratos::ReadMatrixMarketMatrix(argv[1], A);
    Kratos::ReadMatrixMarketVector(argv[2], X);

    // Y1 receives the device result, Y2 the uBLAS reference result.
    Y1.resize(A.size1());
    Y2.resize(A.size1());

    // NOTE(review): the index buffers are sized with sizeof(cl_ulong) while
    // they are filled from A.index1_data() / A.index2_data() - confirm the
    // host index type has the same width.
    cl_uint A_RowIndices = DeviceGroup.CreateBuffer((A.size1() + 1) * sizeof(cl_ulong), CL_MEM_READ_ONLY);
    cl_uint A_ColumnIndices = DeviceGroup.CreateBuffer(A.nnz() * sizeof(cl_ulong), CL_MEM_READ_ONLY);
    cl_uint A_Values = DeviceGroup.CreateBuffer(A.nnz() * sizeof(cl_double), CL_MEM_READ_ONLY);
    // X is the right-hand operand of A * X, so its length is taken from X
    // itself rather than from the row count of A (they differ for
    // non-square matrices).
    cl_uint X_Values = DeviceGroup.CreateBuffer(X.size() * sizeof(cl_double), CL_MEM_READ_ONLY);
    cl_uint Y_Values = DeviceGroup.CreateBuffer(A.size1() * sizeof(cl_double), CL_MEM_WRITE_ONLY);

    DeviceGroup.CopyBuffer(A_RowIndices, Kratos::OpenCL::HostToDevice, Kratos::OpenCL::VoidPList(1, &A.index1_data()[0]));
    DeviceGroup.CopyBuffer(A_ColumnIndices, Kratos::OpenCL::HostToDevice, Kratos::OpenCL::VoidPList(1, &A.index2_data()[0]));
    DeviceGroup.CopyBuffer(A_Values, Kratos::OpenCL::HostToDevice, Kratos::OpenCL::VoidPList(1, &A.value_data()[0]));

    DeviceGroup.CopyBuffer(X_Values, Kratos::OpenCL::HostToDevice, Kratos::OpenCL::VoidPList(1, &X[0]));

    DeviceGroup.SetBufferAsKernelArg(Kernel, 0, A_RowIndices);
    DeviceGroup.SetBufferAsKernelArg(Kernel, 1, A_ColumnIndices);
    DeviceGroup.SetBufferAsKernelArg(Kernel, 2, A_Values);
    DeviceGroup.SetBufferAsKernelArg(Kernel, 3, X_Values);
    DeviceGroup.SetBufferAsKernelArg(Kernel, 4, Y_Values);
    DeviceGroup.SetKernelArg(Kernel, 5, A.size1());
    DeviceGroup.SetLocalMemAsKernelArg(Kernel, 6, (ROWS_PER_WORKGROUP + 1) * sizeof(cl_ulong));
    DeviceGroup.SetLocalMemAsKernelArg(Kernel, 7, WORKGROUP_SIZE * sizeof(cl_double));

    std::cout << "Launch size: " << A.size1() * LOCAL_WORKGROUP_SIZE + 1 << std::endl;

    // Device runs: T1 keeps the smallest elapsed time over the N repetitions.
    for (unsigned int i = 0; i < N; i++)
    {
        t1 = Timer();

        DeviceGroup.ExecuteKernel(Kernel, A.size1() * LOCAL_WORKGROUP_SIZE + 1);
        DeviceGroup.Synchronize();

        t2 = Timer();

        if (i == 0 || t2 - t1 < T1)
        {
            T1 = t2 - t1;
        }
    }

    DeviceGroup.CopyBuffer(Y_Values, Kratos::OpenCL::DeviceToHost, Kratos::OpenCL::VoidPList(1, &Y1[0]));

    // Host reference runs: T2 keeps the smallest elapsed time likewise.
    for (unsigned int i = 0; i < N; i++)
    {
        t1 = Timer();

        axpy_prod(A, X, Y2);

        t2 = Timer();

        if (i == 0 || t2 - t1 < T2)
        {
            T2 = t2 - t1;
        }
    }

    // Report every entry where device and host results differ by more than
    // the absolute tolerance.
    for (cl_uint i = 0; i < A.size1(); i++)
    {
        if (fabs(Y1[i] - Y2[i]) > 1e-10)
        {
            std::cout << "Error in location " << i << ": " << Y1[i] << "  " << Y2[i] << std::endl;
        }
    }

    std::cout << "Norm_2 of Y1 is " << norm_2(Y1) << "." << std::endl;
    std::cout << "Norm_2 of Y2 is " << norm_2(Y2) << "." << std::endl;

    // NOTE(review): dividing by 1e6 and printing "ms" presumes Timer() counts
    // nanoseconds - confirm against Timer's definition.
    std::cout << "Test finished." << std::endl << "OpenCL SpMV:\t" << T1 / 1000000.00 << " ms" << std::endl << "uBlas:\t\t" << T2 / 1000000.00 << " ms" << std::endl;

    return 0;
}