void print_colmaj_ocp_qp_in(colmaj_ocp_qp_in *qp) { int_t N = qp->N; printf("ocp_qp structure with contents:\n"); printf("N: %d\n", qp->N); printf("nx:\n"); print_int_matrix("stdout", qp->nx, 1, N + 1); printf("nu:\n"); print_int_matrix("stdout", qp->nu, 1, N + 1); printf("nb:\n"); print_int_matrix("stdout", qp->nb, 1, N + 1); printf("nc:\n"); print_int_matrix("stdout", qp->nc, 1, N + 1); for (int_t stage = 0; stage < N + 1; stage++) { if (stage < N) { printf("A[%d]:\n", stage); print_matrix("stdout", qp->A[stage], qp->nx[stage], qp->nx[stage]); printf("B[%d]:\n", stage); print_matrix("stdout", qp->B[stage], qp->nx[stage], qp->nu[stage]); printf("b[%d]:\n", stage); print_matrix("stdout", qp->b[stage], qp->nx[stage], 1); } printf("Q[%d]:\n", stage); print_matrix("stdout", qp->Q[stage], qp->nx[stage], qp->nx[stage]); printf("R[%d]:\n", stage); print_matrix("stdout", qp->R[stage], qp->nu[stage], qp->nu[stage]); printf("S[%d]:\n", stage); print_matrix("stdout", qp->S[stage], qp->nu[stage], qp->nx[stage]); printf("q[%d]:\n", stage); print_matrix("stdout", qp->q[stage], qp->nx[stage], 1); printf("r[%d]:\n", stage); print_matrix("stdout", qp->r[stage], qp->nu[stage], 1); printf("lb[%d]:\n", stage); print_matrix("stdout", qp->lb[stage], qp->nb[stage], 1); printf("ub[%d]:\n", stage); print_matrix("stdout", qp->ub[stage], qp->nb[stage], 1); printf("Cx[%d]:\n", stage); print_matrix("stdout", qp->Cx[stage], qp->nc[stage], qp->nx[stage]); printf("Cu[%d]:\n", stage); print_matrix("stdout", qp->Cu[stage], qp->nc[stage], qp->nu[stage]); printf("lc[%d]:\n", stage); print_matrix("stdout", qp->lc[stage], qp->nc[stage], 1); printf("uc[%d]:\n", stage); print_matrix("stdout", qp->uc[stage], qp->nc[stage], 1); } printf("\n"); }
void print_ocp_qp_to_file(ocp_qp_in *qp) { char filename[MAX_STR_LEN]; for (int_t i = 0; i <= qp->N; i++) { snprintf(filename, sizeof(filename), "Qm%d.txt", i); print_matrix(filename, qp->Q[i], qp->nx[i], qp->nx[i]); snprintf(filename, sizeof(filename), "Sm%d.txt", i); print_matrix(filename, qp->S[i], qp->nu[i], qp->nx[i]); snprintf(filename, sizeof(filename), "Rm%d.txt", i); print_matrix(filename, qp->R[i], qp->nu[i], qp->nu[i]); snprintf(filename, sizeof(filename), "qv%d.txt", i); print_matrix(filename, qp->q[i], qp->nx[i], 1); snprintf(filename, sizeof(filename), "rv%d.txt", i); print_matrix(filename, qp->r[i], qp->nu[i], 1); if (i < qp->N) { snprintf(filename, sizeof(filename), "Am%d.txt", i); print_matrix(filename, qp->A[i], qp->nx[i+1], qp->nx[i+1]); snprintf(filename, sizeof(filename), "Bm%d.txt", i); print_matrix(filename, qp->B[i], qp->nx[i+1], qp->nu[i]); snprintf(filename, sizeof(filename), "bv%d.txt", i); print_matrix(filename, qp->b[i], qp->nx[i+1], 1); } snprintf(filename, sizeof(filename), "idxb%d.txt", i); print_int_matrix(filename, qp->idxb[i], qp->nb[i], 1); snprintf(filename, sizeof(filename), "lb%d.txt", i); print_matrix(filename, qp->lb[i], qp->nb[i], 1); snprintf(filename, sizeof(filename), "ub%d.txt", i); print_matrix(filename, qp->ub[i], qp->nb[i], 1); snprintf(filename, sizeof(filename), "Cx%d.txt", i); print_matrix(filename, qp->Cx[i], qp->nc[i], qp->nx[i]); snprintf(filename, sizeof(filename), "Cu%d.txt", i); print_matrix(filename, qp->Cu[i], qp->nc[i], qp->nu[i]); } }
void print_int_array(char *file_name, const int_t *array, int_t size) { print_int_matrix(file_name, array, size, 1); }
int main(int argc, char* argv[]) { // Print help if necessary bool help = read_bool(argc, argv, "--help", false); if ((argc < 2) || (help)) { usage(argv); return 0; } // Use parameters struct for passing parameters to kernels efficiently parameters prm; // Parse inputs prm.matDims[0] = read_int(argc, argv, "--m", 2); prm.matDims[1] = read_int(argc, argv, "--k", 2); prm.matDims[2] = read_int(argc, argv, "--n", 2); prm.rank = read_int(argc, argv, "--rank", 7); prm.method = read_string(argc, argv, "--method", (char *)"als"); int maxIters = read_int(argc, argv, "--maxiters", 1000); int maxSecs = read_int(argc, argv, "--maxsecs", 1000); double tol = read_double(argc, argv, "--tol", 1e-8); int printItn = read_int(argc, argv, "--printitn", 0); double printTol = read_double(argc, argv, "--printtol", 1.0); int seed = read_int(argc, argv, "--seed", 0); int numSeeds = read_int(argc, argv, "--numseeds", 1); bool verbose = read_bool(argc, argv, "--verbose", false); prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0); prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0); bool roundFinal = read_bool(argc, argv, "--rndfin",false); prm.alpha = read_double(argc,argv, "--alpha", 0.1); int M = read_int(argc,argv, "--M", 0); if (M) { prm.M[0] = M; prm.M[1] = M; prm.M[2] = M; } else { prm.M[0] = read_int(argc, argv, "--M0", -1); prm.M[1] = read_int(argc, argv, "--M1", -1); prm.M[2] = read_int(argc, argv, "--M2", -1); } char * infile = read_string(argc, argv, "--input", NULL); char * outfile = read_string(argc, argv, "--output", NULL); if (verbose) { setbuf(stdout, NULL); printf("\n\n---------------------------------------------------------\n"); printf("PARAMETERS\n"); printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]); printf("rank = %d\n",prm.rank); printf("method = %s\n",prm.method); if (infile) printf("input = %s\n",infile); else { if (numSeeds == 1) printf("input = seed %d\n",seed); else printf("inputs = seeds %d-%d\n",seed,seed+numSeeds-1); } if (outfile) printf("output = %s\n",outfile); else printf("output = none\n"); if (!strcmp(prm.method,"als")) { printf("tol = %1.2e\n",tol); printf("alpha = %1.2e\n",prm.alpha); printf("maval = %1.2e\n",prm.rnd_maxVal); printf("M's = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]); printf("maxiters = %d\n",maxIters); printf("maxsecs = %d\n",maxSecs); printf("printitn = %d\n",printItn); printf("printtol = %1.2e\n",printTol); } printf("---------------------------------------------------------\n"); } // Initialize other variables int i, j, k, numIters, mkn, tidx[3]; double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold; // Compute tensor dimensions prm.dims[0] = prm.matDims[0]*prm.matDims[1]; prm.dims[1] = prm.matDims[1]*prm.matDims[2]; prm.dims[2] = prm.matDims[0]*prm.matDims[2]; // Compute tensor's nnz, total number of entries, and Frobenius norm mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2]; prm.mkn2 = mkn*mkn; prm.xNorm = sqrt(mkn); // Compute number of columns in matricized tensors for (i = 0; i < 3; i++) prm.mtCols[i] = prm.mkn2 / prm.dims[i]; // Construct three matricizations of matmul tensor prm.X = (double**) malloc( 3 * sizeof(double*) ); for (i = 0; i < 3; i++) prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) ); for (int mm = 0; mm < prm.matDims[0]; mm++) for (int kk = 0; kk < prm.matDims[1]; kk++) for (int nn = 0; nn < prm.matDims[2]; nn++) { tidx[0] = mm + kk*prm.matDims[0]; tidx[1] = kk + nn*prm.matDims[1]; tidx[2] = mm + nn*prm.matDims[0]; prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1; prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1; prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1; } // Allocate factor weights and matrices: working, initial, and model prm.lambda = (double*) malloc( prm.rank * sizeof(double) ); prm.U = (double**) malloc( 3 * sizeof(double*) ); double** U0 = (double**) malloc( 3 * sizeof(double*) ); prm.model = (double**) malloc( 3 * sizeof(double*) ); for (i = 0; i < 3; i++) { prm.U[i] = (double*) calloc( prm.mkn2, sizeof(double) ); U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); } // Allocate coefficient matrix within ALS (Khatri-Rao product) int maxMatDim = prm.matDims[0]; if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1]; if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2]; prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) ); // Allocate workspaces prm.tau = (double*) malloc( mkn * sizeof(double) ); prm.lwork = maxMatDim*mkn*prm.rank; prm.work = (double*) malloc( prm.lwork * sizeof(double) ); prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) ); // Allocate matrices for normal equations int maxDim = prm.dims[0]; if (maxDim < prm.dims[1]) maxDim = prm.dims[1]; if (maxDim < prm.dims[2]) maxDim = prm.dims[2]; prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) ); prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) ); prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) ); //-------------------------------------------------- // Search Loop //-------------------------------------------------- int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1; start_search = wall_time(); for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt) { // Set starting point from random seed (match Matlab Tensor Toolbox) RandomMT cRMT(mySeed); for (i = 0; i < 3; i++) for (j = 0; j < prm.dims[i]; j++) for (k = 0; k < prm.rank; k++) U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT(); for (i = 0; i < prm.rank; i++) prm.lambda[i] = 1.0; // Copy starting point for (i = 0; i < 3; i++) cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1); // read from file if input is given if( infile ) read_input( infile, prm ); if (verbose) { printf("\nSTARTING POINT...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } printf("\n"); } //-------------------------------------------------- // Main ALS Loop //-------------------------------------------------- start_als = wall_time(); err = 1.0; threshold = 1e-4; for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++) { errOld = err; if (!strcmp(prm.method,"als")) { // Perform an iteration of ALS using NE with Smirnov's penalty term err = als( prm ); } else if (!strcmp(prm.method,"sparsify")) { // print stats before sparsifying printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // sparsify and return printf("\nSparsifying...\n\n"); sparsify( prm ); numIters = maxIters; // print stats after sparsifying printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else if (!strcmp(prm.method,"round")) { // print stats before rounding printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // round and return for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } numIters = maxIters; // print stats after rounding printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else die("Invalid method\n"); // Compute change in relative residual norm errChange = fabs(err - errOld); // Print info at current iteration if ((printItn > 0) && (((numIters + 1) % printItn) == 0)) { // print info printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange); } // Check for convergence if ( numIters > 0 && errChange < tol ) break; } // If rounding, round final solution and re-compute residual if(roundFinal) { // normalize columns in A and B factors, put arbitrary weights into C normalize_model( prm, 2 ); // cap large values and round to nearest power of 2 for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } err = compute_residual(prm,0,true); } // Print status if searching over many seeds statusCnt++; if (numSeeds > 1000 && statusCnt == numSeeds/10) { printf("...%d%% complete...\n",10*status); status++; statusCnt = 0; } // Print final info elapsed = wall_time() - start_als; if ((printItn > 0 || verbose) && !strcmp(prm.method,"als")) { if (infile) printf("\nInput %s ",infile); else printf("\nInitial seed %d ",mySeed); printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, elapsed/numIters); } if (verbose) { printf("\nSOLUTION...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); if (roundFinal || !strcmp(prm.method,"round")) print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); else print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } if (err < printTol) numGoodSeeds++; } else if (err < printTol) { numGoodSeeds++; printf("\n\n***************************************\n"); if (infile) printf("Input %s: ",infile); else printf("Initial seed %d: ",mySeed); printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange); if (roundFinal) { for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); } int count = 0; for (i = 0; i < 3; i++) count += nnz(prm.U[i],prm.dims[i]*prm.rank); printf("\ttotal nnz in solution: %d\n",count); printf("\tnaive adds/subs: %d\n",count - prm.dims[2] - 2*prm.rank); } printf("***************************************\n\n\n"); } // write to output if( outfile ) write_output( outfile, prm ); mySeed++; } // Final report of processor statistics elapsed = wall_time()-start_search; // Print stats if (!strcmp(prm.method,"als")) { printf("\n\n------------------------------------------------------------\n"); printf("Time elapsed: \t%1.1e\tseconds\n",elapsed); printf("Total number of seeds tried: \t%d\n",numSeeds); printf("Total number of good seeds: \t%d",numGoodSeeds); printf("\t(residual < %2.1e)\n",printTol); printf("------------------------------------------------------------\n"); } // free allocated memory for (i = 0; i < 3; i++) { free( prm.X[i] ); free( prm.U[i] ); free( U0[i] ); free( prm.model[i] ); } free( prm.X ); free( prm.U ); free( U0 ); free( prm.model ); free( prm.lambda ); free( prm.A ); free( prm.NE_coeff ); free( prm.NE_rhs ); free( prm.residual ); free( prm.tau ); free( prm.work ); free( prm.iwork ); return 0; }