/*
 * Demo driver: builds two 2x2 matrices with entries drawn from rand() % 3,
 * prints each of them, passes them to matrix_subtract() and prints the
 * matrix that call filled in, then releases all three matrices.
 */
int main()
{
    const int dim = 2;
    const char *headings[2] = { "\n\tMatrix1 is:\n", "\n\tMatrix2 is:\n" };
    Matrix *operands[2];
    Matrix *lhs = matrix_alloc(dim, dim);
    Matrix *rhs = matrix_alloc(dim, dim);
    Matrix *difference = matrix_alloc(dim, dim);
    int which, row, col;

    operands[0] = lhs;
    operands[1] = rhs;

    /* Fill and show one operand at a time so the rand() sequence is consumed
       in the same order: all of the first matrix, then all of the second. */
    for (which = 0; which < 2; ++which) {
        for (row = 0; row < dim; ++row) {
            for (col = 0; col < dim; ++col) {
                operands[which]->matrix_entry[row][col] = rand() % 3;
            }
        }
        printf("%s", headings[which]);
        matrix_print(operands[which]);
    }

    matrix_subtract(difference, lhs, rhs);
    printf("\n\tThe result matrix of the subtraction is:\n");
    matrix_print(difference);

    /* Release the allocated matrices. */
    matrix_free(lhs);
    matrix_free(rhs);
    matrix_free(difference);

    return 0;
}
/*
 * Runs `iteration_number` gradient-descent updates on `rolled_theta`.
 *
 * Each iteration asks NN_cost_function() for the gradient, scales it by ALPHA
 * and subtracts it from the current parameters. After every 100th iteration
 * the parameters are unrolled and one progress line is printed with the value
 * returned by accuracy(theta, X, y) and the wall-clock time elapsed since the
 * function was entered (measured with omp_get_wtime()).
 *
 * Parameters:
 *   num_threads, layer_sizes, num_layers, num_labels, X, y, lamda
 *                     - forwarded unchanged to NN_cost_function(); num_layers,
 *                       X and y are also used for the progress report.
 *   rolled_theta      - starting parameters. OWNERSHIP: this function frees the
 *                       matrix passed in (and every intermediate replacement),
 *                       so the caller must not use or free it afterwards.
 *   iteration_number  - number of update steps to perform.
 *
 * NOTE(review): the final parameters are freed before returning and nothing is
 * handed back to the caller; the printed progress lines are the only output.
 * NOTE(review): theta_sizes is hard-coded to {{25, 401}, {10, 26}} while
 * num_layers is a parameter - presumably only num_layers == 3 is supported.
 */
void gradient_descent(int num_threads, matrix_t* rolled_theta, unsigned int layer_sizes[], unsigned int num_layers, unsigned int num_labels, matrix_t* X, matrix_t* y, double lamda, unsigned int iteration_number)
{
	unsigned int theta_sizes[][2] = {{25, 401}, {10, 26}};
	const double start = omp_get_wtime();
	unsigned int i;

	for(i = 0; i < iteration_number; i++)
	{
		matrix_t* gradient = NULL;
		matrix_t* step;
		matrix_t* updated_theta;

		NN_cost_function(num_threads, &gradient, rolled_theta, layer_sizes, num_layers, num_labels, X, y, lamda);

		/* step = ALPHA * gradient; the unscaled gradient is no longer needed. */
		step = matrix_scalar_multiply(gradient, ALPHA);
		free_matrix(gradient);

		/* theta <- theta - step; the old parameters and the step are released. */
		updated_theta = matrix_subtract(rolled_theta, step);
		free_matrix(rolled_theta);
		free_matrix(step);
		rolled_theta = updated_theta;

		if((i + 1) % 100 == 0)
		{
			const double elapsed = omp_get_wtime() - start;
			matrix_list_t* theta = unroll_matrix_list(rolled_theta, num_layers - 1, theta_sizes);

			/* i is unsigned, so the iteration counter is printed with %u. */
			printf("iteration #%u, accuracy: %f, time used: %f\n", i + 1, accuracy(theta, X, y), elapsed);
			free_matrix_list(theta);
		}
	}

	free_matrix(rolled_theta);
}
// Conjugate-gradient linear equation solver.
//
// Iterates on x so that A x approaches b, where the product "A v" is obtained
// by calling multiplyA(pyramid, pC, v, out). Overwrites pyramid!
//
//   pyramid, pC  - passed through to multiplyA(); pyramid also supplies the
//                  problem size n = rows * cols.
//   b            - right-hand side, n floats (read only).
//   x            - n floats; read as the starting guess and overwritten with
//                  the best solution found.
//   itmax        - maximum number of iterations.
//   tol          - target for sqrt(r.r / b.b); compared in squared form (tol2).
//   progress_cb  - optional (may be NULL). Called once per iteration with an
//                  integer progress value; returning PFSTMO_CB_ABORT on any
//                  iteration after the first stops the solver.
//
// If the tolerance is not reached a warning is written to stderr.
// NOTE(review): bnrm2 and irdotr are used as divisors without a zero check.
static void lincg(pyramid_t* pyramid, pyramid_t* pC, const float* const b, float* const x, const int itmax, const float tol, pfstmo_progress_callback progress_cb)
{
  const int rows = pyramid->rows;
  const int cols = pyramid->cols;
  const int n = rows*cols;
  const float tol2 = tol*tol;

  // Work vectors, all of length n; released at the end of the function.
  float* const x_save = matrix_alloc(n);
  float* const r = matrix_alloc(n);
  float* const p = matrix_alloc(n);
  float* const Ap = matrix_alloc(n);

  // bnrm2 = b.b (squared norm of b)
  const float bnrm2 = matrix_DotProduct(n, b, b);

  // r = b - Ax
  multiplyA(pyramid, pC, x, r);
  matrix_subtract(n, b, r);
  float rdotr = matrix_DotProduct(n, r, r); // rdotr = r.r

  // p = r
  matrix_copy(n, r, p);

  // Setup initial vector: remember the starting x as the best solution so far
  float saved_rdotr = rdotr;
  matrix_copy(n, x, x_save);

  const float irdotr = rdotr;
  // Scale factor chosen so that logf(rdotr/irdotr)*percent_sf equals 100 when
  // rdotr has dropped to tol2*bnrm2.
  const float percent_sf = 100.0f/logf(tol2*bnrm2/irdotr);
  int iter = 0;
  int num_backwards = 0;
  const int num_backwards_ceiling = 3;
  for (; iter < itmax; iter++)
  {
    if( progress_cb != NULL ) {
      int ret = progress_cb( (int) (logf(rdotr/irdotr)*percent_sf));
      if( ret == PFSTMO_CB_ABORT && iter > 0 ) // User requested abort
        break;
    }

    // Ap = A p
    multiplyA(pyramid, pC, p, Ap);

    // alpha = r.r / (p . Ap)
    const float alpha = rdotr / matrix_DotProduct(n, p, Ap);

    // r = r - alpha Ap
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < n; i++)
      r[i] -= alpha * Ap[i];

    // rdotr = r.r
    const float old_rdotr = rdotr;
    rdotr = matrix_DotProduct(n, r, r);

    // Have we gone unstable?
    if (rdotr > old_rdotr)
    {
      // Save where we've got to. x has not been advanced yet in this
      // iteration, so it is still the solution that produced old_rdotr.
      if (num_backwards == 0 && old_rdotr < saved_rdotr)
      {
        saved_rdotr = old_rdotr;
        matrix_copy(n, x, x_save);
      }
      num_backwards++;
    }
    else
    {
      num_backwards = 0;
    }

    // x = x + alpha p
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < n; i++)
      x[i] += alpha * p[i];

    // Exit if we're done
    // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(rdotr/bnrm2));
    if(rdotr/bnrm2 < tol2)
      break;

    if (num_backwards > num_backwards_ceiling)
    {
      // Reset: the residual grew on more than num_backwards_ceiling
      // consecutive iterations, so restart from the saved solution.
      num_backwards = 0;
      matrix_copy(n, x_save, x);

      // r = Ax
      multiplyA(pyramid, pC, x, r);

      // r = b - r
      matrix_subtract(n, b, r);

      // rdotr = r.r
      rdotr = matrix_DotProduct(n, r, r);
      saved_rdotr = rdotr;

      // p = r
      matrix_copy(n, r, p);
    }
    else
    {
      // p = r + beta p
      const float beta = rdotr/old_rdotr;
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < n; i++)
        p[i] = r[i] + beta*p[i];
    }
  }

  // Use the best version we found
  if (rdotr > saved_rdotr)
  {
    rdotr = saved_rdotr;
    matrix_copy(n, x_save, x);
  }

  if (rdotr/bnrm2 > tol2)
  {
    // Not converged. Any exit from the loop before itmax (including a user
    // abort) is reported with the "going unstable" message.
    if( progress_cb != NULL )
      progress_cb( (int) (logf(rdotr/irdotr)*percent_sf));
    if (iter == itmax)
      fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (hit maximum iterations), error = %g (should be below %g).\n", sqrtf(rdotr/bnrm2), tol);
    else
      fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (going unstable), error = %g (should be below %g).\n", sqrtf(rdotr/bnrm2), tol);
  }
  else if (progress_cb != NULL)
    progress_cb(100);

  matrix_free(x_save);
  matrix_free(p);
  matrix_free(Ap);
  matrix_free(r);
}
// bi-conjugate linear equation solver // overwrites pyramid! static void linbcg(pyramid_t* pyramid, pyramid_t* pC, float* const b, float* const x, const int itmax, const float tol, pfstmo_progress_callback progress_cb) { const int rows = pyramid->rows; const int cols = pyramid->cols; const int n = rows*cols; const float tol2 = tol*tol; float* const z = matrix_alloc(n); float* const zz = matrix_alloc(n); float* const p = matrix_alloc(n); float* const pp = matrix_alloc(n); float* const r = matrix_alloc(n); float* const rr = matrix_alloc(n); float* const x_save = matrix_alloc(n); const float bnrm2 = matrix_DotProduct(n, b, b); multiplyA(pyramid, pC, x, r); // r = A*x = divergence(x) matrix_subtract(n, b, r); // r = b - r float err2 = matrix_DotProduct(n, r, r); // err2 = r.r // matrix_copy(n, r, rr); // rr = r multiplyA(pyramid, pC, r, rr); // rr = A*r float bkden = 0; float saved_err2 = err2; matrix_copy(n, x, x_save); const float ierr2 = err2; const float percent_sf = 100.0f/logf(tol2*bnrm2/ierr2); int iter = 0; bool reset = true; int num_backwards = 0; const int num_backwards_ceiling = 3; for (; iter < itmax; iter++) { if( progress_cb != NULL ) progress_cb( (int) (logf(err2/ierr2)*percent_sf)); solveX(n, r, z); // z = ~A(-1) * r = -0.25 * r solveX(n, rr, zz); // zz = ~A(-1) * rr = -0.25 * rr const float bknum = matrix_DotProduct(n, z, rr); if(reset) { reset = false; matrix_copy(n, z, p); matrix_copy(n, zz, pp); } else { const float bk = bknum / bkden; // beta = ... #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) { p[i] = z[i] + bk * p[i]; pp[i] = zz[i] + bk * pp[i]; } } bkden = bknum; // numerato becomes the dominator for the next iteration multiplyA(pyramid, pC, p, z); // z = A* p = divergence( p) multiplyA(pyramid, pC, pp, zz); // zz = A*pp = divergence(pp) const float ak = bknum / matrix_DotProduct(n, z, pp); // alfa = ... 
#pragma omp parallel for schedule(static) for(int i = 0 ; i < n ; i++ ) { r[i] -= ak * z[i]; // r = r - alfa * z rr[i] -= ak * zz[i]; //rr = rr - alfa * zz } const float old_err2 = err2; err2 = matrix_DotProduct(n, r, r); // Have we gone unstable? if (err2 > old_err2) { // Save where we've got to if it's the best yet if (num_backwards == 0 && old_err2 < saved_err2) { saved_err2 = old_err2; matrix_copy(n, x, x_save); } num_backwards++; } else { num_backwards = 0; } #pragma omp parallel for schedule(static) for(int i = 0 ; i < n ; i++ ) x[i] += ak * p[i]; // x = x + alfa * p if (num_backwards > num_backwards_ceiling) { // Reset reset = true; num_backwards = 0; // Recover saved value matrix_copy(n, x_save, x); // r = Ax multiplyA(pyramid, pC, x, r); // r = b - r matrix_subtract(n, b, r); // err2 = r.r err2 = matrix_DotProduct(n, r, r); saved_err2 = err2; // rr = A*r multiplyA(pyramid, pC, r, rr); } // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(err2/bnrm2)); if(err2/bnrm2 < tol2) break; } // Use the best version we found if (err2 > saved_err2) { err2 = saved_err2; matrix_copy(n, x_save, x); } if (err2/bnrm2 > tol2) { // Not converged if( progress_cb != NULL ) progress_cb( (int) (logf(err2/ierr2)*percent_sf)); if (iter == itmax) fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (hit maximum iterations), error = %g (should be below %g).\n", sqrtf(err2/bnrm2), tol); else fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (going unstable), error = %g (should be below %g).\n", sqrtf(err2/bnrm2), tol); } else if (progress_cb != NULL) progress_cb(100); matrix_free(x_save); matrix_free(p); matrix_free(pp); matrix_free(z); matrix_free(zz); matrix_free(r); matrix_free(rr); }
/*
 * Computes the regularised gradient of a neural-network cost with respect to
 * the rolled parameter vector and stores it, rolled, in *gradient.
 *
 * Parameters:
 *   num_threads   - passed to omp_set_num_threads() before the parallel region.
 *   gradient      - out: receives a newly created matrix from
 *                   roll_matrix_list(); the caller is responsible for it.
 *   rolled_theta  - current parameters; unrolled here, not freed here.
 *   layer_sizes   - not referenced by this function.
 *   num_layers    - number of layers; sizes the A / Z / delta lists.
 *   num_labels    - length of the per-example target vector.
 *   X, y          - training inputs (one example per row of X) and labels.
 *   lamda         - regularisation weight.
 *
 * Returns: always 0.0 - the cost value itself is not computed.
 *
 * NOTE(review): theta_sizes is hard-coded to {{25, 401}, {10, 26}} and the
 * delta computation indexes matrix_list[0] / matrix_list[1] directly, so this
 * presumably only works for num_layers == 3 - confirm against callers.
 */
double NN_cost_function(int num_threads, matrix_t** gradient, matrix_t* rolled_theta, unsigned int layer_sizes[], unsigned int num_layers, unsigned int num_labels, matrix_t* X, matrix_t* y, double lamda)
{
	unsigned int theta_sizes[][2] = {{25, 401}, {10, 26}};
	matrix_list_t* theta = unroll_matrix_list(rolled_theta, num_layers-1, theta_sizes);

	/* m: number of rows of X (one row is processed per inner-loop iteration). */
	unsigned int m = X->rows;
	//unsigned int n = X->cols;

	/* Accumulator for the sum of all threads' gradients, one matrix per theta. */
	matrix_list_t* theta_gradient_total = matrix_list_constructor(theta->num);
	unsigned int i, j;
	for(i=0; i<theta_gradient_total->num; i++)
	{
		theta_gradient_total->matrix_list[i] = matrix_constructor(theta->matrix_list[i]->rows, theta->matrix_list[i]->cols);
	}

	omp_set_num_threads(num_threads);
	int nthreads, tid;
	#pragma omp parallel private(nthreads, tid)
	{
		/* get_indexes() fills indexes[0] / indexes[1], used below as the
		   half-open range of rows of X handled by this thread. */
		int indexes[2];
		tid = omp_get_thread_num();
		nthreads = omp_get_num_threads();
		get_indexes(m, nthreads, tid, indexes);

		/* These i, j shadow the outer ones; being declared inside the parallel
		   region, they and the temporaries below are per-thread. */
		unsigned int i, j;
		matrix_t* temp;
		matrix_t* temp2;
		matrix_t* temp3;
		/* Per-thread gradient accumulator, same shapes as theta. */
		matrix_list_t* theta_gradient = matrix_list_constructor(theta->num);
		for(i=0; i<theta_gradient->num; i++)
		{
			theta_gradient->matrix_list[i] = matrix_constructor(theta->matrix_list[i]->rows, theta->matrix_list[i]->cols);
		}

		for(i=indexes[0]; i<indexes[1]; i++)
		{
			matrix_list_t* A = matrix_list_constructor(num_layers);
			matrix_list_t* Z = matrix_list_constructor(num_layers-1);
			matrix_list_t* delta = matrix_list_constructor(num_layers-1);

			/* A[0]: row i of X with a 1.0 prepended, then transposed. */
			A->matrix_list[0] = row_to_vector(X, i);
			temp = matrix_prepend_col(A->matrix_list[0], 1.0);
			free_matrix(A->matrix_list[0]);

			A->matrix_list[0] = matrix_transpose(temp);
			free_matrix(temp);

			/* Forward pass: Z[j] = theta[j] * A[j]; A[j+1] = sigmoid(Z[j]) with
			   a 1.0 row prepended. */
			for(j=0; j<num_layers-1; j++)
			{
				Z->matrix_list[j] = matrix_multiply(theta->matrix_list[j], A->matrix_list[j]);

				temp = matrix_sigmoid(Z->matrix_list[j]);
				A->matrix_list[j+1] = matrix_prepend_row(temp, 1.0);
				free_matrix(temp);
			}

			/* The last layer gets matrix_remove_row() applied, which presumably
			   drops the 1.0 row the loop above prepended - confirm. */
			temp = matrix_remove_row(A->matrix_list[num_layers-1]);
			free_matrix(A->matrix_list[num_layers-1]);
			A->matrix_list[num_layers-1] = temp;

			/* Target vector: 1.0 at the index equal to y[i].
			   NOTE(review): relies on matrix_constructor() zero-filling the
			   other entries - confirm. */
			matrix_t* result_matrix = matrix_constructor(1, num_labels);
			for(j = 0; j < num_labels; j++)
			{
				if(vector_get(y, i) == j)
				{
					vector_set(result_matrix, j, 1.0);
				}
			}
			temp = matrix_transpose(result_matrix);
			free_matrix(result_matrix);
			result_matrix= temp;

			/* Output-layer error: last activation minus target. */
			delta->matrix_list[1] = matrix_subtract(A->matrix_list[num_layers-1], result_matrix);
			free_matrix(result_matrix);

			/* Hidden-layer error: (theta[1]^T * delta[1]) multiplied cell-wise
			   by the sigmoid gradient of Z[0] (with a 1.0 row prepended so the
			   shapes match), then passed through matrix_remove_row(). */
			matrix_t* theta_transpose = matrix_transpose(theta->matrix_list[1]);
			temp = matrix_multiply(theta_transpose, delta->matrix_list[1]);

			matrix_t* sig_gradient = matrix_sigmoid_gradient(Z->matrix_list[0]);
			temp2 = matrix_prepend_row(sig_gradient, 1.0);
			temp3 = matrix_cell_multiply(temp, temp2);
			delta->matrix_list[0] = matrix_remove_row(temp3);

			free_matrix(temp);
			free_matrix(temp2);
			free_matrix(temp3);
			free_matrix(sig_gradient);
			free_matrix(theta_transpose);

			/* Accumulate delta[j] * A[j]^T into this thread's gradient. */
			for(j=0; j<num_layers-1; j++)
			{
				matrix_t* A_transpose = matrix_transpose(A->matrix_list[j]);
				temp = matrix_multiply(delta->matrix_list[j], A_transpose);
				temp2 = matrix_add(theta_gradient->matrix_list[j], temp);
				free_matrix(theta_gradient->matrix_list[j]);
				theta_gradient->matrix_list[j] = temp2;

				free_matrix(A_transpose);
				free_matrix(temp);
			}
			free_matrix_list(A);
			free_matrix_list(Z);
			free_matrix_list(delta);
		}

		/* Fold this thread's gradient into the shared total; the critical
		   section protects the swap of theta_gradient_total. */
		#pragma omp critical
		{
			matrix_list_t* temp_list;
			temp_list = matrix_list_add(theta_gradient_total, theta_gradient);
			free_matrix_list(theta_gradient_total);
			free_matrix_list(theta_gradient);
			theta_gradient_total = temp_list;
		}
	}

	/* Average over the m examples and add the regularisation term
	   (lamda/m) * theta, with column 0 of theta zeroed so it is excluded. */
	for(i=0; i<num_layers-1; i++)
	{
		matrix_t* temp;
		matrix_t* temp2;
		matrix_t* temp3;

		temp = matrix_scalar_multiply(theta_gradient_total->matrix_list[i], 1.0/m);
		temp2 = copy_matrix(theta->matrix_list[i]);
		for(j=0; j<theta->matrix_list[i]->rows; j++)
		{
			matrix_set(temp2, j, 0, 0.0);
		}
		free_matrix(theta_gradient_total->matrix_list[i]);
		temp3 = matrix_scalar_multiply(temp2, lamda/m);
		theta_gradient_total->matrix_list[i] = matrix_add(temp, temp3);

		free_matrix(temp);
		free_matrix(temp2);
		free_matrix(temp3);
	}

	*gradient = roll_matrix_list(theta_gradient_total);

	free_matrix_list(theta);
	free_matrix_list(theta_gradient_total);

	return 0.0;
}
// Binary minus for `matrix`: hands both operands to matrix_subtract(A, B)
// and returns whatever that call produces.
matrix operator-(matrix A, matrix B)
{
    matrix difference = matrix_subtract(A, B);
    return difference;
}
/* Reads one whitespace-delimited token from stdin into buf.
 * buf must have room for USHRT_MAX bytes (USHRT_MAX >= 65535), so the field
 * width below keeps scanf from writing past it.
 * Returns 1 on success, 0 on end of input or read error. */
static int operator_matrix_read_token(char *buf)
{
    return scanf("%65534s", buf) == 1;
}

/* Prints `prompt` and reads an integer, repeating until the value is positive.
 * Returns the value, or 0 if the input ended. */
static int operator_matrix_read_dimension(const char *prompt, char *buf)
{
    for (;;) {
        int value;

        printf("%s", prompt);
        if (!operator_matrix_read_token(buf))
            return 0;
        value = atoi(buf);
        printf("\n");
        if (value > 0)
            return value;
        printf("Valor inválido!\n\n");
    }
}

/* Fills the rows x cols entries of an already constructed matrix from stdin.
 * Returns 1 on success, 0 if the input ended before every entry was read. */
static int operator_matrix_read_elements(matrix *target, int rows, int cols, char *buf)
{
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++) {
            if (!operator_matrix_read_token(buf))
                return 0;
            target->table[i][j] = atof(buf);
        }
    return 1;
}

/* Tells whether `candidate` can be combined with `current` by menu option
 * `op` (1 = add, 2 = subtract, anything else = multiply). */
static int operator_matrix_shapes_compatible(int op, matrix *current, matrix *candidate)
{
    switch (op) {
    case 1:
        return matrix_can_add(*current, *candidate);
    case 2:
        return matrix_can_subtract(*current, *candidate);
    default:
        return matrix_can_multiply(*current, *candidate);
    }
}

/* Asks for the shape of the next operand until it is compatible with
 * `current` under `op`, then reads its entries into *next.
 * Returns 1 on success (the caller must destroy *next), or 0 if the input
 * ended, in which case nothing is left allocated in *next. */
static int operator_matrix_read_operand(int op, matrix *current, matrix *next, char *buf)
{
    int rows;
    int cols;

    for (;;) {
        rows = operator_matrix_read_dimension("Entre com o número de linhas da proxima matriz: ", buf);
        if (rows == 0)
            return 0;
        cols = operator_matrix_read_dimension("Entre com o número de colunas da proxima matriz: ", buf);
        if (cols == 0)
            return 0;

        *next = matrix_constructor(rows, cols);
        if (operator_matrix_shapes_compatible(op, current, next))
            break;

        printf("Não é possivel fazer a operação desejada com as matrizes de ordens previamente informadas!\n\n");
        /* Release the rejected matrix before asking again. */
        matrix_destructor(next);
    }

    printf("Entre com os elementos da proxima matriz, separando-os por espaços e/ou quebras de linha:\n");
    if (!operator_matrix_read_elements(next, rows, cols, buf)) {
        matrix_destructor(next);
        return 0;
    }
    printf("\n");
    return 1;
}

/*
 * Interactive matrix calculator.
 *
 * Reads a first matrix from stdin, then repeatedly shows menu_matrix() and
 * applies the chosen operation to the running result: 1 = add, 2 = subtract,
 * 3 = multiply (each reads another matrix), 4 = raise to a non-negative
 * integer power; any other choice ends the session. The result is printed
 * after every choice, including the final one.
 *
 * Dimensions must be positive integers. If stdin ends in the middle of a
 * prompt the function releases what it allocated and returns.
 */
void operator_matrix()
{
    char in[USHRT_MAX];

    int m = operator_matrix_read_dimension("Entre com o número de linhas da 1ª matriz: ", in);
    if (m == 0)
        return;
    int n = operator_matrix_read_dimension("Entre com o número de colunas da 1ª matriz: ", in);
    if (n == 0)
        return;

    matrix result = matrix_constructor(m, n);
    printf("Entre com os elementos da 1ª matriz, separando-os por espaços e/ou quebras de linha:\n");
    if (!operator_matrix_read_elements(&result, m, n, in)) {
        matrix_destructor(&result);
        return;
    }
    printf("\n");

    int keep_going = 1;
    while (keep_going) {
        matrix next;
        int have_next = 0; /* set once `next` really holds a constructed matrix */
        int op = menu_matrix();

        switch (op) {
        case 1: // Add
        case 2: // Subtract
        case 3: // Multiply
            if (!operator_matrix_read_operand(op, &result, &next, in)) {
                matrix_destructor(&result);
                return;
            }
            have_next = 1;
            if (op == 1)
                matrix_add(&result, next);
            else if (op == 2)
                matrix_subtract(&result, next);
            else
                matrix_multiply(&result, next);
            break;
        case 4: // Power
            if (matrix_can_power(result)) {
                for (;;) {
                    long long exponent;

                    printf("Entre com o próximo valor: ");
                    if (!operator_matrix_read_token(in)) {
                        matrix_destructor(&result);
                        return;
                    }
                    printf("\n");
                    exponent = atoll(in);
                    if (exponent >= 0) {
                        matrix_power(&result, (unsigned long long int) exponent);
                        break;
                    }
                    printf("Valor inválido!\n\n");
                }
            } else
                printf("Não é possivel realizar a operação desejada com a matrix atual!\n\n");
            break;
        default:
            keep_going = 0;
            break;
        }

        /* Only options 1-3 construct `next`; destroying it otherwise would
           operate on an uninitialised object. */
        if (have_next)
            matrix_destructor(&next);

        printf("Resultado:\n\n");
        matrix_print(result);
        printf("\n");

        if (!keep_going)
            matrix_destructor(&result);
    }
}
std::vector<Task*> StrassenSingleProblem::split() { int T_m = m/2, T_n = k/2, S_m = k/2, S_n = n/2; float *A11 = A; float *A21 = A + m/2; float *A12 = A + lda*k/2; float *A22 = A + lda*k/2 + m/2; float *B11 = B; float *B21 = B + k/2; float *B12 = B + ldb*n/2; float *B22 = B + ldb*n/2 + k/2; float *C11 = C; float *C21 = C + m/2; float *C12 = C + ldc*n/2; float *C22 = C + ldc*n/2 + m/2; float *T0 = A11; float *T1 = A12; float *T2 = (float*) malloc(T_m * T_n * sizeof(float)); float *T3 = (float*) malloc(T_m * T_n * sizeof(float)); float *T4 = (float*) malloc(T_m * T_n * sizeof(float)); float *T5 = (float*) malloc(T_m * T_n * sizeof(float)); float *T6 = A22; float *S0 = B11; float *S1 = B21; float *S2 = (float*) malloc(S_m * S_n * sizeof(float)); float *S3 = (float*) malloc(S_m * S_n * sizeof(float)); float *S4 = (float*) malloc(S_m * S_n * sizeof(float)); float *S5 = B22; float *S6 = (float*) malloc(S_m * S_n * sizeof(float)); float *Q0 = C11; float *Q1 = (float*) malloc(T_m * S_n * sizeof(float)); float *Q2 = C22; float *Q3 = C12; float *Q4 = C21; float *Q5 = (float*) malloc(T_m * S_n * sizeof(float)); float *Q6 = (float*) malloc(T_m * S_n * sizeof(float)); matrix_add(T_m, T_n, A21, lda, A22, lda, T2, T_m); matrix_subtract(T_m, T_n, T2, T_m, A11, lda, T3, T_m); matrix_subtract(T_m, T_n, A11, lda, A21, lda, T4, T_m); matrix_subtract(T_m, T_n, A12, lda, T3, T_m, T5, T_m); matrix_subtract(S_m, S_n, B12, ldb, B11, ldb, S2, S_m); matrix_subtract(S_m, S_n, B22, ldb, S2, S_m, S3, S_m); matrix_subtract(S_m, S_n, B22, ldb, B12, ldb, S4, S_m); matrix_subtract(S_m, S_n, S3, S_m, B21, ldb, S6, S_m); std::vector<Task*> tasks (7); tasks[0] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T0, lda, S0, ldb, Q0, ldc)); tasks[1] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T1, lda, S1, ldb, Q1, T_m)); tasks[2] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T2, T_m, S2, S_m, Q2, ldc)); tasks[3] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T3, T_m, S3, S_m, Q3, 
ldc)); tasks[4] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T4, T_m, S4, S_m, Q4, ldc)); tasks[5] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T5, T_m, S5, ldb, Q5, T_m)); tasks[6] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T6, lda, S6, S_m, Q6, T_m)); return tasks; }