Exemplo n.º 1
0
/* Demo program: builds two 2x2 matrices whose entries are rand() % 3,
   prints both, hands them to matrix_subtract together with a third 2x2
   matrix that receives the result, and prints that result.
   (Presumably result = matrix1 - matrix2; the operand order is defined by
   matrix_subtract, which is not visible here.) */
int main()
{
  enum { SIDE = 2, CELLS = SIDE * SIDE };
  Matrix *lhs = matrix_alloc(SIDE, SIDE);
  Matrix *rhs = matrix_alloc(SIDE, SIDE);
  Matrix *difference = matrix_alloc(SIDE, SIDE);
  int cell;

  /* Fill the first operand row by row, one rand() draw per entry. */
  for (cell = 0; cell < CELLS; cell++)
    lhs->matrix_entry[cell / SIDE][cell % SIDE] = rand() % 3;
  printf("\n\tMatrix1 is:\n");
  matrix_print(lhs);

  /* Same for the second operand; it is filled only after the first one
     has been printed, so the sequence of rand() draws stays the same. */
  for (cell = 0; cell < CELLS; cell++)
    rhs->matrix_entry[cell / SIDE][cell % SIDE] = rand() % 3;
  printf("\n\tMatrix2 is:\n");
  matrix_print(rhs);

  matrix_subtract(difference, lhs, rhs);

  printf("\n\tThe result matrix of the subtraction is:\n");
  matrix_print(difference);

  /* Release the three allocated matrices. */
  matrix_free(lhs);
  matrix_free(rhs);
  matrix_free(difference);
}
Exemplo n.º 2
0
/*
 * Runs `iteration_number` steps of gradient descent on a rolled parameter
 * matrix: each step obtains the gradient from NN_cost_function, scales it by
 * ALPHA and subtracts it from the current parameters.
 *
 * num_threads       forwarded to NN_cost_function
 * rolled_theta      starting parameters. OWNERSHIP: this function frees it
 *                   (when it is replaced in the first step, or at the end if
 *                   no step runs), so the caller must not use or free it
 *                   afterwards. The final parameters are freed as well and
 *                   are not returned.
 * layer_sizes, num_layers, num_labels, X, y, lamda
 *                   forwarded unchanged to NN_cost_function; X and y are also
 *                   passed to accuracy() for the progress report
 * iteration_number  number of descent steps
 *
 * Every 100th step prints the iteration number, the accuracy and the
 * wall-clock time elapsed since entry (omp_get_wtime).
 */
void gradient_descent(int num_threads, matrix_t* rolled_theta, unsigned int layer_sizes[], unsigned int num_layers,
		unsigned int num_labels, matrix_t* X, matrix_t* y, double lamda, unsigned int iteration_number)
{
	double start, end;
	double cpu_time_used;
	start = omp_get_wtime();

	/* Shapes used to unroll rolled_theta for the accuracy report; hard-coded
	 * for two parameter matrices of 25x401 and 10x26. */
	unsigned int theta_sizes[][2] = {{25, 401}, {10, 26}};
	matrix_t* gradient;

	unsigned int i;
	for(i=0; i < iteration_number; i++)
	{
		/* NN_cost_function stores a freshly allocated gradient in `gradient`. */
		NN_cost_function(num_threads, &gradient, rolled_theta, layer_sizes, num_layers, num_labels, X, y, lamda);

		/* gradient = ALPHA * gradient */
		matrix_t* tmp;
		tmp = matrix_scalar_multiply(gradient, ALPHA);
		free_matrix(gradient);
		gradient = tmp;

		/* rolled_theta = rolled_theta - gradient */
		tmp = matrix_subtract(rolled_theta, gradient);
		free_matrix(rolled_theta);
		rolled_theta = tmp;

		free_matrix(gradient);

		if((i+1) % 100 == 0)
		{
			end = omp_get_wtime();
			cpu_time_used = end - start;
			matrix_list_t* theta = unroll_matrix_list(rolled_theta, num_layers-1, theta_sizes);
			/* i+1 is unsigned int, so it must be printed with %u (was %d). */
			printf("iteration #%u, accuracy: %f, time used: %f\n", i+1, accuracy(theta, X, y), cpu_time_used);
			free_matrix_list(theta);
		}
	}
	free_matrix(rolled_theta);
}
Exemplo n.º 3
0
// Conjugate-gradient linear equation solver for A x = b, where A is applied
// through multiplyA(pyramid, pC, in, out).
// Overwrites pyramid!
//
// Parameters:
//   pyramid, pC  - handed to multiplyA on every application of A; pyramid also
//                  supplies the problem size n = rows*cols
//   b            - right-hand side, n floats (only read)
//   x            - in: initial guess; out: solution estimate, n floats
//   itmax        - maximum number of iterations
//   tol          - tolerance; iteration stops once r.r / b.b < tol*tol
//   progress_cb  - optional (may be NULL) progress callback. A return value of
//                  PFSTMO_CB_ABORT stops the loop, but only when iter > 0.
//
// The best x seen so far is kept in x_save. When the squared residual has grown
// on more than num_backwards_ceiling consecutive iterations, x is restored from
// x_save and the iteration is restarted from there. If the tolerance is not
// reached, a warning is printed to stderr.
static void lincg(pyramid_t* pyramid, pyramid_t* pC, const float* const b, float* const x, const int itmax, const float tol, pfstmo_progress_callback progress_cb)
{
  const int rows = pyramid->rows;
  const int cols = pyramid->cols;
  const int n = rows*cols;
  const float tol2 = tol*tol; // compared against squared norms, hence tol squared
	
  // Work vectors, all of length n; released at the end of the function
  float* const x_save = matrix_alloc(n);
  float* const r = matrix_alloc(n);
  float* const p = matrix_alloc(n);
  float* const Ap = matrix_alloc(n);	
	
  // bnrm2 = b.b (squared norm of b)
  const float bnrm2 = matrix_DotProduct(n, b, b);

  // r = b - Ax
  multiplyA(pyramid, pC, x, r);
  matrix_subtract(n, b, r);
  float rdotr = matrix_DotProduct(n, r, r); // rdotr = r.r
	
  // p = r
  matrix_copy(n, r, p);

  // Setup initial vector: the starting x is the best solution known so far
  float saved_rdotr = rdotr;
  matrix_copy(n, x, x_save);

  // Progress scaling: logf(rdotr/irdotr)*percent_sf evaluates to 0 at the initial
  // residual and to 100 when rdotr equals tol2*bnrm2
  const float irdotr = rdotr;
  const float percent_sf = 100.0f/logf(tol2*bnrm2/irdotr);
  int iter = 0;
  int num_backwards = 0; // consecutive iterations in which the residual increased
  const int num_backwards_ceiling = 3;
  for (; iter < itmax; iter++)
    {
      if( progress_cb != NULL ) {
	int ret = progress_cb( (int) (logf(rdotr/irdotr)*percent_sf));    
        if( ret == PFSTMO_CB_ABORT && iter > 0 ) // User requested abort
          break;
      }      
      
      // Ap = A p
      multiplyA(pyramid, pC, p, Ap);
      
      // alpha = r.r / (p . Ap)
      const float alpha = rdotr / matrix_DotProduct(n, p, Ap);
      
      // r = r - alpha Ap
#pragma omp parallel for schedule(static)
      for (int i = 0; i < n; i++)
	r[i] -= alpha * Ap[i];
            
      // rdotr = r.r
      const float old_rdotr = rdotr;
      rdotr = matrix_DotProduct(n, r, r);
      
      // Have we gone unstable?
      if (rdotr > old_rdotr)
	{
	  // Save where we've got to: x has not been updated yet in this iteration,
	  // so it still corresponds to old_rdotr. Only done on the first increase
	  // of a run and only if it beats the saved residual.
	  if (num_backwards == 0 && old_rdotr < saved_rdotr)
	    {
	      saved_rdotr = old_rdotr;
	      matrix_copy(n, x, x_save);
	    }

	  num_backwards++;
	}
      else
	{
	  num_backwards = 0;
	}

      // x = x + alpha p
#pragma omp parallel for schedule(static)
      for (int i = 0; i < n; i++)
	x[i] += alpha * p[i];


      // Exit if we're done
      // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(rdotr/bnrm2));
      if(rdotr/bnrm2 < tol2)
	break;
      
      if (num_backwards > num_backwards_ceiling)
	{
	  // Reset: go back to the best saved x and restart the iteration from it
	  num_backwards = 0;
	  matrix_copy(n, x_save, x);

	  // r = Ax
	  multiplyA(pyramid, pC, x, r);

	  // r = b - r
	  matrix_subtract(n, b, r);

	  // rdotr = r.r
	  rdotr = matrix_DotProduct(n, r, r);
	  saved_rdotr = rdotr;

	  // p = r
	  matrix_copy(n, r, p);
	}
      else
	{
	  // p = r + beta p
	  const float beta = rdotr/old_rdotr;
#pragma omp parallel for schedule(static)
	  for (int i = 0; i < n; i++)
	    p[i] = r[i] + beta*p[i];
	}
    }

  // Use the best version we found
  if (rdotr > saved_rdotr)
    {
      rdotr = saved_rdotr;
      matrix_copy(n, x_save, x);
    }  

  if (rdotr/bnrm2 > tol2)
    {
      // Not converged
      if( progress_cb != NULL )
	progress_cb( (int) (logf(rdotr/irdotr)*percent_sf));    
      // iter == itmax means the loop ran to completion; otherwise it was left by a break
      if (iter == itmax)
	fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (hit maximum iterations), error = %g (should be below %g).\n", sqrtf(rdotr/bnrm2), tol);  
      else
	fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (going unstable), error = %g (should be below %g).\n", sqrtf(rdotr/bnrm2), tol);  
    }
  else if (progress_cb != NULL)
    progress_cb(100);
    
  matrix_free(x_save);
  matrix_free(p);
  matrix_free(Ap);
  matrix_free(r);
}
Exemplo n.º 4
0
// Bi-conjugate gradient linear equation solver for A x = b, where A is applied
// through multiplyA(pyramid, pC, in, out) and solveX supplies the approximate
// inverse used on the residuals.
// Overwrites pyramid!
//
// Parameters:
//   pyramid, pC  - handed to multiplyA on every application of A; pyramid also
//                  supplies the problem size n = rows*cols
//   b            - right-hand side, n floats
//   x            - in: initial guess; out: solution estimate, n floats
//   itmax        - maximum number of iterations
//   tol          - tolerance; iteration stops once r.r / b.b < tol*tol
//   progress_cb  - optional (may be NULL) progress callback; its return value
//                  is ignored here
//
// The best x seen so far is kept in x_save. When the squared residual has grown
// on more than num_backwards_ceiling consecutive iterations, x is restored from
// x_save and the search directions are rebuilt. If the tolerance is not reached,
// a warning is printed to stderr.
static void linbcg(pyramid_t* pyramid, pyramid_t* pC, float* const b, float* const x, const int itmax, const float tol, pfstmo_progress_callback progress_cb)
{
  const int rows = pyramid->rows;
  const int cols = pyramid->cols;
  const int n = rows*cols;
  const float tol2 = tol*tol; // compared against squared norms, hence tol squared
	
  // Work vectors, all of length n; released at the end of the function
  float* const z = matrix_alloc(n);
  float* const zz = matrix_alloc(n);
  float* const p = matrix_alloc(n);
  float* const pp = matrix_alloc(n);
  float* const r = matrix_alloc(n);
  float* const rr = matrix_alloc(n);	
  float* const x_save = matrix_alloc(n);	
	
  // bnrm2 = b.b (squared norm of b)
  const float bnrm2 = matrix_DotProduct(n, b, b);
	
  multiplyA(pyramid, pC, x, r); // r = A*x = divergence(x)
  matrix_subtract(n, b, r); // r = b - r
  float err2 = matrix_DotProduct(n, r, r); // err2 = r.r

  // matrix_copy(n, r, rr); // rr = r
  multiplyA(pyramid, pC, r, rr); // rr = A*r
  
  float bkden = 0;
  // The starting x is the best solution known so far
  float saved_err2 = err2;
  matrix_copy(n, x, x_save);

  // Progress scaling: logf(err2/ierr2)*percent_sf evaluates to 0 at the initial
  // residual and to 100 when err2 equals tol2*bnrm2
  const float ierr2 = err2;
  const float percent_sf = 100.0f/logf(tol2*bnrm2/ierr2);

  int iter = 0;
  bool reset = true; // true: take p/pp directly from z/zz (first iteration and after a restart)
  int num_backwards = 0; // consecutive iterations in which the residual increased
  const int num_backwards_ceiling = 3;
  for (; iter < itmax; iter++)
    {
      if( progress_cb != NULL )
	progress_cb( (int) (logf(err2/ierr2)*percent_sf));    
      
      solveX(n, r, z);   //  z = ~A(-1) *  r = -0.25 *  r
      solveX(n, rr, zz); // zz = ~A(-1) * rr = -0.25 * rr
		
      const float bknum = matrix_DotProduct(n, z, rr);
		
      if(reset)
	{
	  // No previous direction to combine with: p = z, pp = zz
	  reset = false;
	  matrix_copy(n, z, p);
	  matrix_copy(n, zz, pp); 
	}
      else
	{
	  const float bk = bknum / bkden; // beta = ...
#pragma omp parallel for schedule(static)
	  for (int i = 0; i < n; i++)
	    {
	      p[i]  =  z[i] + bk *  p[i];
	      pp[i] = zz[i] + bk * pp[i];
	    }
	}
		
      bkden = bknum; // numerator becomes the denominator for the next iteration
      
      multiplyA(pyramid, pC,  p,  z); //  z = A* p = divergence( p)
      multiplyA(pyramid, pC, pp, zz); // zz = A*pp = divergence(pp)
      
      const float ak = bknum / matrix_DotProduct(n, z, pp); // alfa = ...
#pragma omp parallel for schedule(static)
      for(int i = 0 ; i < n ; i++ )
	{
	  r[i]  -= ak *  z[i];	// r =  r - alfa * z
	  rr[i] -= ak * zz[i];	//rr = rr - alfa * zz
	}
      
      const float old_err2 = err2;
      err2 = matrix_DotProduct(n, r, r);

      // Have we gone unstable?
      if (err2 > old_err2)
	{
	  // Save where we've got to if it's the best yet: x has not been updated
	  // yet in this iteration, so it still corresponds to old_err2
	  if (num_backwards == 0 && old_err2 < saved_err2)
	    {
	      saved_err2 = old_err2;
	      matrix_copy(n, x, x_save);
	    }
	  
	  num_backwards++;
	}
      else
	{
	  num_backwards = 0;
	}

#pragma omp parallel for schedule(static)
      for(int i = 0 ; i < n ; i++ )
	x[i] += ak * p[i];	// x =  x + alfa * p

      if (num_backwards > num_backwards_ceiling)
	{
	  // Reset
	  reset = true;
	  num_backwards = 0;
	  
	  // Recover saved value
	  matrix_copy(n, x_save, x);
	  
	  // r = Ax
	  multiplyA(pyramid, pC, x, r);
	  
	  // r = b - r
	  matrix_subtract(n, b, r);
	  
	  // err2 = r.r
	  err2 = matrix_DotProduct(n, r, r);
	  saved_err2 = err2;

	  // rr = A*r
	  multiplyA(pyramid, pC, r, rr);
	}
      
      // Convergence test; after a reset it is evaluated on the recomputed err2
      // fprintf(stderr, "iter:%d err:%f\n", iter+1, sqrtf(err2/bnrm2));
      if(err2/bnrm2 < tol2)
	break;
    }

  // Use the best version we found
  if (err2 > saved_err2)
    {
      err2 = saved_err2;
      matrix_copy(n, x_save, x);
    }

  if (err2/bnrm2 > tol2)
    {
      // Not converged
      if( progress_cb != NULL )
	progress_cb( (int) (logf(err2/ierr2)*percent_sf));    
      // iter == itmax means the loop ran to completion; otherwise it was left by a break
      if (iter == itmax)
	fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (hit maximum iterations), error = %g (should be below %g).\n", sqrtf(err2/bnrm2), tol);  
      else
	fprintf(stderr, "\npfstmo_mantiuk06: Warning: Not converged (going unstable), error = %g (should be below %g).\n", sqrtf(err2/bnrm2), tol);  
    }
  else if (progress_cb != NULL)
    progress_cb(100);
    
  
  matrix_free(x_save);
  matrix_free(p);
  matrix_free(pp);
  matrix_free(z);
  matrix_free(zz);
  matrix_free(r);
  matrix_free(rr);
}
Exemplo n.º 5
0
/*
 * Computes the regularised gradient of a neural network with respect to its
 * parameters by per-example forward and backward passes, spread over OpenMP
 * threads.
 *
 * num_threads   number of OpenMP threads requested via omp_set_num_threads
 * gradient      out: *gradient receives the rolled gradient matrix produced by
 *               roll_matrix_list (presumably owned by the caller afterwards;
 *               it is not freed here)
 * rolled_theta  parameters in rolled form; unrolled with the hard-coded shapes
 *               in theta_sizes (25x401 and 10x26)
 * layer_sizes   not used by this function
 * num_layers    number of layers; the delta computation uses the literal
 *               indices 0 and 1, so the code as written matches num_layers == 3
 * num_labels    length of the per-example target vector
 * X             inputs; row i is one example, m = X->rows examples in total
 * y             labels; vector_get(y, i) is compared against the label index
 * lamda         regularisation strength (scaled by 1/m)
 *
 * Returns 0.0 unconditionally; the cost value itself is not computed.
 */
double NN_cost_function(int num_threads, matrix_t** gradient, matrix_t* rolled_theta, unsigned int layer_sizes[], unsigned int num_layers,
		unsigned int num_labels, matrix_t* X, matrix_t* y, double lamda)
{
	unsigned int theta_sizes[][2] = {{25, 401}, {10, 26}};

	matrix_list_t* theta = unroll_matrix_list(rolled_theta, num_layers-1, theta_sizes);

	unsigned int m = X->rows;
	//unsigned int n = X->cols;

	// Shared accumulator with one matrix per theta matrix, same shapes
	// (assumes matrix_constructor returns a zero-filled matrix - TODO confirm).
	matrix_list_t* theta_gradient_total = matrix_list_constructor(theta->num);
	unsigned int i, j;
	for(i=0; i<theta_gradient_total->num; i++)
	{
		theta_gradient_total->matrix_list[i] = matrix_constructor(theta->matrix_list[i]->rows, theta->matrix_list[i]->cols);
	}
	
	omp_set_num_threads(num_threads);
	int nthreads, tid;
	#pragma omp parallel private(nthreads, tid)
	{
		// indexes[0]..indexes[1] is this thread's range of example rows, as
		// filled in by get_indexes (used below as a half-open range).
		int indexes[2];
		tid = omp_get_thread_num();
		nthreads = omp_get_num_threads();
		get_indexes(m, nthreads, tid, indexes);
		// Declared inside the parallel region so each thread has its own
		// counters; they shadow the outer i and j.
		unsigned int i, j;

		matrix_t* temp;
		matrix_t* temp2;
		matrix_t* temp3;

		// Thread-local gradient accumulator, merged into the shared total below
		matrix_list_t* theta_gradient = matrix_list_constructor(theta->num);
		for(i=0; i<theta_gradient->num; i++)
		{
			theta_gradient->matrix_list[i] = matrix_constructor(theta->matrix_list[i]->rows, theta->matrix_list[i]->cols);
		}

		for(i=indexes[0]; i<indexes[1]; i++)
		{
			// Per-example storage: A = layer activations, Z = pre-activation
			// values, delta = per-layer error terms
			matrix_list_t* A = matrix_list_constructor(num_layers);
			matrix_list_t* Z = matrix_list_constructor(num_layers-1);
			matrix_list_t* delta = matrix_list_constructor(num_layers-1);

			// A[0] = transpose of (example row i with a 1.0 prepended)
			A->matrix_list[0] = row_to_vector(X, i);
			temp = matrix_prepend_col(A->matrix_list[0], 1.0);
			free_matrix(A->matrix_list[0]);

			A->matrix_list[0] = matrix_transpose(temp);
			free_matrix(temp);

			// Forward pass: Z[j] = theta[j] * A[j]; A[j+1] = sigmoid(Z[j]) with
			// a 1.0 row prepended
			for(j=0; j<num_layers-1; j++)
			{
				Z->matrix_list[j] = matrix_multiply(theta->matrix_list[j], A->matrix_list[j]);

				temp = matrix_sigmoid(Z->matrix_list[j]);
				A->matrix_list[j+1] = matrix_prepend_row(temp, 1.0);
				free_matrix(temp);
			}

			// The last activation gets a row removed again (presumably the 1.0
			// row prepended by the loop above - depends on matrix_remove_row)
			temp = matrix_remove_row(A->matrix_list[num_layers-1]);
			free_matrix(A->matrix_list[num_layers-1]);
			A->matrix_list[num_layers-1] = temp;

			// Target vector: entry j is set to 1.0 where the label of example i
			// equals j (other entries keep their constructed value)
			matrix_t* result_matrix = matrix_constructor(1, num_labels);
			for(j = 0; j < num_labels; j++)
			{
				if(vector_get(y, i) == j)
				{
					vector_set(result_matrix, j, 1.0);
				}
			}
			temp = matrix_transpose(result_matrix);
			free_matrix(result_matrix);
			result_matrix= temp;

			// delta[1] = last activation - target
			delta->matrix_list[1] = matrix_subtract(A->matrix_list[num_layers-1], result_matrix);
			free_matrix(result_matrix);

			// delta[0] = (theta[1]^T * delta[1]) multiplied cell-wise with the
			// sigmoid gradient of Z[0] (1.0 row prepended), then one row removed
			matrix_t* theta_transpose = matrix_transpose(theta->matrix_list[1]);
			temp = matrix_multiply(theta_transpose, delta->matrix_list[1]);

			matrix_t* sig_gradient = matrix_sigmoid_gradient(Z->matrix_list[0]);
			temp2 = matrix_prepend_row(sig_gradient, 1.0);

			temp3 = matrix_cell_multiply(temp, temp2);
			delta->matrix_list[0] = matrix_remove_row(temp3);

			free_matrix(temp);
			free_matrix(temp2);
			free_matrix(temp3);
			free_matrix(sig_gradient);
			free_matrix(theta_transpose);

			// Accumulate: theta_gradient[j] += delta[j] * A[j]^T
			for(j=0; j<num_layers-1; j++)
			{
				matrix_t* A_transpose = matrix_transpose(A->matrix_list[j]);
				temp = matrix_multiply(delta->matrix_list[j], A_transpose);
				temp2 = matrix_add(theta_gradient->matrix_list[j], temp);
				free_matrix(theta_gradient->matrix_list[j]);
				theta_gradient->matrix_list[j] = temp2;


				free_matrix(A_transpose);
				free_matrix(temp);
			}
			free_matrix_list(A);
			free_matrix_list(Z);
			free_matrix_list(delta);
		}
		// Merge this thread's sum into the shared total; the critical section
		// lets only one thread at a time replace theta_gradient_total.
		#pragma omp critical
		{
			matrix_list_t* temp_list;
			temp_list = matrix_list_add(theta_gradient_total, theta_gradient);

			free_matrix_list(theta_gradient_total);
			free_matrix_list(theta_gradient);
			theta_gradient_total = temp_list;
		}
	}

	// Final gradient per parameter matrix:
	//   total/m + (lamda/m) * theta, where column 0 of theta is zeroed first
	for(i=0; i<num_layers-1; i++)
	{
		matrix_t* temp;
		matrix_t* temp2;
		matrix_t* temp3;

		temp = matrix_scalar_multiply(theta_gradient_total->matrix_list[i], 1.0/m);
		temp2 = copy_matrix(theta->matrix_list[i]);
		for(j=0; j<theta->matrix_list[i]->rows; j++)
		{
			matrix_set(temp2, j, 0, 0.0);
		}
		// The accumulated sum is no longer needed once temp holds its scaled copy
		free_matrix(theta_gradient_total->matrix_list[i]);
		temp3 = matrix_scalar_multiply(temp2, lamda/m);
		theta_gradient_total->matrix_list[i] = matrix_add(temp, temp3);
		free_matrix(temp);
		free_matrix(temp2);
		free_matrix(temp3);
	}

	*gradient = roll_matrix_list(theta_gradient_total);

	free_matrix_list(theta);
	free_matrix_list(theta_gradient_total);

	// The cost itself is not evaluated; only the gradient output is meaningful
	return 0.0;
}
Exemplo n.º 6
0
// Binary minus for matrix values: forwards both operands to
// matrix_subtract(A, B) and returns whatever that call returns.
matrix operator-(matrix A, matrix B){
	return matrix_subtract(A, B);
}
Exemplo n.º 7
0
/* Reads one whitespace-delimited token from stdin into buf.
 * buf must provide at least USHRT_MAX bytes; USHRT_MAX is at least 65535, so
 * the field width of 65534 characters plus the terminator always fits.
 * Returns 1 when a token was stored, 0 on end of input or read error. */
static int read_input_token(char *buf)
{
    return scanf("%65534s", buf) == 1;
}

/* Prompts until the user enters a strictly positive integer that fits an int
 * and stores it in *value. Returns 1 on success, 0 when input ends. */
static int read_positive_int(const char *prompt, char *buf, int *value)
{
    for (;;) {
        long parsed;

        printf("%s", prompt);
        if (!read_input_token(buf))
            return 0;
        parsed = strtol(buf, NULL, 10);
        printf("\n");
        if (parsed > 0 && parsed <= INT_MAX) {
            *value = (int) parsed;
            return 1;
        }
        printf("Valor inválido!\n\n");
    }
}

/* Fills target->table[0..rows-1][0..cols-1] row by row from stdin tokens
 * converted with atof. Returns 1 on success, 0 when input ends early. */
static int read_matrix_entries(matrix *target, int rows, int cols, char *buf)
{
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++) {
            if (!read_input_token(buf))
                return 0;
            target->table[i][j] = atof(buf);
        }
    return 1;
}

/* Tells whether `operand` can be combined with `current` for the menu choice
 * `operation` (1 = add, 2 = subtract, 3 = multiply). */
static int operand_is_compatible(int operation, matrix current, matrix operand)
{
    switch (operation) {
        case 1:
            return matrix_can_add(current, operand) != 0;
        case 2:
            return matrix_can_subtract(current, operand) != 0;
        case 3:
            return matrix_can_multiply(current, operand) != 0;
        default:
            return 0;
    }
}

/* Asks for the dimensions of the next operand until they are compatible with
 * `current` for `operation`, then reads its elements into *operand.
 * Returns 1 with *operand constructed and filled; returns 0 when input ends,
 * in which case *operand holds nothing that needs to be destroyed. */
static int read_next_operand(int operation, matrix current, matrix *operand, char *buf)
{
    int rows;
    int cols;

    for (;;) {
        if (!read_positive_int("Entre com o número de linhas da proxima matriz: ", buf, &rows))
            return 0;
        if (!read_positive_int("Entre com o número de colunas da proxima matriz: ", buf, &cols))
            return 0;
        *operand = matrix_constructor(rows, cols);
        if (operand_is_compatible(operation, current, *operand))
            break;
        /* Release the rejected matrix before asking again (it used to leak). */
        matrix_destructor(operand);
        printf("Não é possivel fazer a operação desejada com as matrizes de ordens previamente informadas!\n\n");
    }
    printf("Entre com os elementos da proxima matriz, separando-os por espaços e/ou quebras de linha:\n");
    if (!read_matrix_entries(operand, rows, cols, buf)) {
        matrix_destructor(operand);
        return 0;
    }
    printf("\n");
    return 1;
}

/*
 * Interactive matrix calculator.
 *
 * Reads the dimensions and elements of a first matrix from stdin, then
 * repeatedly shows menu_matrix() and applies the chosen operation to the
 * running result: 1 = add, 2 = subtract, 3 = multiply (each reads another
 * matrix), 4 = raise to a non-negative integer power. Any other choice prints
 * the result one last time, destroys it and leaves the loop.
 *
 * Dimensions must be strictly positive. If stdin ends in the middle of a
 * prompt, every matrix built so far is destroyed and the function returns.
 */
void operator_matrix() {
    matrix result;
    char in[USHRT_MAX];
    int m;
    int n;

    if (!read_positive_int("Entre com o número de linhas da 1ª matriz: ", in, &m))
        return;
    if (!read_positive_int("Entre com o número de colunas da 1ª matriz: ", in, &n))
        return;
    result = matrix_constructor(m, n);
    printf("Entre com os elementos da 1ª matriz, separando-os por espaços e/ou quebras de linha:\n");
    if (!read_matrix_entries(&result, m, n, in)) {
        matrix_destructor(&result);
        return;
    }
    printf("\n");

    int keep_going = 1;
    while (keep_going) {
        matrix next;
        /* `next` only exists for choices 1-3; destroying it otherwise would
         * operate on an uninitialised object. */
        int has_next = 0;
        int choice = menu_matrix();

        switch (choice) {
            case 1:
            case 2:
            case 3:
                if (!read_next_operand(choice, result, &next, in)) {
                    matrix_destructor(&result);
                    return;
                }
                has_next = 1;
                if (choice == 1)
                    matrix_add(&result, next);
                else if (choice == 2)
                    matrix_subtract(&result, next);
                else
                    matrix_multiply(&result, next);
                break;
            case 4:
                /* Power */
                if (matrix_can_power(result)) {
                    for (;;) {
                        long long exponent;

                        printf("Entre com o próximo valor: ");
                        if (!read_input_token(in)) {
                            matrix_destructor(&result);
                            return;
                        }
                        printf("\n");
                        exponent = atoll(in);
                        if (exponent >= 0) {
                            matrix_power(&result, (unsigned long long int) exponent);
                            break;
                        }
                        printf("Valor inválido!\n\n");
                    }
                } else
                    printf("Não é possivel realizar a operação desejada com a matrix atual!\n\n");
                break;
            default:
                keep_going = 0;
                break;
        }
        if (has_next)
            matrix_destructor(&next);
        printf("Resultado:\n\n");
        matrix_print(result);
        printf("\n");
        if (!keep_going)
            matrix_destructor(&result);
    }
}
Exemplo n.º 8
0
std::vector<Task*> StrassenSingleProblem::split() {
    int T_m = m/2, T_n = k/2, S_m = k/2, S_n = n/2;

    float *A11 = A;
    float *A21 = A + m/2;
    float *A12 = A + lda*k/2;
    float *A22 = A + lda*k/2 + m/2;

    float *B11 = B;
    float *B21 = B + k/2;
    float *B12 = B + ldb*n/2;
    float *B22 = B + ldb*n/2 + k/2;

    float *C11 = C;
    float *C21 = C + m/2;
    float *C12 = C + ldc*n/2;
    float *C22 = C + ldc*n/2 + m/2;

    float *T0 = A11;
    float *T1 = A12;
    float *T2 = (float*) malloc(T_m * T_n * sizeof(float));
    float *T3 = (float*) malloc(T_m * T_n * sizeof(float));
    float *T4 = (float*) malloc(T_m * T_n * sizeof(float));
    float *T5 = (float*) malloc(T_m * T_n * sizeof(float));
    float *T6 = A22;

    float *S0 = B11;
    float *S1 = B21;
    float *S2 = (float*) malloc(S_m * S_n * sizeof(float));
    float *S3 = (float*) malloc(S_m * S_n * sizeof(float));
    float *S4 = (float*) malloc(S_m * S_n * sizeof(float));
    float *S5 = B22;
    float *S6 = (float*) malloc(S_m * S_n * sizeof(float));

    float *Q0 = C11;
    float *Q1 = (float*) malloc(T_m * S_n * sizeof(float));
    float *Q2 = C22;
    float *Q3 = C12;
    float *Q4 = C21;
    float *Q5 = (float*) malloc(T_m * S_n * sizeof(float));
    float *Q6 = (float*) malloc(T_m * S_n * sizeof(float));

    matrix_add(T_m, T_n, A21, lda, A22, lda, T2, T_m);
    matrix_subtract(T_m, T_n, T2, T_m, A11, lda, T3, T_m);
    matrix_subtract(T_m, T_n, A11, lda, A21, lda, T4, T_m);
    matrix_subtract(T_m, T_n, A12, lda, T3, T_m, T5, T_m);

    matrix_subtract(S_m, S_n, B12, ldb, B11, ldb, S2, S_m);
    matrix_subtract(S_m, S_n, B22, ldb, S2, S_m, S3, S_m);
    matrix_subtract(S_m, S_n, B22, ldb, B12, ldb, S4, S_m);
    matrix_subtract(S_m, S_n, S3, S_m, B21, ldb, S6, S_m);

    std::vector<Task*> tasks (7);
    tasks[0] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T0, lda, S0, ldb, Q0, ldc));
    tasks[1] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T1, lda, S1, ldb, Q1, T_m));
    tasks[2] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T2, T_m, S2, S_m, Q2, ldc));
    tasks[3] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T3, T_m, S3, S_m, Q3, ldc));
    tasks[4] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T4, T_m, S4, S_m, Q4, ldc));
    tasks[5] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T5, T_m, S5, ldb, Q5, T_m));
    tasks[6] = new Task(new StrassenSingleProblem(T_m, T_n, S_n, T6, lda, S6, S_m, Q6, T_m));
    return tasks;
}