// -----------------------------------------------------------------
// Take log of hermitian part of decomposition to define scalar field
void matrix_log(matrix *in, matrix *out) {
  char V = 'V';     // Ask LAPACK for both eigenvalues and eigenvectors
  char U = 'U';     // Have LAPACK store upper triangle of in
  int row, col, Npt = NCOL, stat = 0, Nwork = 2 * NCOL;
  matrix evecs, tmat;

  // Convert in to column-major double array used by LAPACK
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      store[2 * (col * NCOL + row)] = in->e[row][col].real;
      store[2 * (col * NCOL + row) + 1] = in->e[row][col].imag;
    }
  }

  // Compute eigenvalues and eigenvectors of in
  zheev_(&V, &U, &Npt, store, &Npt, eigs, work, &Nwork, Rwork, &stat);
  if (stat != 0)
    printf("WARNING: zheev returned error message %d\n", stat);

  // Move the results back into matrix structures
  // Use evecs to hold the eigenvectors for projection
  clear_mat(out);
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      evecs.e[row][col].real = store[2 * (col * NCOL + row)];
      evecs.e[row][col].imag = store[2 * (col * NCOL + row) + 1];
    }
    out->e[row][row].real = log(eigs[row]);
  }
  // Inverse of eigenvector matrix is simply adjoint
  mult_na(out, &evecs, &tmat);
  mult_nn(&evecs, &tmat, out);
}
// -----------------------------------------------------------------
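// -----------------------------------------------------------------
// A standalone sketch of the interleaved column-major packing that zheev_
// expects, using a 2x2 hermitian test matrix with known eigenvalues 1 and 3.
// Illustrative only: it assumes a Fortran LAPACK with trailing-underscore
// symbols, e.g. build with "cc demo.c -llapack -lm"
#include <stdio.h>
#include <math.h>

void zheev_(char *jobz, char *uplo, int *n, double *a, int *lda,
            double *w, double *work, int *lwork, double *rwork, int *info);

int main(void) {
  char V = 'V', U = 'U';
  int n = 2, lwork = 4, info = 0;           // lwork = 2 * n, as in matrix_log
  double a[8], w[2], work[8], rwork[4];     // rwork needs 3n - 2 = 4 doubles

  // Hermitian test matrix [[2, i], [-i, 2]]
  // Column-major, interleaved: a[2 * (col * n + row)] = Re, ... + 1 = Im
  a[0] = 2.0;  a[1] = 0.0;      // (0, 0)
  a[2] = 0.0;  a[3] = -1.0;     // (1, 0) = -i
  a[4] = 0.0;  a[5] = 1.0;      // (0, 1) = i
  a[6] = 2.0;  a[7] = 0.0;      // (1, 1)

  zheev_(&V, &U, &n, a, &n, w, work, &lwork, rwork, &info);
  if (info != 0)
    printf("WARNING: zheev returned error message %d\n", info);

  printf("Eigenvalues: %.4g %.4g\n", w[0], w[1]);      // Expect 1 3
  printf("Logs: %.4g %.4g\n", log(w[0]), log(w[1]));
  return 0;
}
// -----------------------------------------------------------------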
// -----------------------------------------------------------------
// Calculate U = exp(eps * A).U
// Goes to eighth order in the exponential:
//   exp(eps * A) * U = (1 + eps*A + (eps*A)^2/2 + (eps*A)^3/6 + ...) * U
//                    = U + eps*A*(U + (eps*A/2)*(U + (eps*A/3)*( ... )))
void exp_mult(int dir, double eps, anti_hermitmat *A) {
  register int i;
  register site *s;
  matrix *link, temp1, temp2, htemp;
  register Real t2, t3, t4, t5, t6, t7, t8;

  // Take divisions out of site loop (can't be done by compiler)
  t2 = eps / 2.0;
  t3 = eps / 3.0;
  t4 = eps / 4.0;
  t5 = eps / 5.0;
  t6 = eps / 6.0;
  t7 = eps / 7.0;
  t8 = eps / 8.0;

  FORALLSITES(i, s) {
    uncompress_anti_hermitian(&(A[i]), &htemp);
    link = &(s->link[dir]);

    mult_nn(&htemp, link, &temp1);
    scalar_mult_add_matrix(link, &temp1, t8, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t7, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t6, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t5, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t4, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t3, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, t2, &temp2);

    mult_nn(&htemp, &temp2, &temp1);
    scalar_mult_add_matrix(link, &temp1, eps, &temp2);
    mat_copy(&temp2, link);    // This step updates the link U[dir]
  }
}
// -----------------------------------------------------------------
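// -----------------------------------------------------------------
// A minimal scalar sketch of the Horner nesting used by exp_mult: truncating
// exp(eps * a) * u at eighth order and comparing against the library exp().
// All names here are hypothetical, for illustration only
#include <stdio.h>
#include <math.h>

int main(void) {
  double eps = 0.1, a = 0.7, u = 1.3, temp = u;
  int k;

  // temp = u + eps*a*(u + (eps*a/2)*(u + (eps*a/3)*( ... ))), built inside out
  for (k = 8; k >= 1; k--)
    temp = u + (eps / k) * a * temp;

  printf("Eighth-order Horner: %.12g\n", temp);
  printf("Exact exp(eps*a)*u:  %.12g\n", exp(eps * a) * u);
  return 0;
}
// -----------------------------------------------------------------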
void random_gauge_trans(Twist_Fermion *TF) {
  int a, b, i, j, x = 1, t = 1, s = node_index(x, t);
  complex tc;
  matrix Gmat, Pmat, tmat, etamat, psimat[NUMLINK], chimat;

  if (this_node != 0) {
    printf("random_gauge_trans: only implemented in serial so far\n");
    fflush(stdout);
    terminate(1);
  }
  if (nx < 4 || nt < 4) {
    printf("random_gauge_trans: doesn't deal with boundaries, ");
    printf("needs to be run on larger volume\n");
    fflush(stdout);
    terminate(1);
  }

  // Set up random gaussian matrix, then unitarize it
  clear_mat(&tmat);
  for (j = 0; j < DIMF; j++) {
#ifdef SITERAND
    tc.real = gaussian_rand_no(&(lattice[0].site_prn));
    tc.imag = gaussian_rand_no(&(lattice[0].site_prn));
#else
    tc.real = gaussian_rand_no(&(lattice[0].node_prn));
    tc.imag = gaussian_rand_no(&(lattice[0].node_prn));
#endif
    c_scalar_mult_sum_mat(&(Lambda[j]), &tc, &tmat);
  }
  // Keep only the unitary factor; the positive part Pmat is discarded
  polar(&tmat, &Gmat, &Pmat);

  // Confirm unitarity or check invariance when Gmat = I
//  mult_na(&Gmat, &Gmat, &tmat);
//  dumpmat(&tmat);
//  mat_copy(&tmat, &Gmat);

  // Left side of local eta
  clear_mat(&etamat);
  // Construct G eta = sum_j eta^j G Lambda^j
  for (j = 0; j < DIMF; j++) {
    mult_nn(&Gmat, &(Lambda[j]), &tmat);
    tc = TF[s].Fsite.c[j];
    c_scalar_mult_sum_mat(&tmat, &tc, &etamat);
  }
  // Project out eta^j = -Tr[Lambda^j G eta]
  for (j = 0; j < DIMF; j++) {
    mult_nn(&(Lambda[j]), &etamat, &tmat);
    tc = trace(&tmat);
    CNEGATE(tc, TF[s].Fsite.c[j]);
  }

  // Right side of local eta
  clear_mat(&etamat);
  // Construct eta Gdag = sum_j eta^j Lambda^j Gdag
  for (j = 0; j < DIMF; j++) {
    mult_na(&(Lambda[j]), &Gmat, &tmat);
    tc = TF[s].Fsite.c[j];
    c_scalar_mult_sum_mat(&tmat, &tc, &etamat);
  }
  // Project out eta^j = -Tr[eta Gdag Lambda^j]
  for (j = 0; j < DIMF; j++) {
    mult_nn(&etamat, &(Lambda[j]), &tmat);
    tc = trace(&tmat);
    CNEGATE(tc, TF[s].Fsite.c[j]);
  }

  // Left side of local links and psis; right side of local chis
  FORALLDIR(a) {
    mult_nn(&Gmat, &(lattice[s].link[a]), &tmat);
    mat_copy(&tmat, &(lattice[s].link[a]));

    clear_mat(&(psimat[a]));
    for (j = 0; j < DIMF; j++) {
      mult_nn(&Gmat, &(Lambda[j]), &tmat);
      tc = TF[s].Flink[a].c[j];
      c_scalar_mult_sum_mat(&tmat, &tc, &(psimat[a]));
    }
    for (j = 0; j < DIMF; j++) {
      mult_nn(&(Lambda[j]), &(psimat[a]), &tmat);
      tc = trace(&tmat);
      CNEGATE(tc, TF[s].Flink[a].c[j]);
    }

    for (b = a + 1; b < NUMLINK; b++) {
      i = plaq_index[a][b];    // Index the chi living on this plaquette
      clear_mat(&chimat);
      for (j = 0; j < DIMF; j++) {
        mult_na(&(Lambda[j]), &Gmat, &tmat);
        tc = TF[s].Fplaq[i].c[j];
        c_scalar_mult_sum_mat(&tmat, &tc, &chimat);
      }
      for (j = 0; j < DIMF; j++) {
        mult_nn(&chimat, &(Lambda[j]), &tmat);
        tc = trace(&tmat);
        CNEGATE(tc, TF[s].Fplaq[i].c[j]);
      }
    }
  }

  // Right side of neighboring links and psis
  // TODO: Presumably we can convert this to a loop...
  s = node_index(x - 1, t);
  mult_na(&(lattice[s].link[0]), &Gmat, &tmat);
  mat_copy(&tmat, &(lattice[s].link[0]));

  clear_mat(&(psimat[0]));
  for (j = 0; j < DIMF; j++) {
    mult_na(&(Lambda[j]), &Gmat, &tmat);
    tc = TF[s].Flink[0].c[j];
    c_scalar_mult_sum_mat(&tmat, &tc, &(psimat[0]));
  }
  for (j = 0; j < DIMF; j++) {
    mult_nn(&(psimat[0]), &(Lambda[j]), &tmat);
    tc = trace(&tmat);
    CNEGATE(tc, TF[s].Flink[0].c[j]);
  }

  s = node_index(x, t - 1);
  mult_na(&(lattice[s].link[3]), &Gmat, &tmat);
  mat_copy(&tmat, &(lattice[s].link[3]));

  clear_mat(&(psimat[3]));
  for (j = 0; j < DIMF; j++) {
    mult_na(&(Lambda[j]), &Gmat, &tmat);
    tc = TF[s].Flink[3].c[j];
    c_scalar_mult_sum_mat(&tmat, &tc, &(psimat[3]));
  }
  for (j = 0; j < DIMF; j++) {
    mult_nn(&(psimat[3]), &(Lambda[j]), &tmat);
    tc = trace(&tmat);
    CNEGATE(tc, TF[s].Flink[3].c[j]);
  }

  // Left side of neighboring chi
  s = node_index(x - 1, t - 1);
  i = plaq_index[0][3];
  clear_mat(&chimat);
  for (j = 0; j < DIMF; j++) {
    mult_nn(&Gmat, &(Lambda[j]), &tmat);
    tc = TF[s].Fplaq[i].c[j];
    c_scalar_mult_sum_mat(&tmat, &tc, &chimat);
  }
  for (j = 0; j < DIMF; j++) {
    mult_nn(&(Lambda[j]), &chimat, &tmat);
    tc = trace(&tmat);
    CNEGATE(tc, TF[s].Fplaq[i].c[j]);
  }
}
// -----------------------------------------------------------------
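// -----------------------------------------------------------------
// A standalone sketch of the projection used repeatedly above: with an
// antihermitian basis normalized so that Tr[T^a T^b] = -delta^{ab}, any
// M = sum_a c^a T^a has c^a = -Tr[T^a M], which is exactly the
// tc = trace(...); CNEGATE(tc, ...) pattern.  Shown here for U(2) with
// T^a = i*sigma^a/sqrt(2) and T^4 = i*I/sqrt(2); the Lambda[j] in the code
// play the same role
#include <stdio.h>
#include <math.h>
#include <complex.h>

#define N 2
#define DIM 4

int main(void) {
  double complex T[DIM][N][N] = {{{0}}}, M[N][N] = {{0}}, c[DIM], out, tr;
  double s = 1.0 / sqrt(2.0);
  int a, i, j, k;

  T[0][0][1] = I * s;   T[0][1][0] = I * s;       // i sigma_1 / sqrt(2)
  T[1][0][1] = s;       T[1][1][0] = -s;          // i sigma_2 / sqrt(2)
  T[2][0][0] = I * s;   T[2][1][1] = -I * s;      // i sigma_3 / sqrt(2)
  T[3][0][0] = I * s;   T[3][1][1] = I * s;       // i I / sqrt(2)

  // Build M = sum_a c^a T^a from some test coefficients
  for (a = 0; a < DIM; a++) {
    c[a] = (a + 1) + 0.5 * a * I;
    for (i = 0; i < N; i++)
      for (j = 0; j < N; j++)
        M[i][j] += c[a] * T[a][i][j];
  }

  // Recover c^a = -Tr[T^a M]
  for (a = 0; a < DIM; a++) {
    tr = 0.0;
    for (i = 0; i < N; i++)
      for (k = 0; k < N; k++)
        tr += T[a][i][k] * M[k][i];
    out = -tr;
    printf("c[%d]: input (%.3f, %.3f), recovered (%.3f, %.3f)\n",
           a, creal(c[a]), cimag(c[a]), creal(out), cimag(out));
  }
  return 0;
}
// -----------------------------------------------------------------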
void hvy_pot(int do_det) {
  register int i;
  register site *s;
  int t_dist, x_dist;
  double wloop;
  complex tc;
  matrix tmat, tmat2;
  msg_tag *mtag = NULL;

  node0_printf("hvy_pot: MAX_T = %d, MAX_X = %d\n", MAX_T, MAX_X);

  // Use staple to hold product of t_dist links at each point
  for (t_dist = 1; t_dist <= MAX_T; t_dist++) {
    if (t_dist == 1) {
      FORALLSITES(i, s)
        mat_copy(&(s->link[TUP]), &(staple[i]));
    }
    else {
      mtag = start_gather_field(staple, sizeof(matrix), goffset[TUP],
                                EVENANDODD, gen_pt[0]);

      // Be careful about overwriting staple;
      // gen_pt may just point to it for on-node "gathers"
      wait_gather(mtag);
      FORALLSITES(i, s)
        mult_nn(&(s->link[TUP]), (matrix *)gen_pt[0][i], &(tempmat2[i]));
      cleanup_gather(mtag);
      FORALLSITES(i, s)
        mat_copy(&(tempmat2[i]), &(staple[i]));
    }

    // Copy staple to tempmat
    // Will shift at end of loop
    FORALLSITES(i, s)
      mat_copy(&(staple[i]), &(tempmat[i]));

    for (x_dist = 0; x_dist <= MAX_X; x_dist++) {
      // Evaluate potential at this separation
      wloop = 0.0;
      FORALLSITES(i, s) {
        // Compute the actual Coulomb gauge Wilson loop product
        mult_na(&(staple[i]), &(tempmat[i]), &tmat);

        if (do_det == 1)
          det_project(&tmat, &tmat2);
        else
          mat_copy(&tmat, &tmat2);

        tc = trace(&tmat2);
        wloop += tc.real;
      }
      g_doublesum(&wloop);

      if (do_det == 1) {    // Braces fix compiler error
        node0_printf("D_LOOP   ");
      }
      else
        node0_printf("POT_LOOP ");
      node0_printf("%d %d %.6g\n", x_dist, t_dist, wloop / volume);

      // As we increment x, shift in x direction
      shiftmat(tempmat, tempmat2, goffset[XUP]);
    } // x_dist
  } // t_dist
}
// -----------------------------------------------------------------
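// -----------------------------------------------------------------
// hvy_pot relies on shiftmat to move a matrix field one step in the given
// direction.  A minimal sketch, assuming the MILC-style gather API used
// above; the codebase's own version may differ in details
void shiftmat(matrix *dat, matrix *temp, int goff) {
  register int i;
  register site *s;
  msg_tag *mtag;

  mtag = start_gather_field(dat, sizeof(matrix), goff,
                            EVENANDODD, gen_pt[0]);
  wait_gather(mtag);
  // Stage through temp, since gen_pt may point directly into dat
  FORALLSITES(i, s)
    mat_copy((matrix *)(gen_pt[0][i]), &(temp[i]));
  cleanup_gather(mtag);
  FORALLSITES(i, s)
    mat_copy(&(temp[i]), &(dat[i]));
}
// -----------------------------------------------------------------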
void setup_lambda() {
  int i, j, k, l, count;
  complex inv_sqrt = cmplx(1.0 / sqrt(2.0), 0.0);
  complex i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(2.0));
#ifdef DEBUG_CHECK
  int a;
  complex tsum, tt;
  matrix tmat;
  node0_printf("Computing generators for U(N)\n");
#endif

  // Make sure Lambda matrices are initialized
  for (i = 0; i < DIMF; i++)
    clear_mat(&(Lambda[i]));

  // N * (N - 1) off-diagonal SU(N) generators
  // (T^{ij, +})_{kl} = i * (de_{ki} de_{lj} + de_{kj} de_{li}) / sqrt(2)
  // (T^{ij, -})_{kl} = (de_{ki} de_{lj} - de_{kj} de_{li}) / sqrt(2)
  // Sign in second chosen to match previous values
  count = 0;
  for (i = 0; i < NCOL; i++) {
    for (j = i + 1; j < NCOL; j++) {
      for (k = 0; k < NCOL; k++) {
        for (l = 0; l < NCOL; l++) {
          if (k == i && l == j) {
            CSUM(Lambda[count].e[k][l], i_inv_sqrt);
            CSUM(Lambda[count + 1].e[k][l], inv_sqrt);
          }
          else if (k == j && l == i) {
            CSUM(Lambda[count].e[k][l], i_inv_sqrt);
            CDIF(Lambda[count + 1].e[k][l], inv_sqrt);
          }
        }
      }
      count += 2;
    }
  }
  if (count != NCOL * (NCOL - 1)) {
    node0_printf("ERROR: Wrong number of off-diagonal generators, ");
    node0_printf("%d vs. %d\n", count, NCOL * (NCOL - 1));
    terminate(1);
  }

  // N - 1 diagonal SU(N) generators
  // T^k = i * diag(1, 1, ..., -k, 0, ..., 0) / sqrt(k * (k + 1))
  for (i = 0; i < NCOL - 1; i++) {
    j = NCOL * (NCOL - 1) + i;    // Index after +/- above
    k = i + 1;
    i_inv_sqrt = cmplx(0.0, 1.0 / sqrt(k * (k + 1.0)));
    for (l = 0; l <= k; l++)
      Lambda[j].e[l][l] = i_inv_sqrt;
    CMULREAL(Lambda[j].e[k][k], -1.0 * k, Lambda[j].e[k][k]);
  }

  // U(1) generator i * I_N / sqrt(N)
  if (DIMF == NCOL * NCOL) {    // Allow SU(N) compilation for now
    i_inv_sqrt = cmplx(0.0, sqrt(one_ov_N));
    clear_mat(&(Lambda[DIMF - 1]));
    for (i = 0; i < NCOL; i++)
      Lambda[DIMF - 1].e[i][i] = i_inv_sqrt;
  }

#ifdef DEBUG_CHECK
  // Print Lambdas
  for (i = 0; i < DIMF; i++) {
    node0_printf("Lambda[%d]\n", i);
    if (this_node == 0)
      dumpmat(&(Lambda[i]));
  }

  // Test group theory
  node0_printf("Check group theory ");
  node0_printf("Sum_a Lambda^a_{kl} Lambda^a_{ij} = -delta_kj delta_il\n");
  for (i = 0; i < NCOL; i++) {
    for (j = 0; j < NCOL; j++) {
      for (k = 0; k < NCOL; k++) {
        for (l = 0; l < NCOL; l++) {
          tsum = cmplx(0, 0);
          for (a = 0; a < DIMF; a++) {
            CMUL(Lambda[a].e[k][l], Lambda[a].e[i][j], tt);
            CSUM(tsum, tt);
          }
          if (cabs_sq(&tsum) > IMAG_TOL)
            node0_printf("Sum_a La^a_{%d%d} La^a_{%d%d} = (%.4g, %.4g)\n",
                         k, l, i, j, tsum.real, tsum.imag);
        }
      }
    }
  }
#endif

  // Test orthogonality of the Lambdas
#ifdef DEBUG_CHECK
  for (i = 0; i < DIMF; i++) {
    for (j = 0; j < DIMF; j++) {
      mult_nn(&(Lambda[i]), &(Lambda[j]), &tmat);
      tsum = trace(&tmat);
      if (tsum.real * tsum.real > IMAG_TOL)
        node0_printf("Tr[T_%d T_%d] = (%.4g, %.4g)\n",
                     i, j, tsum.real, tsum.imag);
    }
  }
#endif
}
// -----------------------------------------------------------------
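// -----------------------------------------------------------------
// A standalone sketch checking the completeness relation quoted above,
// Sum_a Lambda^a_{kl} Lambda^a_{ij} = -delta_kj delta_il, using the same
// generator construction written out explicitly for NCOL = 2, DIMF = 4.
// Illustrative only; not part of the codebase
#include <stdio.h>
#include <math.h>
#include <complex.h>

#define NCOL 2
#define DIMF (NCOL * NCOL)

int main(void) {
  double complex Lambda[DIMF][NCOL][NCOL] = {{{0}}}, sum;
  double s = 1.0 / sqrt(2.0), expect, dev, max_dev = 0.0;
  int i, j, k, l, a;

  // Off-diagonal +/- pair for (i, j) = (0, 1)
  Lambda[0][0][1] = I * s;  Lambda[0][1][0] = I * s;
  Lambda[1][0][1] = s;      Lambda[1][1][0] = -s;
  // Diagonal SU(2) generator i * diag(1, -1) / sqrt(2)
  Lambda[2][0][0] = I * s;  Lambda[2][1][1] = -I * s;
  // U(1) generator i * I / sqrt(2)
  Lambda[3][0][0] = I * s;  Lambda[3][1][1] = I * s;

  for (k = 0; k < NCOL; k++) {
    for (l = 0; l < NCOL; l++) {
      for (i = 0; i < NCOL; i++) {
        for (j = 0; j < NCOL; j++) {
          sum = 0.0;
          for (a = 0; a < DIMF; a++)
            sum += Lambda[a][k][l] * Lambda[a][i][j];
          expect = (k == j && i == l) ? -1.0 : 0.0;
          dev = cabs(sum - expect);
          if (dev > max_dev)
            max_dev = dev;
        }
      }
    }
  }
  printf("Max deviation from -delta_kj delta_il: %.3g\n", max_dev);
  return 0;
}
// -----------------------------------------------------------------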
// -----------------------------------------------------------------
// Given matrix in = P.u, calculate the unitary matrix u = [1 / P].in
// and the positive P = sqrt[in.in^dag]
// We diagonalize PSq = in.in^dag using LAPACK,
// then project out its inverse square root
void polar(matrix *in, matrix *u, matrix *P) {
  char V = 'V';     // Ask LAPACK for both eigenvalues and eigenvectors
  char U = 'U';     // Have LAPACK store upper triangle of PSq = in.in^dag
  int row, col, Npt = NCOL, stat = 0, Nwork = 2 * NCOL;
  matrix PSq, Pinv, tmat;

  // Convert PSq to column-major double array used by LAPACK
  mult_na(in, in, &PSq);
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      store[2 * (col * NCOL + row)] = PSq.e[row][col].real;
      store[2 * (col * NCOL + row) + 1] = PSq.e[row][col].imag;
    }
  }

  // Compute eigenvalues and eigenvectors of PSq
  zheev_(&V, &U, &Npt, store, &Npt, eigs, work, &Nwork, Rwork, &stat);
  if (stat != 0)
    printf("WARNING: zheev returned error message %d\n", stat);

  // Check for degenerate eigenvalues (broke previous Jacobi algorithm)
  for (row = 0; row < NCOL; row++) {
    for (col = row + 1; col < NCOL; col++) {
      if (fabs(eigs[row] - eigs[col]) < IMAG_TOL)
        printf("WARNING: w[%d] = w[%d] = %.8g\n", row, col, eigs[row]);
    }
  }

  // Move the results back into matrix structures
  // Overwrite PSq to hold the eigenvectors for projection
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      PSq.e[row][col].real = store[2 * (col * NCOL + row)];
      PSq.e[row][col].imag = store[2 * (col * NCOL + row) + 1];
      P->e[row][col] = cmplx(0.0, 0.0);
      Pinv.e[row][col] = cmplx(0.0, 0.0);
    }
    P->e[row][row].real = sqrt(eigs[row]);
    Pinv.e[row][row].real = 1.0 / sqrt(eigs[row]);
  }
  mult_na(P, &PSq, &tmat);
  mult_nn(&PSq, &tmat, P);

  // Now project out 1 / sqrt[in.in^dag] to find u = [1 / P].in
  mult_na(&Pinv, &PSq, &tmat);
  mult_nn(&PSq, &tmat, &Pinv);
  mult_nn(&Pinv, in, u);

#ifdef DEBUG_CHECK
  // Check unitarity of u
  mult_na(u, u, &PSq);
  c_scalar_add_diag(&PSq, &minus1);
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) {
        printf("Error getting unitary piece: ");
        printf("%.4g > %.4g for [%d, %d]\n",
               cabs_sq(&(PSq.e[row][col])), SQ_TOL, row, col);
        dumpmat(in);
        dumpmat(u);
        dumpmat(P);
        return;
      }
    }
  }
#endif

#ifdef DEBUG_CHECK
  // Check hermiticity of P
  adjoint(P, &tmat);
  sub_matrix(P, &tmat, &PSq);
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) {
        printf("Error getting hermitian piece: ");
        printf("%.4g > %.4g for [%d, %d]\n",
               cabs_sq(&(PSq.e[row][col])), SQ_TOL, row, col);
        dumpmat(in);
        dumpmat(u);
        dumpmat(P);
        return;
      }
    }
  }
#endif

#ifdef DEBUG_CHECK
  // Check that in = P.u
  mult_nn(P, u, &tmat);
  sub_matrix(in, &tmat, &PSq);
  for (row = 0; row < NCOL; row++) {
    for (col = 0; col < NCOL; col++) {
      if (cabs_sq(&(PSq.e[row][col])) > SQ_TOL) {
        printf("Error reconstructing initial matrix: ");
        printf("%.4g > %.4g for [%d, %d]\n",
               cabs_sq(&(PSq.e[row][col])), SQ_TOL, row, col);
        dumpmat(in);
        dumpmat(u);
        dumpmat(P);
        return;
      }
    }
  }
#endif
}
// -----------------------------------------------------------------
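// -----------------------------------------------------------------
// A standalone sketch of an alternative route to the same unitary factor:
// the Newton iteration X <- (X + (X^{-1})^dag) / 2 converges quadratically
// to u with in = P.u, giving a cheap cross-check on the eigendecomposition
// route above.  Hypothetical 2x2 demo using C99 complex; not the codebase's
// own method
#include <stdio.h>
#include <complex.h>

int main(void) {
  double complex X[2][2] = {{2.0 + 1.0 * I, 0.5},
                            {0.3 * I, 1.0 - 0.5 * I}};    // Nonsingular test
  double complex inv[2][2], prod, det;
  int iter, i, j;

  for (iter = 0; iter < 20; iter++) {
    det = X[0][0] * X[1][1] - X[0][1] * X[1][0];
    // 2x2 inverse via adjugate / determinant
    inv[0][0] = X[1][1] / det;    inv[0][1] = -X[0][1] / det;
    inv[1][0] = -X[1][0] / det;   inv[1][1] = X[0][0] / det;
    // X <- (X + (X^{-1})^dag) / 2, with conj(inv[j][i]) giving the dagger
    for (i = 0; i < 2; i++)
      for (j = 0; j < 2; j++)
        X[i][j] = 0.5 * (X[i][j] + conj(inv[j][i]));
  }

  // u.udag should now be the identity up to roundoff
  for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++) {
      prod = X[i][0] * conj(X[j][0]) + X[i][1] * conj(X[j][1]);
      printf("(u.udag)[%d][%d] = (%.6f, %.6f)\n",
             i, j, creal(prod), cimag(prod));
    }
  }
  return 0;
}
// -----------------------------------------------------------------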
// -----------------------------------------------------------------
void meas_plaq() {
  register int i, dir1, dir2;
  register site *s;
  register matrix *m1, *m4;
  double plaq_ss = 0, plaq_e[5], plaq_o[5];    // st, x, y, z, a
  matrix mtmp, *tmat;                          // Scratch space
  msg_tag *mtag0, *mtag1;

  tmat = (matrix *)malloc(sizeof(matrix) * sites_on_node);
  if (tmat == NULL) {
    node0_printf("ERROR: can't malloc tmat in meas_plaq()\n");
    fflush(stdout);
    terminate(1);
  }

  for (i = 0; i < 5; i++) {
    plaq_e[i] = 0;
    plaq_o[i] = 0;
  }

  for (dir1 = YUP; dir1 <= TUP; dir1++) {
    for (dir2 = XUP; dir2 < dir1; dir2++) {
      mtag0 = start_gather_site(F_OFFSET(link[dir2]), sizeof(matrix),
                                dir1, EVENANDODD, gen_pt[0]);
      mtag1 = start_gather_site(F_OFFSET(link[dir1]), sizeof(matrix),
                                dir2, EVENANDODD, gen_pt[1]);

      FORALLSITES(i, s) {
        m1 = &(s->link[dir1]);
        m4 = &(s->link[dir2]);
        mult_an(m4, m1, &tmat[i]);
      }
      wait_gather(mtag0);
      wait_gather(mtag1);
      FORALLSITES(i, s) {
        mult_nn(&tmat[i], (matrix *)(gen_pt[0][i]), &mtmp);

        if (dir1 == TUP) {
          if ((s->t) % 2 == 0)
            plaq_e[0] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
          else
            plaq_o[0] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
        }
        else    // Check
          plaq_ss += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);

        if (dir1 == XUP || dir2 == XUP) {
          if ((s->x) % 2 == 0)
            plaq_e[1] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
          else
            plaq_o[1] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
        }
        if (dir1 == YUP || dir2 == YUP) {
          if ((s->y) % 2 == 0)
            plaq_e[2] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
          else
            plaq_o[2] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
        }
        if (dir1 == ZUP || dir2 == ZUP) {
          if ((s->z) % 2 == 0)
            plaq_e[3] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
          else
            plaq_o[3] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
        }

        // "a" is not really even/odd, but leave the labels for consistency
        if ((s->t) % 2 == 1 && (s->x) % 2 == 1
         && (s->y) % 2 == 1 && (s->z) % 2 == 1)
          plaq_e[4] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
        if ((s->t) % 2 == 0 && (s->x) % 2 == 0
         && (s->y) % 2 == 0 && (s->z) % 2 == 0)
          plaq_o[4] += (double)realtrace((matrix *)(gen_pt[1][i]), &mtmp);
      }
      cleanup_gather(mtag0);
      cleanup_gather(mtag1);
    } // End loop over dir2
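// -----------------------------------------------------------------
// The accumulations above use realtrace(A, B), which in MILC-style code
// returns Re Tr[A^dag B] -- for the plaquette this is the real trace of the
// closed loop.  A minimal standalone sketch of that convention (hypothetical
// array types; the codebase uses its own matrix struct)
#include <stdio.h>
#include <complex.h>

#define N 3

// Re Tr[a^dag b] = sum_{i,j} Re[conj(a[i][j]) * b[i][j]]
double realtrace_demo(double complex a[N][N], double complex b[N][N]) {
  int i, j;
  double sum = 0.0;
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      sum += creal(conj(a[i][j]) * b[i][j]);
  return sum;
}

int main(void) {
  double complex id[N][N] = {{0}};
  int i;
  for (i = 0; i < N; i++)
    id[i][i] = 1.0;
  printf("realtrace(1, 1) = %.1f\n", realtrace_demo(id, id));   // Expect N
  return 0;
}
// -----------------------------------------------------------------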
// -----------------------------------------------------------------
void staple_mcrg(int dir1, int block) {
  register int i, dir2;
  register site *s;
  int j, bl, start, disp[4];    // Displacement vector for general gather
  msg_tag *tag0, *tag1, *tag2, *tag3, *tag4, *tag5, *tag6;
  matrix tmat1, tmat2;

  bl = 1;
  for (j = 1; j < block; j++)
    bl *= 2;                    // Block size

  start = 1;                    // Indicates staple sum not initialized

  // Loop over other directions
  for (dir2 = XUP; dir2 <= TUP; dir2++) {
    if (dir2 != dir1) {
      // Get link[dir2] from direction 2 * dir1
      clear_disp(disp);
      disp[dir1] = 2 * bl;
      tag0 = start_general_gather_site(F_OFFSET(link[dir2]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[0]);
      wait_general_gather(tag0);

      // Get link[dir1] from direction dir2
      clear_disp(disp);
      disp[dir2] = bl;
      tag1 = start_general_gather_site(F_OFFSET(link[dir1]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[1]);
      wait_general_gather(tag1);

      // Get link[dir1] from direction dir1 + dir2
      clear_disp(disp);
      disp[dir1] = bl;
      disp[dir2] = bl;
      tag2 = start_general_gather_site(F_OFFSET(link[dir1]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[2]);
      wait_general_gather(tag2);

      // Get link[dir2] from direction -dir2
      clear_disp(disp);
      disp[dir2] = -bl;
      tag3 = start_general_gather_site(F_OFFSET(link[dir2]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[3]);
      wait_general_gather(tag3);

      // Get link[dir1] from direction -dir2
      clear_disp(disp);
      disp[dir2] = -bl;
      tag4 = start_general_gather_site(F_OFFSET(link[dir1]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[4]);
      wait_general_gather(tag4);

      // Get link[dir1] from direction dir1 - dir2
      clear_disp(disp);
      disp[dir1] = bl;
      disp[dir2] = -bl;
      tag5 = start_general_gather_site(F_OFFSET(link[dir1]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[5]);
      wait_general_gather(tag5);

      // Get link[dir2] from direction 2 * dir1 - dir2
      clear_disp(disp);
      disp[dir1] = 2 * bl;
      disp[dir2] = -bl;
      tag6 = start_general_gather_site(F_OFFSET(link[dir2]), sizeof(matrix),
                                       disp, EVENANDODD, gen_pt[6]);
      wait_general_gather(tag6);

      // Upper staple
      if (start) {    // The first contribution to the staple
        FORALLSITES(i, s) {
          mult_nn(&(s->link[dir2]), (matrix *)(gen_pt[1][i]), &tmat1);
          mult_nn(&tmat1, (matrix *)(gen_pt[2][i]), &tmat2);
          mult_na(&tmat2, (matrix *)(gen_pt[0][i]), &(s->tempmat2));
        }
        start = 0;
      }
      else {
        FORALLSITES(i, s) {
          mult_nn(&(s->link[dir2]), (matrix *)(gen_pt[1][i]), &tmat1);
          mult_nn(&tmat1, (matrix *)(gen_pt[2][i]), &tmat2);
          mult_na(&tmat2, (matrix *)(gen_pt[0][i]), &tmat1);
          add_matrix(&(s->tempmat2), &tmat1, &(s->tempmat2));
        }
      }
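// -----------------------------------------------------------------
// The gathers above assume clear_disp zeroes the displacement vector and
// that bl = 2^(block - 1) is the current blocking size.  A minimal
// standalone sketch of both conventions (hypothetical helper; the codebase
// may define clear_disp as a macro)
#include <stdio.h>

#define XUP 0
#define TUP 3

// Zero all four components of a general-gather displacement
void clear_disp(int *disp) {
  int k;
  for (k = XUP; k <= TUP; k++)
    disp[k] = 0;
}

int main(void) {
  int j, bl, block = 3, disp[4];

  // bl doubles once per blocking level: block = 3 gives bl = 4
  bl = 1;
  for (j = 1; j < block; j++)
    bl *= 2;

  clear_disp(disp);
  disp[XUP] = 2 * bl;    // e.g. fetch link[dir2] from 2 * dir1 away
  printf("block = %d --> bl = %d, disp = (%d, %d, %d, %d)\n",
         block, bl, disp[0], disp[1], disp[2], disp[3]);
  return 0;
}
// -----------------------------------------------------------------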