static int show_test_pattern(MENU_ARGS) /* generate a color test pattern */ { int i, j, k; reset_colors(); ed(2); cup(1, 1); printf("There are %d color combinations", MAX_COLORS * MAX_COLORS); for (k = 0; k <= 11; k += 11) { cup(k + 2, 1); printf("%dx%d matrix of foreground/background colors, bright *", MAX_COLORS, MAX_COLORS); if (k) { sgr("1"); printf("on"); sgr("0"); } else { printf("off"); } printf("*"); for (i = 0; i < MAX_COLORS; i++) { cup(k + 3, (i + 1) * 8 + 1); printf("%s", colors[i]); } for (i = 0; i < MAX_COLORS; i++) { cup(k + i + 4, 1); printf("%s", colors[i]); } for (i = 0; i < MAX_COLORS; i++) { for (j = 0; j < MAX_COLORS; j++) { if (k) sgr("1"); set_color_pair(j, i); cup(k + 4 + i, (j + 1) * 8 + 1); printf("Hello"); reset_colors(); } } } reset_colors(); cup(max_lines - 1, 1); return MENU_HOLD; }
/*
 * Pick an unusual color combination for testing, just in case the user's
 * got the background set to something different.
 *
 * Sends the SGR parameter string 's' via sgr().  If the string contains a
 * reset (it is empty, starts with ';', has a parameter that is exactly "0",
 * or has an empty parameter written as ";;") and colors are enabled, the
 * test foreground/background codes are appended so the reset does not drop
 * them.
 *
 * The extended string is built in a fixed-size buffer only when it is known
 * to fit; a parameter string too long to extend is sent unmodified rather
 * than overrunning the buffer.
 */
static void
c_sgr(const char *s)
{
  char suffix[32];              /* ";<fg>;<bg>": two ints plus separators */
  char temp[80];
  const char *t;
  int reset = FALSE;

  if (*s == ';' || *s == 0) {
    reset = TRUE;
  } else {
    for (t = s; *t != 0; t++) {
      /* a parameter that is exactly "0", or an empty one (";;") */
      if (((t[0] == '0')
           && (t == s || t[-1] == ';')
           && (t[1] == 0 || t[1] == ';'))
          || ((t[0] == ';')
              && (t[1] == ';'))) {
        reset = TRUE;
        break;
      }
    }
  }

  if (reset && do_colors) {
    (void) sprintf(suffix, ";%d;%d", COLOR_YELLOW + 30, COLOR_BLUE + 40);
    if (strlen(s) + strlen(suffix) < sizeof(temp)) {
      (void) strcpy(temp, s);
      (void) strcat(temp, suffix);
      sgr(temp);
      return;
    }
    /* does not fit: fall through and send the caller's string as-is */
  }
  sgr(s);
}
void bye(void) { /* Force my personal prejudices upon the poor luser */ if (LOG_ENABLED) fprintf(log_fp, "Cleanup & exit\n"); default_level(); /* Enter ANSI mode (if in VT52 mode) */ decckm(FALSE); /* cursor keys normal */ deccolm(FALSE); /* 80 col mode */ decscnm(FALSE); /* Normal screen */ decom(FALSE); /* Absolute origin mode */ decawm(TRUE); /* Wrap around on */ decarm(TRUE); /* Auto repeat on */ decstbm(0, 0); /* No scroll region */ sgr("0"); /* Normal character attributes */ /* Say goodbye */ vt_clear(2); vt_move(12, 30); printf("That's all, folks!\n"); printf("\n\n\n"); inflush(); close_tty(); exit(EXIT_SUCCESS); }
int bug_d(MENU_ARGS) { int i; char result; /* Make the bug appear */ do { cup(14, 1); /* The original code in the article says * PRINT ESC$; "[13;1H"; CHR$(10%); * but I guess a cup(14,1); would do. * (To output a pure LF might be tricky). */ deccolm(TRUE); /* Make the bug visible */ cup(1, 9); decdwl(); println("You should see blinking text at the bottom line."); cup(3, 9); decdwl(); println("Enter 0 to exit, 1 to try to invoke the bug again."); cup(max_lines, 9); decdwl(); sgr("1;5;7"); printf("If you can see this then the bug did not appear."); sgr(""); cup(4, 9); decdwl(); result = inchar(); readnl(); deccolm(FALSE); } while (result == '1'); decsclm(TRUE); /* Syrup scroll */ cup(max_lines - 1, 1); for (i = 1; i <= 5; i++) println("If the bug is present, this should make things much worse!"); holdit(); decsclm(FALSE); /* Jump scroll */ return MENU_NOHOLD; }
/*
 * Select foreground color 'fg' by sending SGR "3<fg>".  Does nothing when
 * colors are disabled.
 */
static void
set_foreground(int fg)
{
  char param[80];

  if (!do_colors) {
    return;
  }
  (void) sprintf(param, "3%d", fg);
  sgr(param);
}
/*
 * Select foreground 'fg' and background 'bg' together by sending a single
 * SGR "3<fg>;4<bg>".  Does nothing when colors are disabled.
 */
static void
set_color_pair(int fg, int bg)
{
  char param[80];

  if (!do_colors) {
    return;
  }
  (void) sprintf(param, "3%d;4%d", fg, bg);
  sgr(param);
}
/*
 * Select background color 'bg' by sending SGR "4<bg>".  Does nothing when
 * colors are disabled.
 */
static void
set_background(int bg)
{
  char param[80];

  if (!do_colors) {
    return;
  }
  (void) sprintf(param, "4%d", bg);
  sgr(param);
}
/* Set up my personal prejudices */ int setup_terminal(MENU_ARGS) { if (LOG_ENABLED) fprintf(log_fp, "Setup Terminal with test-defaults\n"); default_level(); /* Enter ANSI mode (if in VT52 mode) */ decckm(FALSE); /* cursor keys normal */ deccolm(FALSE); /* 80 col mode */ decsclm(FALSE); /* Jump scroll */ decscnm(FALSE); /* Normal screen */ decom(FALSE); /* Absolute origin mode */ decawm(TRUE); /* Wrap around on */ decarm(FALSE); /* Auto repeat off */ sm("?40"); /* Enable 80/132 switch (xterm) */ rm("?45"); /* Disable reverse wrap (xterm) */ decstbm(0, 0); /* No scroll region */ sgr("0"); /* Normal character attributes */ return MENU_NOHOLD; }
static int test_SGR_0(MENU_ARGS) { vt_move(1, 1); println(the_title); println(""); println("ECMA-48 states that SGR 0 \"cancels the effect of any preceding occurrence"); println("of SGR in the data stream regardless of the setting of the graphic rendition"); println("combination mode (GRCM)\"."); println(""); println(""); reset_colors(); printf("You should see only black:"); sgr("30;40"); printf("SGR 30 and SGR 40 don't work"); reset_colors(); println(":up to here"); reset_colors(); printf("You should see only white:"); sgr("37;47"); printf("SGR 37 and SGR 47 don't work"); reset_colors(); println(":up to here"); reset_colors(); printf("You should see text here: "); sgr("30;40"); sgr("0"); printf("SGR 0 reset works (explicit 0)"); println(""); reset_colors(); printf("................and here: "); sgr("37;47"); sgr(""); printf("SGR 0 reset works (default param)"); println(""); reset_colors(); holdit(); return MENU_NOHOLD; }
/*
 * Return to the default colors.
 *
 * Some terminals will reset colors with SGR-0; I've added the 39, 49 codes
 * for those that are ISO compliant.  (The black/white codes are for emulators
 * written by people who don't bother reading standards).
 */
static void
reset_colors(void)
{
  /* explicit black/white plus the ISO "default color" codes ... */
  sgr("0;40;37;39;49");
  /* ... followed by a plain reset */
  sgr("0");
}
int tst_doublesize(MENU_ARGS) { /* Test of: DECSWL (Single Width Line) DECDWL (Double Width Line) DECDHL (Double Height Line) (also implicit double width) */ int col, i, w, w1; /* Print the test pattern in both 80 and 132 character width */ for (w = 0; w <= 1; w++) { w1 = 13 * w; ed(2); cup(1, 1); if (w) { deccolm(TRUE); printf("%3d column mode", max_cols); } else { deccolm(FALSE); printf("%3d column mode", min_cols); } cup(5, 3 + 2 * w1); printf("v------- left margin"); cup(7, 3 + 2 * w1); printf("This is a normal-sized line"); decdhl(0); decdhl(1); decdwl(); decswl(); cup(9, 2 + w1); printf("This is a Double-width line"); decswl(); decdhl(0); decdhl(1); decdwl(); cup(11, 2 + w1); decdwl(); decswl(); decdhl(1); decdhl(0); printf("This is a Double-width-and-height line"); cup(12, 2 + w1); decdwl(); decswl(); decdhl(0); decdhl(1); printf("This is a Double-width-and-height line"); cup(14, 2 + w1); decdwl(); decswl(); decdhl(1); decdhl(0); el(2); printf("This is another such line"); cup(15, 2 + w1); decdwl(); decswl(); decdhl(0); decdhl(1); printf("This is another such line"); cup(17, 3 + 2 * w1); printf("^------- left margin"); cup(21, 1); printf("This is not a double-width line"); for (i = 0; i <= 1; i++) { cup(21, 6); if (i) { printf("**is**"); decdwl(); } else { printf("is not"); decswl(); } cup(max_lines - 1, 1); holdit(); } } /* Set vanilla tabs for next test */ cup(1, 1); tbc(3); for (col = 1; col <= max_cols; col += TABWIDTH) { cuf(TABWIDTH); hts(); } deccolm(FALSE); ed(2); /* *INDENT-OFF* */ scs_graphics(); cup( 8,1); decdhl(0); printf("lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqk"); cup( 9,1); decdhl(1); printf("lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqk"); cup(10,1); decdhl(0); printf("x%c%c%c%c%cx",9,9,9,9,9); cup(11,1); decdhl(1); printf("x%c%c%c%c%cx",9,9,9,9,9); cup(12,1); decdhl(0); printf("x%c%c%c%c%cx",9,9,9,9,9); cup(13,1); decdhl(1); printf("x%c%c%c%c%cx",9,9,9,9,9); scs(1, '0'); /* should look the same as scs_graphics() */ cup(14,1); decdhl(0); 
printf("x x"); cup(15,1); decdhl(1); printf("x x"); cup(16,1); decdhl(0); printf("mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqj"); cup(17,1); decdhl(1); printf("mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqj"); scs_normal(); /* *INDENT-ON* */ sgr("1;5"); cup(12, 3); printf("* The mad programmer strikes again * "); cup(13, 3); printf("%c", 9); cub(6); printf("* The mad programmer strikes again *"); sgr("0"); cup(max_lines - 2, 1); println("Another test pattern... a frame with blinking bold text,"); printf("all in double-height double-width size. "); holdit(); decstbm(8, max_lines); /* Absolute origin mode, so cursor is set at (1,1) */ cup(8, 1); for (i = 1; i <= 12; i++) ri(); decstbm(0, 0); /* No scroll region */ cup(1, 1); printf("%s", "Exactly half of the box should remain. "); return MENU_HOLD; }
int tst_screen(MENU_ARGS) { /* Test of: - DECSTBM (Set Top and Bottom Margins) - TBC (Tabulation Clear) - HTS (Horizontal Tabulation Set) - SM RM (Set/Reset mode): - 80/132 chars . - Origin: Relative/absolute . - Scroll: Smooth/jump . - Wraparound - SGR (Select Graphic Rendition) - SM RM (Set/Reset Mode) - Inverse - DECSC (Save Cursor) - DECRC (Restore Cursor) */ int i, j, cset, row, col, background; static const char *tststr = "*qx`"; static const char *attr[5] = { ";0", ";1", ";4", ";5", ";7" }; set_tty_crmod(TRUE); /* want to disable tab/space conversion */ cup(1, 1); decawm(TRUE); /* DECAWM: Wrap Around ON */ for (col = 1; col <= min_cols * 2; col++) printf("*"); decawm(FALSE); /* DECAWM: Wrap Around OFF */ cup(3, 1); for (col = 1; col <= min_cols * 2; col++) printf("*"); decawm(TRUE); /* DECAWM: Wrap Around ON */ cup(5, 1); println("This should be three identical lines of *'s completely filling"); println("the top of the screen without any empty lines between."); println("(Test of WRAP AROUND mode setting.)"); holdit(); ed(2); tbc(3); cup(1, 1); for (col = 1; col <= min_cols - 2; col += 3) { cuf(3); hts(); } cup(1, 4); for (col = 4; col <= min_cols - 2; col += 6) { tbc(0); cuf(6); } cup(1, 7); tbc(1); tbc(2); /* no-op */ cup(1, 1); for (col = 1; col <= min_cols - 2; col += 6) printf("%c*", TAB); cup(2, 2); for (col = 2; col <= min_cols - 2; col += 6) printf(" *"); cup(4, 1); println("Test of TAB setting/resetting. These two lines"); printf("should look the same. "); holdit(); for (background = 0; background <= 1; background++) { if (background) decscnm(FALSE); else decscnm(TRUE); deccolm(TRUE); /* 132 cols */ ed(2); /* VT100 clears screen on SM3/RM3, but not obviously, so... */ cup(1, 1); tbc(3); for (col = 1; col <= max_cols; col += TABWIDTH) { cuf(TABWIDTH); hts(); } cup(1, 1); for (col = 1; col <= max_cols; col += 10) printf("%.*s", (max_cols > col) ? 
(max_cols - col) : 10, "1234567890"); for (row = 3; row <= 20; row++) { cup(row, row); printf("This is %d column mode, %s background.", max_cols, background ? "dark" : "light"); } holdit(); deccolm(FALSE); /* 80 cols */ ed(2); /* VT100 clears screen on SM3/RM3, but not obviously, so... */ cup(1, 1); for (col = 1; col <= min_cols; col += 10) printf("%.*s", (min_cols > col) ? (min_cols - col) : 10, "1234567890"); for (row = 3; row <= 20; row++) { cup(row, row); printf("This is %d column mode, %s background.", min_cols, background ? "dark" : "light"); } holdit(); } do_scrolling(); ed(2); decstbm(max_lines - 1, max_lines); printf( "\nOrigin mode test. This line should be at the bottom of the screen."); cup(1, 1); printf("%s", "This line should be the one above the bottom of the screen. "); holdit(); ed(2); decom(FALSE); /* Origin mode (absolute) */ cup(max_lines, 1); printf( "Origin mode test. This line should be at the bottom of the screen."); cup(1, 1); printf("%s", "This line should be at the top of the screen. 
"); holdit(); decstbm(1, max_lines); ed(2); /* *INDENT-OFF* */ cup( 1,20); printf("Graphic rendition test pattern:"); cup( 4, 1); sgr("0"); printf("vanilla"); cup( 4,40); sgr("0;1"); printf("bold"); cup( 6, 6); sgr(";4"); printf("underline"); cup( 6,45);sgr(";1");sgr("4");printf("bold underline"); cup( 8, 1); sgr("0;5"); printf("blink"); cup( 8,40); sgr("0;5;1"); printf("bold blink"); cup(10, 6); sgr("0;4;5"); printf("underline blink"); cup(10,45); sgr("0;1;4;5"); printf("bold underline blink"); cup(12, 1); sgr("1;4;5;0;7"); printf("negative"); cup(12,40); sgr("0;1;7"); printf("bold negative"); cup(14, 6); sgr("0;4;7"); printf("underline negative"); cup(14,45); sgr("0;1;4;7"); printf("bold underline negative"); cup(16, 1); sgr("1;4;;5;7"); printf("blink negative"); cup(16,40); sgr("0;1;5;7"); printf("bold blink negative"); cup(18, 6); sgr("0;4;5;7"); printf("underline blink negative"); cup(18,45); sgr("0;1;4;5;7"); printf("bold underline blink negative"); /* *INDENT-ON* */ sgr(""); decscnm(FALSE); /* Inverse video off */ cup(max_lines - 1, 1); el(0); printf("Dark background. "); holdit(); decscnm(TRUE); /* Inverse video */ cup(max_lines - 1, 1); el(0); printf("Light background. "); holdit(); decscnm(FALSE); ed(2); /* *INDENT-OFF* */ cup(8,12); printf("normal"); cup(8,24); printf("bold"); cup(8,36); printf("underscored"); cup(8,48); printf("blinking"); cup(8,60); printf("reversed"); cup(10,1); printf("stars:"); cup(12,1); printf("line:"); cup(14,1); printf("x'es:"); cup(16,1); printf("diamonds:"); /* *INDENT-ON* */ for (cset = 0; cset <= 3; cset++) { for (i = 0; i <= 4; i++) { cup(10 + 2 * cset, 12 + 12 * i); sgr(attr[i]); if (cset == 0 || cset == 2) scs_normal(); else scs_graphics(); for (j = 0; j <= 4; j++) { printf("%c", tststr[cset]); } decsc(); cup(cset + 1, i + 1); sgr(""); scs_normal(); printf("A"); decrc(); for (j = 0; j <= 4; j++) { printf("%c", tststr[cset]); } } } sgr("0"); scs_normal(); cup(21, 1); println("Test of the SAVE/RESTORE CURSOR feature. 
There should"); println("be ten characters of each flavour, and a rectangle"); println("of 5 x 4 A's filling the top left of the screen."); restore_ttymodes(); return MENU_HOLD; }
// Compute the non-local energy term and, on request, related quantities.
//
// For every species is with npr[is] > 0, a projection matrix fnl is formed
// from *anl[is] and the wavefunction coefficients sd_.c(); enl accumulates
// occ * wt[is][ipr] * omega_inv * fnl^2 over the local array and is summed
// across the context with ctxt_.dsum before being returned.
//
// Parameters:
//   compute_hpsi   - if true, the product anl * fnl is accumulated into
//                    dsd.c().
//   dsd            - written only when compute_hpsi is true.
//   compute_forces - if true, per-atom contributions are added to
//                    fion[is][3*ia+0..2].
//   fion           - incremented (not reset) when compute_forces is true.
//   compute_stress - if true, sigma_enl[0..5] are filled from enl and the
//                    six accumulated tsum values.
//   sigma_enl      - set to 0.0, then filled when compute_stress is true.
//
// Returns enl.
//
// NOTE(review): when nspnl == 0 the function returns 0.0 before the
// "sigma_enl = 0.0" statement is reached, so sigma_enl is left untouched on
// that path - confirm callers do not rely on it being cleared.
// NOTE(review): compute_anl is a local constant set to false, so the
// "if ( compute_anl )" branch below is never executed as written.
double NonLocalPotential::energy(bool compute_hpsi, SlaterDet& dsd,
  bool compute_forces, vector<vector<double> >& fion,
  bool compute_stress, valarray<double>& sigma_enl)
{
  const bool compute_anl = false;

  const vector<double>& occ = sd_.occ();
  const int ngwl = basis_.localsize();
  // define atom block size
  const int na_block_size = 32;
  valarray<double> gr(na_block_size*ngwl); // gr[ig+ia*ngwl]
  valarray<double> cgr(na_block_size*ngwl); // cgr[ig+ia*ngwl]
  valarray<double> sgr(na_block_size*ngwl); // sgr[ig+ia*ngwl]
  vector<vector<double> > tau;
  atoms_.get_positions(tau);

  double enl = 0.0;
  // tsum[ij]: accumulator for the six stress components (used only when
  // compute_stress is true)
  double tsum[6] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };

  if ( nspnl == 0 ) return 0.0;

  const double omega = basis_.cell().volume();
  assert(omega != 0.0);
  const double omega_inv = 1.0 / omega;

  for ( int is = 0; is < nsp; is++ )
  {
    if ( npr[is] > 0 ) // species is is non-local
    {
      if ( compute_anl )
      {
        // NOTE(review): this branch is unreachable while compute_anl is the
        // constant false above; it appears to be an alternative, atom-blocked
        // implementation - confirm its status before enabling it.

        // define number of atom blocks
        const int na_blocks = na[is] / na_block_size +
                              ( na[is] % na_block_size == 0 ? 0 : 1 );

        valarray<double> anl_loc(npr[is]*na_block_size*2*ngwl);
        const int nstloc = sd_.nstloc();
        // fnl_loc[ipra][n]
        valarray<double> fnl_loc(npr[is]*na_block_size*nstloc);
        valarray<double> fnl_buf(npr[is]*na_block_size*nstloc);
        for ( int ia_block = 0; ia_block < na_blocks; ia_block++ )
        {
          // process projectors of atoms in block ia_block

          const int iastart = ia_block * na_block_size;
          const int iaend = (ia_block+1) * na_block_size < na[is] ?
                          (ia_block+1) * na_block_size :
                          na[is];
          const int ia_block_size = iaend - iastart;

          // compute cgr[is][ia][ig], sgr[is][ia][ig]
          int k = 3;
          double mone = -1.0, zero = 0.0;
          char cn='n';

          // next line: const cast is ok since dgemm_ does not modify argument
          double* gx = const_cast<double*>(basis_.gx_ptr(0));
          dgemm(&cn,&cn,(int*)&ngwl,(int*)&ia_block_size,&k,&mone,
                gx,(int*)&ngwl, &tau[is][3*iastart],&k,
                &zero,&gr[0],(int*)&ngwl);

          int len = ia_block_size * ngwl;
          // NOTE(review): the AIX/BGL variant indexes sgr and cgr as
          // [is][0], while both are declared above as valarray<double> -
          // verify before building with either macro defined.
#if AIX || BGL
          vsincos(&sgr[is][0],&cgr[is][0],&gr[0],&len);
#else
          for ( int i = 0; i < len; i++ )
          {
            const double arg = gr[i];
            sgr[i] = sin(arg);
            cgr[i] = cos(arg);
          }
#endif

          // compute anl_loc
          for ( int ipr = 0; ipr < npr[is]; ipr++ )
          {
            // twnl[is][ig+ngwl*ipr]
            const double * t = &twnl[is][ngwl*ipr];
            const int l = lproj[is][ipr];

            // anl_loc[ig+ipra*ngwl]
            double * a = &anl_loc[ipr*ia_block_size*ngwl];

            // NOTE(review): the two stores per ig below target a[ig+...] and
            // a[ig+1+...], so consecutive ig iterations overlap - confirm the
            // intended indexing.
            if ( l == 0 )
            {
              for ( int ia = 0; ia < ia_block_size; ia++ )
              {
                for ( int ig = 0; ig < ngwl; ig++ )
                {
                  a[ig+ia*ngwl] = t[ig] * cgr[ig+ia*ngwl];
                  a[ig+1+ia*ngwl] = t[ig] * sgr[ig+ia*ngwl];
                }
              }
            }
            else if ( l == 1 )
            {
              for ( int ia = 0; ia < ia_block_size; ia++ )
              {
                for ( int ig = 0; ig < ngwl; ig++ )
                {
                  /* Next line: -i * eigr */
                  /* -i * (a+i*b) = b - i*a */
                  a[ig+ia*ngwl] = t[ig] * sgr[ig+ia*ngwl];
                  a[ig+1+ia*ngwl] = -t[ig] * cgr[ig+ia*ngwl];
                }
              }
            }
            else if ( l == 2 )
            {
              for ( int ia = 0; ia < ia_block_size; ia++ )
              {
                for ( int ig = 0; ig < ngwl; ig++ )
                {
                  // Next line: (-) sign for -eigr
                  a[ig+ia*ngwl] = -t[ig] * cgr[ig+ia*ngwl];
                  a[ig+1+ia*ngwl] = -t[ig] * sgr[ig+ia*ngwl];
                }
              }
            }
          } // ipr

          // array anl_loc is complete

          // compute fnl[npra][nstloc] = anl^T * c
          double one=1.0;
          char ct='t';
          int twongwl = 2 * ngwl;
          int nprnaloc = ia_block_size * npr[is];
          const complex<double>* c = sd_.c().cvalptr();
          dgemm(&ct,&cn,&nprnaloc,(int*)&nstloc,&twongwl,&one,
                &anl_loc[0],&twongwl, (double*)c, &twongwl,
                &zero,&fnl_loc[0],&nprnaloc);

          // correct for double counting if ctxt_.myrow() == 0
          if ( ctxt_.myrow() == 0 )
          {
            // rank-one update
            // dger(m,n,alpha,x,incx,y,incy,a,lda);
            // a += alpha * x * transpose(y)
            // x = first row of anl_loc
            // y^T = first row of c
            double alpha = -0.5;
            dger(&nprnaloc,(int*)&nstloc,&alpha,&anl_loc[0],&twongwl,
                 (double*)c,&twongwl,&fnl_loc[0],&nprnaloc);
          }

          // Allreduce fnl partial sum
          MPI_Comm basis_comm = basis_.context().comm();
          // NOTE(review): fnl_size is declared double but is passed as the
          // element count of MPI_Allreduce - confirm the intended type.
          double fnl_size = nprnaloc*nstloc;
          MPI_Allreduce(&fnl_loc[0],&fnl_buf[0],fnl_size,
                        MPI_DOUBLE,MPI_SUM,basis_comm);

          // factor 2.0 in next line is: counting G, -G
          fnl_loc = 2.0 * fnl_buf;

          // accumulate Enl contribution
          const int nbase = ctxt_.mycol() * sd_.c().nb();
          for ( int ipr = 0; ipr < npr[is]; ipr++ )
          {
            const double fac = wt[is][ipr] * omega_inv;
            for ( int n = 0; n < nstloc; n++ )
            {
              const double facn = fac * occ[n + nbase];
              for ( int ia = 0; ia < ia_block_size; ia++ )
              {
                const int i = ia + ipr*ia_block_size + n * nprnaloc;
                // prints every fnl_loc element to stdout
                cout << "fnl_loc[ipr=" << ipr << ",ia=" << ia
                     << ",n=" << n << "]: " << fnl_loc[i] << endl;
                const double tmp = fnl_loc[i];
                enl += facn * tmp * tmp;
                // fnl_loc is overwritten with the scaled value used below
                fnl_loc[i] = fac * tmp;
              }
            }
          }

          if ( compute_hpsi )
          {
            // compute cp += anl * fnl
            complex<double>* cp = dsd.c().valptr();
            dgemm(&cn,&cn,&twongwl,(int*)&nstloc,&nprnaloc,&one,
                  &anl_loc[0],&twongwl, &fnl_loc[0],&nprnaloc,
                  &one,(double*)cp, &twongwl);
          }

          // forces and stress are not handled in this branch
          assert(compute_forces==false);
          assert(compute_stress==false);

        } // ia_block
      }
      else
      {
        // compute fnl
        // block distribution for fnl: same as SlaterDet for nst
        DoubleMatrix fnl(ctxt_,anl[is]->n(),sd_.c().n(),
                         anl[is]->nb(),sd_.c().nb());

        const DoubleMatrix c_proxy(sd_.c());

        tmap["fnl_gemm"].start();
        fnl.gemm('t','n',2.0,*anl[is],c_proxy,0.0);
        tmap["fnl_gemm"].stop();

        // correct for double counting of G=0 components
        // rank-1 update using first row of *anl[is] and c_proxy
        fnl.ger(-1.0,*anl[is],0,c_proxy,0);
        // NOTE(review): the next statement writes the whole fnl matrix to
        // stdout on every call for every non-local species; it looks like
        // leftover debug output - confirm whether it is intended.
        cout << fnl << endl;

        // compute the non-local energy
        // multiply fnl[ipra+nprna*n] by fac = wt[is][ipr] * omega_inv;
        // block sizes: npr*nalocmax x c().nb()
        // loop over local array
        double*f = fnl.valptr(0);
        const int mb = fnl.mb();
        const int nb = fnl.nb();
        const int mloc = fnl.mloc();
        for ( int li=0; li < fnl.mblocks(); li++)
        {
          const int mbs = fnl.mbs(li);
          for ( int lj=0; lj < fnl.nblocks(); lj++)
          {
            const int nbs = fnl.nbs(lj);
            for ( int ii=0; ii < mbs; ii++)
            {
              assert(mbs%npr[is]==0);
              // mbs/npr[is] is the number of atoms in the block li
              const int ipr = ii / (mbs/npr[is]);
              const double fac = wt[is][ipr] * omega_inv;
              for ( int jj=0; jj < nbs; jj++)
              {
                // global index: i(li,ii), j(lj,jj)
                const int nglobal = fnl.j(lj,jj);
                const double facn = fac * occ[nglobal];
                const int iii = ii+li*mb;
                const int jjj = jj+lj*nb;
                const double tmp = f[iii+mloc*jjj];
                enl += facn * tmp * tmp;
                // fnl is overwritten in place with fac * fnl; the hpsi,
                // force and stress sections below read this scaled value
                f[iii+mloc*jjj] = fac * tmp;
              }
            }
          }
        }

        if ( compute_hpsi )
        {
          tmap["enl_hpsi"].start();
          // Apply operator to electronic states and accumulate in dsd
          DoubleMatrix cp_proxy(dsd.c());
          cp_proxy.gemm('n','n',1.0,*anl[is],fnl,1.0);
          tmap["enl_hpsi"].stop();
        }

        // ionic forces
        if ( compute_forces )
        {
          tmap["enl_fion"].start();
          // local accumulator for the 3*na[is] force components; summed
          // over the context with dsum before being added to fion.
          // NOTE(review): raw new[]/delete[] - the buffer is not released if
          // anything between them throws.
          double *tmpfion = new double[3*na[is]];
          for ( int i = 0; i < 3*na[is]; i++ )
            tmpfion[i] = 0.0;

          DoubleMatrix danl(ctxt_,anl[is]->m(),anl[is]->n(),
            anl[is]->mb(),anl[is]->nb());
          DoubleMatrix dfnl(ctxt_,fnl.m(),fnl.n(),fnl.mb(),fnl.nb());
          // shadows the function-scope ngwl with the same expression
          const int ngwl = basis_.localsize();
          for ( int j = 0; j < 3; j++ )
          {
            const double *const gxj = basis_.gx_ptr(j);
            for ( int ipr = 0; ipr < npr[is]; ipr++ )
            {
              const int l = lproj[is][ipr];

              // twnl[is][ig+ngwl*ipr]
              const double *t = &twnl[is][ngwl*ipr];
              for ( int ia = 0; ia < naloc[is]; ia++ )
              {
                // danl[ig+ipra*ngwl]
                // index = ig+cmloc_anl*(ia+nais*ipr), ig=0
                const int ipra = ia+naloc[is]*ipr;
                double *da = danl.valptr(2*(sd_.c().mloc()*ipra));
                const double *c = &cosgr[is][ia*ngwl];
                const double *s = &singr[is][ia*ngwl];

                // each ig writes two consecutive doubles through da
                if ( l == 0 )
                {
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    const double tt = gxj[ig] * t[ig];
                    // Next lines: -i * ( a + ib ) = b - ia
                    *da++ =  tt * *s++;
                    *da++ = -tt * *c++;
                  }
                }
                else if ( l == 1 )
                {
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    // Next lines: (-i)**2 * ( a + ib ) = - a - ib
                    const double tt = - gxj[ig] * t[ig];
                    *da++ = tt * *c++;
                    *da++ = tt * *s++;
                  }
                }
                else if ( l == 2 )
                {
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    // Next lines: (-i) * - ( a + ib ) = i*(a+ib) = - b + ia
                    const double tt = gxj[ig] * t[ig];
                    *da++ = -tt * *s++;
                    *da++ =  tt * *c++;
                  }
                }
              } // ia
            } // ipr

            // compute dfnl
            const DoubleMatrix c_proxy(sd_.c());

            dfnl.gemm('t','n',2.0,danl,c_proxy,0.0);

            // Note: no need to correct for double counting of the
            // G=0 component which is always zero

            // non-local forces

            // loop over local array
            // block sizes: npr*nalocmax x c().nb()
            const double*f = fnl.valptr(0);
            const double*df = dfnl.valptr(0);
            const int mloc = fnl.mloc();
            const int mb = fnl.mb();
            const int nb = fnl.nb();
            for ( int li=0; li < fnl.mblocks(); li++)
            {
              // find index of first atom in block li
              const int ia_first = nalocmax[is] *
                ( li * fnl.context().nprow() + fnl.context().myrow() );
              const int mbs = fnl.mbs(li);
              for ( int lj=0; lj < fnl.nblocks(); lj++)
              {
                const int nbs = fnl.nbs(lj);
                for ( int ii=0; ii < mbs; ii++)
                {
                  // ia_local: index of atom within block li
                  const int ia_local = ii % ( mbs / npr[is] );
                  const int ia_global = ia_local + ia_first;
                  assert(3*ia_global+j < 3*na[is]);

                  for ( int jj=0; jj < nbs; jj++)
                  {
                    const int nglobal = fnl.j(lj,jj);
                    // Factor 2.0 in next line from derivative of |Fnl|^2
                    const double facn = 2.0 * occ[nglobal];
                    const int iii = ii+li*mb;
                    const int jjj = jj+lj*nb;
                    tmpfion[3*ia_global+j] -= facn *
                      f[iii+mloc*jjj] * df[iii+mloc*jjj];
                  }
                }
              }
            }
          } // j

          ctxt_.dsum(3*na[is],1,tmpfion,3*na[is]);
          for ( int ia = 0; ia < na[is]; ia++ )
          {
            fion[is][3*ia+0] += tmpfion[3*ia];
            fion[is][3*ia+1] += tmpfion[3*ia+1];
            fion[is][3*ia+2] += tmpfion[3*ia+2];
          }
          delete [] tmpfion;
          tmap["enl_fion"].stop();
        } // compute_forces

        if ( compute_stress )
        {
          // shadows the function-scope ngwl with the same expression
          const int ngwl = basis_.localsize();
          DoubleMatrix danl(ctxt_,anl[is]->m(),anl[is]->n(),
            anl[is]->mb(),anl[is]->nb());
          DoubleMatrix dfnl(ctxt_,fnl.m(),fnl.n(),fnl.mb(),fnl.nb());

          for ( int ij = 0; ij < 6; ij++ )
          {
            // projectors are visited in groups of 2*l+1 (see the increment
            // at the end of the while loop)
            int ipr = 0;
            while ( ipr < npr[is] )
            {
              const int l = lproj[is][ipr];
              if ( l == 0 )
              {
                // dtwnl[is][ipr][ij][ngwl]
                // index = ig + ngwl * ( ij + 6 * ipr))
                // ipr = iquad + nquad[is] * ilm, where ilm = 0
                const double *const dt0 = &dtwnl[is][ngwl*(ij+6*ipr)];
                for ( int ia = 0; ia < naloc[is]; ia++ )
                {
                  const int ipra0 = ia+naloc[is]*ipr;
                  double *da0 = danl.valptr(2*(sd_.c().mloc()*ipra0));
                  const double *c = &cosgr[is][ia*ngwl];
                  const double *s = &singr[is][ia*ngwl];
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    const double d0 = dt0[ig];
                    // danl[is][ipr][iquad][ia][ig].re =
                    //   dtwnl[is][ipr][iquad][j][ig] * cosgr[is][ia][ig]
                    *da0++ = *c++ * d0;
                    // danl[is][ipr][iquad][ia][ig].im =
                    //   dtwnl[is][ipr][iquad][j][ig] * singr[is][ia][ig]
                    *da0++ = *s++ * d0;
                  }
                }
              }
              else if ( l == 1 )
              {
                const int ipr1 = ipr;
                const int ipr2 = ipr + 1;
                const int ipr3 = ipr + 2;
                // dtwnl[is][ipr][ij][ngwl]
                // index = ig + ngwl * ( ij + 6 * iprx ))
                const double *dt1 = &dtwnl[is][ngwl*(ij+6*ipr1)];
                const double *dt2 = &dtwnl[is][ngwl*(ij+6*ipr2)];
                const double *dt3 = &dtwnl[is][ngwl*(ij+6*ipr3)];
                for ( int ia = 0; ia < naloc[is]; ia++ )
                {
                  const int ipra1 = ia+naloc[is]*ipr1;
                  const int ipra2 = ia+naloc[is]*ipr2;
                  const int ipra3 = ia+naloc[is]*ipr3;
                  double *da1 = danl.valptr(2*(sd_.c().mloc()*ipra1));
                  double *da2 = danl.valptr(2*(sd_.c().mloc()*ipra2));
                  double *da3 = danl.valptr(2*(sd_.c().mloc()*ipra3));
                  const double *c = &cosgr[is][ia*ngwl];
                  const double *s = &singr[is][ia*ngwl];
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    const double d1 = dt1[ig];
                    const double d2 = dt2[ig];
                    const double d3 = dt3[ig];
                    // Next line: (-i)^l factor is -i
                    // Next line: -i * eigr
                    // -i * (a+i*b) = b - i*a
                    const double tc = -*c++; // -cosgr[is][ia][ig]
                    const double ts = *s++; // singr[is][ia][ig]
                    *da1++ = d1 * ts;
                    *da1++ = d1 * tc;
                    *da2++ = d2 * ts;
                    *da2++ = d2 * tc;
                    *da3++ = d3 * ts;
                    *da3++ = d3 * tc;
                  }
                }
              }
              else if ( l == 2 )
              {
                const int ipr4 = ipr;
                const int ipr5 = ipr + 1;
                const int ipr6 = ipr + 2;
                const int ipr7 = ipr + 3;
                const int ipr8 = ipr + 4;
                // dtwnl[is][ipr][iquad][ij][ngwl]
                // index = ig + ngwl * ( ij + 6 * ( iquad + nquad[is] * ipr ))
                const double *dt4 = &dtwnl[is][ngwl*(ij+6*ipr4)];
                const double *dt5 = &dtwnl[is][ngwl*(ij+6*ipr5)];
                const double *dt6 = &dtwnl[is][ngwl*(ij+6*ipr6)];
                const double *dt7 = &dtwnl[is][ngwl*(ij+6*ipr7)];
                const double *dt8 = &dtwnl[is][ngwl*(ij+6*ipr8)];
                for ( int ia = 0; ia < naloc[is]; ia++ )
                {
                  const int ipra4 = ia+naloc[is]*ipr4;
                  const int ipra5 = ia+naloc[is]*ipr5;
                  const int ipra6 = ia+naloc[is]*ipr6;
                  const int ipra7 = ia+naloc[is]*ipr7;
                  const int ipra8 = ia+naloc[is]*ipr8;
                  double *da4 = danl.valptr(2*(sd_.c().mloc()*ipra4));
                  double *da5 = danl.valptr(2*(sd_.c().mloc()*ipra5));
                  double *da6 = danl.valptr(2*(sd_.c().mloc()*ipra6));
                  double *da7 = danl.valptr(2*(sd_.c().mloc()*ipra7));
                  double *da8 = danl.valptr(2*(sd_.c().mloc()*ipra8));
                  const double *c = &cosgr[is][ia*ngwl];
                  const double *s = &singr[is][ia*ngwl];
                  for ( int ig = 0; ig < ngwl; ig++ )
                  {
                    const double d4 = dt4[ig];
                    const double d5 = dt5[ig];
                    const double d6 = dt6[ig];
                    const double d7 = dt7[ig];
                    const double d8 = dt8[ig];
                    // Next lines: (-i)^2 * ( a + ib ) =  - ( a + ib )
                    const double tc = -*c++;
                    const double ts = -*s++;
                    *da4++ = d4 * tc;
                    *da4++ = d4 * ts;
                    *da5++ = d5 * tc;
                    *da5++ = d5 * ts;
                    *da6++ = d6 * tc;
                    *da6++ = d6 * ts;
                    *da7++ = d7 * tc;
                    *da7++ = d7 * ts;
                    *da8++ = d8 * tc;
                    *da8++ = d8 * ts;
                  }
                }
              }
              else
              {
                // only l = 0, 1, 2 are handled
                assert(false);
              } // l
              ipr += 2*l+1;
            } // while ipr

            // compute dfnl
            const DoubleMatrix c_proxy(sd_.c());

            dfnl.gemm('t','n',2.0,danl,c_proxy,0.0);

            // Note: no need to correct for double counting of the
            // G=0 component which is always zero

            // partial contributions to the stress sigma_ij
            // Note: fnl was already premultiplied by the factor
            // fac = wt[is][ipr][iquad] * omega_inv;
            const double *const f = fnl.cvalptr(0);
            const double *const df = dfnl.cvalptr(0);
            const int mb = fnl.mb();
            const int nb = fnl.nb();
            const int mloc = fnl.mloc();
            for ( int li=0; li < fnl.mblocks(); li++)
            {
              const int mbs = fnl.mbs(li);
              for ( int lj=0; lj < fnl.nblocks(); lj++)
              {
                const int nbs = fnl.nbs(lj);
                for ( int ii=0; ii < mbs; ii++)
                {
                  for ( int jj=0; jj < nbs; jj++)
                  {
                    // global index: i(li,ii), j(lj,jj)
                    const int nglobal = fnl.j(lj,jj);
                    const double facn = 2.0 * occ[nglobal];
                    const int iii = ii+li*mb;
                    const int jjj = jj+lj*nb;
                    const double tmp = f[iii+mloc*jjj];
                    const double dtmp = df[iii+mloc*jjj];
                    tsum[ij] += facn * tmp * dtmp;
                  }
                }
              }
            }
          } // ij
        } // compute_stress
      } // compute_anl
    } // npr[is]>0
  } // is

  // sum the local energy contributions over the context
  ctxt_.dsum(1,1,&enl,1);

  sigma_enl = 0.0;
  if ( compute_stress )
  {
    ctxt_.dsum(6,1,&tsum[0],6);
    // enl is added to the first three components only
    sigma_enl[0] = ( enl + tsum[0] ) * omega_inv;
    sigma_enl[1] = ( enl + tsum[1] ) * omega_inv;
    sigma_enl[2] = ( enl + tsum[2] ) * omega_inv;
    sigma_enl[3] = + tsum[3] * omega_inv;
    sigma_enl[4] = + tsum[4] * omega_inv;
    sigma_enl[5] = + tsum[5] * omega_inv;
  }

  return enl;
}