kern_obj * spawn_process(const uchar * cd, ulong csz, ulong dsz, ulong bsz, kern_obj * par, uchar priv, uchar priority) { ulong i, pbase, ssz, absz, pgs; kern_obj * proc = alloc_kobj(); ssz = csz + dsz; absz = bsz; if (csz % 0x1000 != 0) csz = csz - (csz % 0x1000) + 0x1000; if (dsz % 0x1000 != 0) dsz = dsz - (dsz % 0x1000) + 0x1000; if (bsz % 0x1000 != 0) bsz = bsz - (bsz % 0x1000) + 0x1000; proc->type = KOBJ_PROCESS; proc->u.proc.parent = par; proc->u.proc.priv = priv; proc->u.proc.priority = priority; proc->u.proc.code_data_ws = alloc_ws(); pgs = csz / 0x1000 + dsz / 0x1000 + bsz / 0x1000; pbase = alloc_pages(pgs + 0x200, PHYS_PAGES); if (pbase == 0) { free_kobj(proc); return NULL; } add_pgs_to_ws(proc->u.proc.code_data_ws, USPACE_BOT, pbase, csz + dsz + bsz + 0x200000, 7); for (i = 0; i < ssz + absz; i++) { if (i % 0x1000 == 0) { ATQ(KPT0_0_LOC) = (pbase + i) | 3; INVLPG(KSPACE_LOC); } if (i < ssz) ATB(KSPACE_LOC + i % 0x1000) = cd[i]; else ATB(KSPACE_LOC + i % 0x1000) = 0; } if (spawn_thread(proc, (int (*)())USPACE_BOT) == NULL) { free_kobj(proc); free_pages(pbase, pgs, PHYS_PAGES); return NULL; } getlock(&procthrd_lock, 0); if (head_process == NULL) { proc->u.proc.next = proc; head_process = proc; } else { proc->u.proc.next = head_process->u.proc.next; head_process->u.proc.next = proc; } unlock(&procthrd_lock, 0); return proc; }
/*
 * Record a mapping (virtual base, physical base, size, flags) in the
 * working-set chain that starts at o.
 *
 * The chain is walked to its last node. If that node already holds a
 * mapping (its size is non-zero), a new node obtained from alloc_ws() is
 * appended and used instead; otherwise the last node itself is filled in.
 */
void add_pgs_to_ws(kern_obj * o, ulong vbase, ulong pbase, uint size, ushort fl)
{
    kern_obj * tail;

    /* Find the end of the chain. */
    for (tail = o; tail->u.ws.next != NULL; tail = tail->u.ws.next)
        ;

    /* A node with size 0 is treated as unused and can be filled directly. */
    if (tail->u.ws.size != 0) {
        tail->u.ws.next = alloc_ws();
        tail = tail->u.ws.next;
    }

    tail->u.ws.vbase = vbase;
    tail->u.ws.pbase = pbase;
    tail->u.ws.size = size;
    tail->u.ws.flags = fl;
}
/*
 * Create a thread belonging to proc that starts executing at func.
 *
 * Two physical pages are allocated for the stack and registered in the
 * thread's stack working set at USPACE_TOP - 0x2000. Five quadwords are then
 * written into the upper page at offsets 0xFB0..0xFD0: the entry address,
 * USER_CS | 3, the current flags, USPACE_TOP - 0x1000 and USER_DS | 3.
 * The saved stack pointer (USPACE_TOP - 0x50) refers to the first of them.
 * NOTE(review): the layout looks like an interrupt-return frame for entering
 * user mode - confirm against the context-switch code.
 *
 * Returns the new thread object, or 0 if the stack pages could not be
 * allocated.
 */
kern_obj * spawn_thread(kern_obj * proc, int (*func)())
{
    ulong cur_flags, stack_phys;
    kern_obj * sub;
    kern_obj * t = alloc_kobj();

    t->type = KOBJ_THREAD;
    t->u.thrd.proc = proc;

    sub = alloc_kobj();
    t->u.thrd.sub = sub;
    sub->type = KOBJ_THREAD_SUB;
    sub->u.thrd2.stack_ws = alloc_ws();

    stack_phys = alloc_pages(2, PHYS_PAGES);
    if (stack_phys == 0) {
        free_kobj(t->u.thrd.sub);
        free_kobj(t);
        return NULL;
    }

    add_pgs_to_ws(sub->u.thrd2.stack_ws, USPACE_TOP - 0x2000, stack_phys, 0x2000, 7);
    sub->u.thrd2.rsp = USPACE_TOP - 0x50;

    /* Make the upper stack page reachable through the window at KSPACE_LOC
     * and flush the old translation before writing the initial frame. */
    ATQ(KPT0_0_LOC) = (stack_phys + 0x1000) | 3;
    INVLPG(KSPACE_LOC);

    ATQ(KSPACE_LOC + 0xFB0) = (ulong)func;
    ATQ(KSPACE_LOC + 0xFB8) = USER_CS | 3;
    GET_FLAGS(cur_flags);
    ATQ(KSPACE_LOC + 0xFC0) = cur_flags;
    ATQ(KSPACE_LOC + 0xFC8) = USPACE_TOP - 0x1000;
    ATQ(KSPACE_LOC + 0xFD0) = USER_DS | 3;

    /* Insert the thread into the circular list right after head_thread. */
    getlock(&procthrd_lock, 1);
    if (head_thread != NULL) {
        t->u.thrd.next = head_thread->u.thrd.next;
        head_thread->u.thrd.next = t;
    } else {
        t->u.thrd.next = t;
        head_thread = t;
    }
    unlock(&procthrd_lock, 1);

    return t;
}
int main(int argc,char *argv[]) { int irw,isp,ispp[2],status[6],mnkv; int bs[4],Ns,nmx,nkv,nmr,ncy,ninv; double kappa,m0,dm,mu0,mu,res,mres; double sqne,sqnp[2]; complex_dble lnw1[2],lnr,dr,drmx; solver_parms_t sp; mrw_masses_t ms; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); if (my_rank==0) { flog=freopen("check2.log","w",stdout); fin=freopen("check2.in","r",stdin); printf("\n"); printf("Direct check of mrw2\n"); printf("----------------------\n\n"); printf("%dx%dx%dx%d lattice, ",NPROC0*L0,NPROC1*L1,NPROC2*L2,NPROC3*L3); printf("%dx%dx%dx%d process grid, ",NPROC0,NPROC1,NPROC2,NPROC3); printf("%dx%dx%dx%d local lattice\n\n",L0,L1,L2,L3); } mnkv=0; mres=0.0; for (isp=0;isp<3;isp++) { read_solver_parms(isp); sp=solver_parms(isp); if (sp.res>mres) mres=sp.res; if (sp.nkv>mnkv) mnkv=sp.nkv; } read_bc_parms(); if (my_rank==0) { find_section("SAP"); read_line("bs","%d %d %d %d",bs,bs+1,bs+2,bs+3); } MPI_Bcast(bs,4,MPI_INT,0,MPI_COMM_WORLD); set_sap_parms(bs,0,1,1); if (my_rank==0) { find_section("Deflation subspace"); read_line("bs","%d %d %d %d",bs,bs+1,bs+2,bs+3); read_line("Ns","%d",&Ns); } MPI_Bcast(bs,4,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&Ns,1,MPI_INT,0,MPI_COMM_WORLD); set_dfl_parms(bs,Ns); if (my_rank==0) { find_section("Deflation subspace generation"); read_line("kappa","%lf",&kappa); read_line("mu","%lf",&mu); read_line("ninv","%d",&ninv); read_line("nmr","%d",&nmr); read_line("ncy","%d",&ncy); } MPI_Bcast(&kappa,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&mu,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&ninv,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nmr,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&ncy,1,MPI_INT,0,MPI_COMM_WORLD); set_dfl_gen_parms(kappa,mu,ninv,nmr,ncy); if (my_rank==0) { find_section("Deflation projection"); read_line("nkv","%d",&nkv); read_line("nmx","%d",&nmx); read_line("res","%lf",&res); fclose(fin); } MPI_Bcast(&nkv,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nmx,1,MPI_INT,0,MPI_COMM_WORLD); 
MPI_Bcast(&res,1,MPI_DOUBLE,0,MPI_COMM_WORLD); set_dfl_pro_parms(nkv,nmx,res); set_lat_parms(6.0,1.0,0,NULL,1.234); print_solver_parms(status,status+1); print_sap_parms(0); print_dfl_parms(0); start_ranlux(0,1245); geometry(); mnkv=2*mnkv+2; if (mnkv<(Ns+2)) mnkv=Ns+2; if (mnkv<5) mnkv=5; alloc_ws(mnkv); alloc_wsd(7); alloc_wv(2*nkv+2); alloc_wvd(4); drmx.re=0.0; drmx.im=0.0; for (irw=0;irw<3;irw++) { dm=1.0e-2; for (isp=0;isp<3;isp++) { ispp[0]=isp; ispp[1]=isp; if (isp==0) { m0=1.0877; mu0=0.1; } else if (isp==1) { m0=0.0877; mu0=0.01; } else { m0=-0.0123; mu0=0.001; } random_ud(); if (isp==2) { dfl_modes(status); error_root(status[0]<0,1,"main [check2.c]", "dfl_modes failed"); } if (irw==0) { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0; ms.d2=dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnw1[0].re-lnw1[1].re); dr.im=fabs(lnw1[0].im-lnw1[1].im); lnr=mrw2(ms,1,ispp,lnw1,sqnp,&sqne,status); dr.re+=fabs(lnr.re-(2.0*mu0*dm+dm*dm)*sqnp[0]); dr.re+=fabs(lnw1[0].re-lnw1[1].re); dr.re+=fabs(sqnp[0]-sqnp[1]); dr.im+=fabs(lnr.im); dr.im+=fabs(lnw1[0].im-lnw1[1].im); } else if (irw==1) { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0; ms.d2=-dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnw1[0].re+lnw1[1].re); dr.im=fabs(lnw1[0].im+lnw1[1].im); lnr=mrw2(ms,1,ispp,lnw1,sqnp,&sqne,status); dr.re+=fabs(lnr.re+dm*dm*sqnp[0]); dr.re+=fabs(lnw1[0].re+lnw1[1].re); dr.re+=fabs(sqnp[0]-sqnp[1]); dr.im+=fabs(lnr.im-2.0*lnw1[0].im); dr.im+=fabs(lnw1[0].im+lnw1[1].im); } else { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0+dm; ms.d2=-dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnr.re); dr.im=fabs(lnr.im); } if (dr.re>drmx.re) drmx.re=dr.re; if (dr.im>drmx.im) drmx.im=dr.im; if (my_rank==0) { if (irw==0) printf("mrw2(d2=d1): "); else if (irw==1) printf("mrw2(d2=-d1): "); else printf("mrw2(m2=m1+d1,d2=-d1): "); if ((isp==0)||(isp==1)) printf("status = %d\n",status[0]); else if (isp==2) printf("status = (%d,%d,%d)\n", 
status[0],status[1],status[2]); printf("diff = %.1e + i%.1e\n\n",dr.re,dr.im); } error_chk(); } } if (my_rank==0) { printf("\n"); printf("max diff = %.1e + i%.1e\n",drmx.re,drmx.im); printf("(should be smaller than %.1e)\n\n",mres*sqrt((double)(VOLUME*NPROC*24))); fclose(flog); } MPI_Finalize(); exit(0); }