More coding questions using one BC in different location,

I have several questions but the most important is if this picture looks like artifacts
or some well known programming mistake. Briefly this is supposed to be poynting
vector with a source square and a higher dielectric medium near the bottom. the
source is a travelling wave at 45 degrees. I had several other coding issues but the
biggest problem was trying to use the final result from one iteration as the
initial result for the next one. This involves only the top and bottom z planes
but I used the entire mesh :slight_smile: Thanks.

gratingmumps.edp (10.9 KB)

//  This code nominally runs but several issues.
// The mumps solve is only a fraction of overall time 
// probably due to the way I coded a few things. BTW, mumps
// looks like a good solver and much easier tham PETSc
// is there some particular case that is not true?
// I'm using the z dim as time and so I use one solve final
// time as the initial conditions for the next solve.
// I'm currently manipulating the entire mesh but
// presume there is some ismple way to just get the
// time ( z) plane and derivative although I can't find it.
// running this ( with ioctl "8" see the code ),
//  ff-mpirun -np 4 gratingmumps.edp -omega 3.1416 -nx 20 -ny 20 -nt 20 -lt .1 -wg
// produces a lot of triangular areas that look like artifacts.
// Is there some coding error that could produce this?

// I also had a hard time coding bit masking - see the "TODO"
// lol. 

// is there someway to get mpi to release the core during a spin
// wait? My  cooling fan keeps coming on and the room actually gets 
// hotter when running this for a while lol. 

// do fem along time  2D +1, square source in 2 slab media.
// sort of a travelling wave antenna with a high index medium below.
// look at fields and performance vs coding. 
// with these params, 
// ff-mpirun -np 4 gratingmumps.edp -omega 50 -nx 20 -ny 20 -nt 20 -lt .02 -wg
// the solve is only a fraction of the run time.
// some issues comments with "FIXME"
// to select graphics, write to "ioctl.txt" in local dir see the code

// mostly taken from from hpddm/diffusion-cartesian-2d-PETSc.edp
// hpddm/maxwell-3d-PETSc.edp
//NSI3d-carac-mumps.edp Stokes-v1-matrix-mumps.edp Stokes-v3-matrix-mumps.edp

//  mpi does not appear to be energy conserving- using 100 pct of
// core in spin wait nothing else can run the cooling fan spins
// up and the room literally gets hotter.  


  author = {Michael J Marchywka},
  title = {$PROJ},
license={Knowledge sir should be free to all },
publisher={Mike Marchywka},
  filename = {$PROJ},
  url = {},
  version = {0.0.0},
  date-started = {$DATE}


load "MUMPS_mpi"                        // PETSc plugin

load "medit"                             // PETSc plugin
load "msh3"                             // PETSc plugin
//macro dimension()3// EOM            // 2D or 3D
include "macro_ddm.idp"
include "cube.idp"

func real self(real q ) { return q; } 
// from hpddm/diffusion-cartesian-2d-PETSc.edp

macro Pk P13d // EOM

/*    Options for distributed solver   */
int s      = getARGV("-split", 1)   ; // Refinement factor
//int Npts   = getARGV("-npts" , 80) ; // Number of points on the perimeter
int Xpts   = getARGV("-nx" , 20) ; // Number of points on the perimeter
int Ypts   = getARGV("-ny" , 20) ; // Number of points on the perimeter
int Tpts   = getARGV("-nt" , 20) ; // Number of points on the perimeter
real Lx    = getARGV("-lx"   , 1.0); // Dimension of the domain
real Ly    = getARGV("-ly"   , 1.0); // Dimension of the domain
real Lt    = getARGV("-lt"   , 1.0e-1); // Dimension of the domain
real fvolume=Lx*Ly*Lt;  
//real dt    = getARGV("-dt"   , 0.0); // if non zero use time stepper 
int itermax    = getARGV("-itermax"   , 10000); // if non zero use time stepper 
real omega    = getARGV("-omega" , 30.0); // if non zero use time stepper 
//real c    = getARGV("-c" , 1); // if non zero use time stepper 

/*         Geometry parameters         */
int[int] Labels3=[1,2,3,4,5,6]; // labels : x0,1; x0,1; z0,1  
// from examples, 
macro Curl(ux, uy, uz)[dy(uz)-dz(uy), dz(ux)-dx(uz), dx(uy)-dy(ux)]// EOM
macro CrossN(ux, uy, uz)[uy*N.z-uz*N.y, uz*N.x-ux*N.z, ux*N.y-uy*N.x]// EOM
macro Curlabs(ux, uy, uz)[abs(dy(uz)-dz(uy)), abs(dz(ux)-dx(uz)), abs(dx(uy)-dy(ux))]// EOM
/*            ##############           */
 // Construction of the rectangular domain
if (mpirank==0) cout<<"npts "<<Xpts<<" "<<Ypts<<" "<<Tpts<<endl; cout.flush;
mesh3  Th; // main mesh 
mesh Th2; // for projections etc
if (mpirank==0)
Th = cube(Xpts,Ypts,Tpts,[Lx*(x-0.5),Ly*(y-0.5),Lt*(z-0.5)],label=Labels3);
int[int] chlab=[1,1,2,1,3,1,4,1];
cout<< " mesh volume   "<<fvolume<<endl; cout.flush;

fespace Vhp(Th, [Pk,Pk]);      // E field, no charging  
fespace Vh(Th, Pk);      // temps etc  

// this is for the dielectric change... 
// high index slab at bottom 
func int  myreg(real xx, real yy) 
if (yy< -.3 ) return 2; else return 1;
} // my reg

func real fzed(real xx) { return xx; }
func real  fzero(real xx, real yy, real tt) { return 0.0; } 
func real  fone(real xx, real yy, real tt) { return 1.0; } 
// no code yet for divergence of E :) 
func real  fsigma(real xx, real yy, real tt) 
{ if (myreg(xx,yy)==1) return 0; 
return 0;
func real  fepsilon(real xx, real yy, real tt) 
{ if (myreg(xx,yy)==1) return 1; 
return 2;
// travelling wave on a square 
// .2 x .2 with c=1, omega=ck, omega=2pi/lambda
// lambda = 2pi/omega; lambda=1, omega=2pi; lambda=1.4, omega=4.44
//real phi=-0*pi/4;
real phi=-pi/4;
real ksrc=omega;
real kx=ksrc*cos(phi);
real ky=ksrc*sin(phi);

func real  fsrc(real xx, real yy,real tt,real toff)
//if (xx< -.3) if (yy>.3)
//return 1.0i*sin(fudge*omega*(xx-yy)/sqrt(2.0)) ; 

if (xx<= .1) if (xx>= -.1)
if (yy<= .1) if (yy>= -.1)
return cos(kx*xx+ky*yy-omega*(tt+toff)) ;
return 0;

func real  fsrcc(real xx, real yy,real tt,real toff)
//if (xx< -.3) if (yy>.3)
//return 1.0i*sin(fudge*omega*(xx-yy)/sqrt(2.0)) ; 

if (xx<= .1) if (xx>= -.1)
if (yy<= .1) if (yy>= -.1)
return sin(kx*xx+ky*yy-omega*(tt+toff)) ;
return 0;

// 4 for source region 
func int   srcreg(real xx, real yy,real tt,real toff)
if (xx<= .1) if (xx>= -.1) if (yy<= .1) if (yy>= -.1)
	 return 4;
return 0;

// try to create regions as bit fields for multiple
// region memberships. 
// high epsilong is region 2, 4 is the src region 
// then isolate bits later 
// "<int>" nice here 
Vh reg=region;
// no real savings AFAICT, 
int[int] srcpts(reg[].n);
int nsrcpts=0;
for(int i=0; i<reg[].n; ++i) 
{if (reg[][i]==4) { srcpts[nsrcpts]=i; ++nsrcpts;}

// iterate using z as time with this center time.
real toff=0;

Vhp [pEx,pEy]=[fsrc(x,y,z,toff),fsrc(x,y,z,toff)];
Vhp [wEx,wEy]=[pEx,pEy]; // [fsrc(x,y,z,toff),fsrc(x,y,z,toff)];
Vhp [vEx,vEy]=[pEx,pEy]; // [fsrc(x,y,z,toff),fsrc(x,y,z,toff)];
Vhp [wExdz,wEydz]=[0,0];
Vh epsilon=fepsilon(x,y,z);
Vh mu=fone(x,y,z);
// this is a small number of points, need to just do a region
Vh srczed=fsrc(x,y,z,0);
Vh srcqzed=fsrcc(x,y,z,0);
Vh src=srczed;

matrix A;
real[int] rhss(Vhp.ndof);
real t1=0;
real t2=0;
for(int iter=0; iter<itermax; ++iter)
// center time for z 
cout<<" toff="<<toff<<endl;
if (iter>0) {  
// using mpi here instead of duplicating steps 
// only maybe 10 percent faster.. 
if (mpirank==0) 
// TODO FIXME this only needs the BC not entire mesh and it takes time. 
// use a region or something? 
// the points should be identical on both planes but this may
// interpolte using a lot of time. 
for (int mpii=1; mpii<mpisize; ++mpii ) processor(mpii)<<wEx[] ;
else processor(0)>>wEx[];
} // iter
t2=clock(); cout<<"move bc "<<(t2-t1)<<endl;cout.flush;t1=clock();
// this only needs to be done on lower plane.
// Can't find a way to differentiate over a region
// or label  without an integrl. 
t2=clock(); cout<<"wex dz"<<(t2-t1)<<endl;cout.flush;t1=clock();

// this is now a lot of time... 
if (mpirank==0)
// replace trig with multiplication and add
// still significant, may benefit from memory organization..
//Vh srcold=fsrc(x,y,z,toff);
// src=fsrc(x,y,z,toff);
real ct=cos(omega*toff);
real st=-sin(omega*toff);
// this still has to broad cast the entire thing... 
//for(int i=0; i<nsrcpts; ++i) 
//{int j= srcpts[i]; 

//real l2=sqrt(int3d(Th)((srcold-src)*(srcold-src)));
//cout<<" src err "<<l2<<endl; cout.flush;
for (int mpii=1; mpii<mpisize; ++mpii ) processor(mpii)<<src[] ;
else processor(0)>>src[];
// speed here almost proportional to number of integrations lol 
// how you write it matters. 
varf  Grating([Ex,Ey],[vEx,vEy],solver=CG,init=1)
=  int3d(Th)([dx(vEx),dx(vEy)]'*[dx(Ex),dx(Ey)] 
+epsilon*[dz(vEx),dz(vEy)]'*[dz(Ex),dz(Ey)] )
// no idea if sign here is right after all the other hacks.. 
// no apparent benefit but no real cost either... 
varf  GratingRHS([Ex,Ey],[vEx,vEy],solver=CG,init=1)
// LU not work with P2 element 
// segfault in "fill" 
t2=clock(); cout<<"getting mat"<<(t2-t1)<<endl;cout.flush;t1=clock();
t2=clock(); cout<<" ret from getting mat"<<(t2-t1)<<endl;cout.flush;t1=clock();
set(A, solver=sparsesolver,master=0);
pEx[]=(A^-1) * (rhss);
t2=clock(); cout<<" ret from solve  "<<(t2-t1)<<endl; cout.flush; t1=clock();
if (mpirank==0)
// FIXME TODO how to code this ? lol.  
func bool bit(int f, int b) { return ((((f/(2^b)))%2)!=0); } 
// no diea why this fails? 
//func bool bit(int f, int b) { return ((1&(f>>b))!=0); } 
ifstream file("ioctl.txt");
int ok=8;
file>> ok;
bool wait=bit(ok,0);
bool fields=bit(ok,1);
bool meds=bit(ok,2);
bool poyn=bit(ok,3);
cout<<" wait "<<wait<<" fields "<<fields<<" meds "<<meds<<" poyn "<<poyn<<endl; cout.flush;
fespace Vh2(Th2, P2);
// static energy 
Vh W=sqrt(fepsilon(x,y,z)*(pEx*pEx + pEy*pEy));
//Vh2 last=W(x,y,.5*Lt);
Vh2 last=pEy(x,y,.5*Lt);
Vh2 last2=pEx(x,y,.5*Lt);
Vh2 epsilonp=epsilon(x,y,.5*Lt);
if (poyn)
Vh2 sdir=dx(last)-dy(last2);
Vh2 sx=last*sdir*epsilonp;
Vh2 sy=-1.0*last2*sdir*epsilonp; 
 plot([sx,sy],wait=(wait),cmm=" last time "+toff,value=true,fill=true);
if (fields)
	 plot([last2,last],wait=(wait),cmm=" last time "+toff,value=true,fill=true);

if (meds) {
// scale is bad for viewin.. 
// this is slow however and need only be done once if
// little memroy penalty for keeping it. . 
 mesh3  ThCrp = cube(Xpts,Ypts,Tpts,[Lx*(x-0.5),Ly*(y-0.5),Lx*(z-0.5)]);
fespace Cr(ThCrp,P23d);
// scale is wrong... 
Cr V=W(x,y,Lt/Lx*(z-.5*Lx));
//Cr V=fsrc(x,y,Lt/Lx*(z-.5*Lx),toff);
        medit("Global solution", ThCrp, V );
} // meds 

} // mpirank 
cout<<" render   "<<(t2-t1)<<endl; cout.flush; t1=clock();

} // iter