Dear all
I try to use mpi to realize program parallelism
It is mainly used to reduce the program running time, but I’ve found that as the number of processors increases, the program run time increases. I can not solve this problem ,so i want to consult you.
that’s my 8 cpu codes :
//8 processor
if ( mpisize != 8 ) {
cout << " sorry, number of processors !=8 " << endl;
exit(1);}
verbosity=3;
int n=48; // z0-z11
real[int] z(n); //
real h=1./(n-1);
for (int i=0;i<=n-1;i++) //
{ z[i]= i*h; }//循环定义
mesh Th= square (n-1, n-1 , [x, y] ) ;
real m=(n-1)*(n-1);//1/h^2
fespace Vh(Th,P2);
Vh u, v, f,yyy,zzz,hhhh;
problem Poisson(u,v) =//posion
int2d(Th)(dx(u)*dx(v) + dy(u)*dy(v)+2*m*u*v) //
- int2d(Th)( f*v+m*(yyy+zzz)*v)+on(1,2,3,4,u=0) ;//
Vh[int] up(n+1),un(n+1);
for(int i=0;i<=n;i++){
un[i]=0;
}
if(mpirank==0){
for(int i=1;i<6;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0;
Poisson; un[i] = u;
up[i]=un[i];
}
}
else if(mpirank==7){
for(int i=42;i<47;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0;
Poisson; un[i] = u;
up[i]=un[i];}
}
else
{
for(int i=mpirank*6;i<(mpirank+1)*6;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0;
Poisson; un[i] = u;
up[i]=un[i];
}
}
//mpiBarrier(mpiCommWorld);
//第一次迭代*********************************************
int k=1;
for(int jj=0;jj<900;jj++){
if(mpirank<7){
processor(mpirank+1)<<un[5+mpirank*6][];
processor(mpirank+1)>>hhhh[];
up[(mpirank+1)*6]=hhhh;
}
if(mpirank>0){
processor(mpirank-1)<<un[mpirank*6][];
processor(mpirank-1)>>hhhh[];
up[mpirank*6-1]=hhhh;
}
/* processor(1-mpirank)<<un[0.5*n+mpirank-1][];
processor(1-mpirank)>>hhhh[];
up[0.5*n-mpirank]=hhhh; */
//求解
if(mpirank==0){
for(int i=1;i<6;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;}
}
else if(mpirank==7)
{
for(int i=42;i<47;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;
}
}
else{
for(int i=mpirank*6;i<(mpirank+1)*6;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;}
}
/* for(int i=mpirank*0.5*n+1-mpirank;i<mpirank*0.5*n+0.5*n-mpirank;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1]; // 对于每一个u【i】,已知u【i-1】
Poisson; un[i] = u;
//up[i]=un[i];
}
*/
//误差
real[int] error(8);
for(int i=mpirank*6;i<(mpirank+1)*6;i++){
error[mpirank]=error[mpirank]+int2d(Th)((un[i]-up[i])^2)*h;
//cout<<"i*****"<<i<<endl;
}
for(int i=0;i<8;i++){
broadcast(processor(i),error[i]);
}
real err,error2;
/* for(int i=0;i<8;i++){
error2=error2+error[i];
} */
err=sqrt(error[0]+error[1]+error[2]+error[3]+error[4]+error[5]+error[6]+error[7]);
cout <<" **********************err = " << err << " *********** " << endl;
if(err<0.00001) break;
//update
for(int i=mpirank*6;i<(mpirank+1)*6;i++){
up[i]=un[i];
}
k++;
cout<<"***************"<<k<<endl;
}
and that’s my 16 cpu codes
//16 processor
//2023年5月1日17:13:16
if ( mpisize != 16 ) {
cout << " sorry, number of processors !=3 " << endl;
exit(1);}
verbosity=3;
int n=48; // z0-z11
real[int] z(n); // z数组
real h=1./(n-1);
for (int i=0;i<=n-1;i++) //
{ z[i]= i*h; }//循环定义
mesh Th= square (n-1, n-1 , [x, y] ) ;
real m=(n-1)*(n-1);//1/h^2
fespace Vh(Th,P2);
Vh u, v, f,yyy,zzz,hhhh;
problem Poisson(u,v) =//posion 方程定义 使用变分形式
int2d(Th)(dx(u)*dx(v) + dy(u)*dy(v)+2*m*u*v) // u即所求函数,这里是二维的
- int2d(Th)( f*v+m*(yyy+zzz)*v)+on(1,2,3,4,u=0) ;//这里yyy表示u【i-1】,zzz表示u【i+1】
Vh[int] up(n+1),un(n+1);
for(int i=0;i<=n;i++){
un[i]=0;
}
//首项迭代
if(mpirank==0){
for(int i=1;i<3;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0; // 对于每一个u【i】,已知u【i-1】
Poisson; un[i] = u;
up[i]=un[i];
}
}
else if(mpirank==15){
for(int i=45;i<47;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0; // 对于每一个u【i】,已知u【i-1】
Poisson; un[i] = u;
up[i]=un[i];}
}
else
{
for(int i=mpirank*3;i<(mpirank+1)*3;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=0;zzz=0; // 对于每一个u【i】,已知u【i-1】
Poisson; un[i] = u;
up[i]=un[i];
}
}
//mpiBarrier(mpiCommWorld);
//第一次迭代*********************************************
int k=1;
for(int jj=0;jj<1000;jj++){
if(mpirank<15){
processor(mpirank+1)<<un[2+mpirank*3][];
processor(mpirank+1)>>hhhh[];
up[(mpirank+1)*3]=hhhh;
}
if(mpirank>0){
processor(mpirank-1)<<un[mpirank*3][];
processor(mpirank-1)>>hhhh[];
up[mpirank*3-1]=hhhh;
}
/* processor(1-mpirank)<<un[0.5*n+mpirank-1][];
processor(1-mpirank)>>hhhh[];
up[0.5*n-mpirank]=hhhh; */
//求解
if(mpirank==0){
for(int i=1;i<3;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;}
}
else if(mpirank==15)
{
for(int i=45;i<47;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;
}
}
else{
for(int i=mpirank*3;i<(mpirank+1)*3;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1];
Poisson; un[i] = u;}
}
/* for(int i=mpirank*0.5*n+1-mpirank;i<mpirank*0.5*n+0.5*n-mpirank;i++){
f=3*pi*pi*sin(pi*x)*sin(pi*y)*sin(pi*z[i]);
yyy=up[i-1];zzz=up[i+1]; // 对于每一个u【i】,已知u【i-1】
Poisson; un[i] = u;
//up[i]=un[i];
}
*/
//误差
real[int] error(16);
for(int i=mpirank*3;i<(mpirank+1)*3;i++){
error[mpirank]=error[mpirank]+int2d(Th)((un[i]-up[i])^2)*h;
//cout<<"i*****"<<i<<endl;
}
for(int i=0;i<16;i++){
broadcast(processor(i),error[i]);
}
real err,error2;
for(int i=0;i<16;i++){
error2=error2+error[i];
}
err=sqrt(error2);
cout <<" **********************err = " << err << " *********** " << endl;
if(err<0.00001) break;
//update
for(int i=mpirank*3;i<(mpirank+1)*3;i++){
up[i]=un[i];
}
k++;
cout<<"***************"<<k<<endl;
}
that’s my CPUtime:
for n=48
8 processor
**********************err = 9.9474e-06 ***********
times: compile 0.272s, execution 1177.91s, mpirank:7
16 processor
CodeAlloc : nb ptr 4745, size :566376 mpirank: 15
######## We forget of deleting 63955 Nb pointer, 0Bytes , mpirank 3, memory leak =0
**********************err = 9.9474e-06 ***********
times: compile 0.063s, execution 1632.11s, mpirank:15