Use MPI: this is an embarrassingly parallel job, so it is trivial to parallelize.
int ntask = 200;
for(int i = (mpirank * ntask) / mpisize; i < ((mpirank + 1) * ntask) / mpisize; ++i) {
cout << "process #" << mpirank << " dealing with task #" << i << endl;
}
mpiAllreduce(...); // for synchronization