Matrix gather#
Demonstrates gathering of a distributed matrix as it may used in domain partitioning applications.
#include <cstdlib>
#include <iostream>
#include <cmath>
#include <vector>
#include <mpl/mpl.hpp>
// some basic matrix class
template<typename T>
class matrix : private std::vector<T> {
using base = std::vector<T>;
public:
using typename base::size_type;
using typename base::reference;
using typename base::const_reference;
using typename base::iterator;
using typename base::const_iterator;
private:
size_type nx, ny;
public:
matrix(size_type nx, size_type ny) : base(nx * ny), nx(nx), ny(ny) {}
const_reference operator()(size_type ix, size_type iy) const {
return base::operator[](ix + nx * iy);
}
reference operator()(size_type ix, size_type iy) { return base::operator[](ix + nx * iy); }
using base::begin;
using base::end;
using base::data;
};
int main() {
const mpl::communicator &comm_world{mpl::environment::comm_world()};
const int p{comm_world.size()}; // total numbers of processors
const int p_l{comm_world.rank()};
// find integer px and py such that px*py=p and px and py as close as possible
int px{static_cast<int>(std::sqrt(static_cast<double>(p)))};
while (p / px * px != p)
--px;
int py{p / px};
int nx{31}, ny{29}; // total size of the matrix
matrix<int> nx_l(px, py), ny_l(px, py); // sizes of sub matrices for both dimensions
matrix<int> nx_0(px, py), ny_0(px, py); // starts of sub matrices for both dimensions
// matrix of layouts
matrix<mpl::subarray_layout<int>> sub_matrix_l(px, py);
// calculate all indices and sizes, generate layouts
for (int iy{0}; iy < py; ++iy) {
for (int ix{0}; ix < px; ++ix) {
nx_l(ix, iy) = nx * (ix + 1) / px - nx * ix / px;
ny_l(ix, iy) = ny * (iy + 1) / py - ny * iy / py;
nx_0(ix, iy) = nx * ix / px;
ny_0(ix, iy) = ny * iy / py;
sub_matrix_l(ix, iy) = mpl::subarray_layout<int>(
{{ny, ny_l(ix, iy), ny_0(ix, iy)}, {nx, nx_l(ix, iy), nx_0(ix, iy)}});
}
}
// process local position in global data grid
int py_l{p_l / px}, px_l{p_l - px * py_l};
// gather via send-recv
{
// fill some local matrix with data
matrix<int> m_l(nx_l(px_l, py_l), ny_l(px_l, py_l));
std::fill(m_l.begin(), m_l.end(), p_l);
mpl::contiguous_layout<int> matrix_l(nx_l(px_l, py_l) * ny_l(px_l, py_l));
// send local sub-matrix to rank 0
mpl::irequest r(comm_world.isend(m_l.data(), matrix_l, 0));
if (p_l == 0) {
// gather all submatrices into one large matrix
matrix<int> m(nx, ny);
std::fill(m.begin(), m.end(), 0 - ' ');
for (int iy{0}; iy < py; ++iy) {
for (int ix{0}; ix < px; ++ix)
comm_world.recv(m.data(), sub_matrix_l(ix, iy), ix + px * iy);
}
for (int iy{0}; iy < ny; ++iy) {
for (int ix{0}; ix < nx; ++ix)
std::cout << static_cast<unsigned char>(m(ix, iy) + 'A');
std::cout << '\n';
}
std::cout << '\n';
}
r.wait();
}
// gather via gatherv
{
// fill some local matrix with data
matrix<int> m_l(nx_l(px_l, py_l), ny_l(px_l, py_l));
std::fill(m_l.begin(), m_l.end(), p_l);
// build the layouts for the gatherv operation
const int root{0};
mpl::contiguous_layout<int> matrix_l(nx_l(px_l, py_l) * ny_l(px_l, py_l));
if (p_l == root) {
mpl::layouts<int> recvl;
for (int i{0}; i < p; ++i)
recvl.push_back(sub_matrix_l(i % px, i / px));
matrix<int> m(nx, ny);
comm_world.gatherv(root, m_l.data(), matrix_l, m.data(), recvl);
for (int iy{0}; iy < ny; ++iy) {
for (int ix{0}; ix < nx; ++ix)
std::cout << static_cast<unsigned char>(m(ix, iy) + 'A');
std::cout << '\n';
}
std::cout << '\n';
} else
comm_world.gatherv(root, m_l.data(), matrix_l);
}
// gather via alltoallv
{
// fill some local matrix with data
matrix<int> m_l(nx_l(px_l, py_l), ny_l(px_l, py_l));
std::fill(m_l.begin(), m_l.end(), p_l);
// build the layouts for alltoallv to implement a gather operation
const int root{0};
mpl::layouts<int> sendl, recvl;
for (int i{0}; i < p; ++i) {
if (i == root)
sendl.push_back(mpl::contiguous_layout<int>(nx_l(px_l, py_l) * ny_l(px_l, py_l)));
else
sendl.push_back(mpl::empty_layout<int>());
}
if (p_l == root) {
for (int i{0}; i < p; ++i)
recvl.push_back(sub_matrix_l(i % px, i / px));
matrix<int> m(nx, ny);
comm_world.alltoallv(m_l.data(), sendl, m.data(), recvl);
for (int iy{0}; iy < ny; ++iy) {
for (int ix{0}; ix < nx; ++ix)
std::cout << static_cast<unsigned char>(m(ix, iy) + 'A');
std::cout << '\n';
}
std::cout << '\n';
} else {
for (int i{0}; i < p; ++i)
recvl.push_back(mpl::empty_layout<int>());
comm_world.alltoallv(m_l.data(), sendl, reinterpret_cast<int *>(0), recvl);
}
}
return EXIT_SUCCESS;
}