Usage examples: MPI-parallelized solvers

These examples show how to find a few of the smallest eigenvalues of a real symmetric matrix, together with the corresponding eigenvectors, using the MPI-parallelized (PARPACK) versions of ezARPACK’s eigensolvers. The matrix is banded with a bandwidth of 5. Note that it is never stored in memory: it is defined only by the rule of how it acts on a vector.
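All of the examples below diagonalize the same test matrix and differ only in the storage backend they use. For reference, the matrix element rule that each example encodes inside its `matrix_op` lambda can be written as the following serial sketch; the helper name `matrix_element` is hypothetical and does not appear in the examples themselves.

#include <cmath>

// Bandwidth used throughout the examples
const int bandwidth = 5;

// Hypothetical helper illustrating the matrix being diagonalized:
// A_{ij} = |i-j| / (1 + i + j) if |i-j| <= bandwidth, and zero otherwise.
double matrix_element(int i, int j) {
  if(std::abs(i - j) > bandwidth) return 0;
  return double(std::abs(i - j)) / (1 + i + j);
}

In the MPI-parallelized examples, each process applies this rule only to the columns of A that fall into its local block; the partial results are then combined and redistributed with MPI_Reduce_scatter.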

Eigen 3

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Eigen3 storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/eigen.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace Eigen;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Eigen3 storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, eigen_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, eigen_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, eigen_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  VectorXd local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.fill(0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues().transpose() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  VectorXd lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const VectorXd eigenvec_block = v.col(i);
    matrix_op(eigenvec_block.head(block_size),
              lhs.head(block_size));  // calculate the local block of A*v
    rhs = lambda(i) * eigenvec_block; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << (rhs - lhs).norm() / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Blaze

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Blaze storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/blaze.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace blaze;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Blaze storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, blaze_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, blaze_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, blaze_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  DynamicVector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.reset();
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in[i] += double(std::abs(i - j)) / (1 + i + j) * in[j_local];
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << trans(solver.eigenvalues()) << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  DynamicVector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const DynamicVector<double> eigenvec = column(v, i);
    // calculate the local block of A*v
    matrix_op(subvector(eigenvec, 0, block_size),
              subvector(lhs, 0, block_size));
    rhs = lambda[i] * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm(rhs - lhs) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Armadillo

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Armadillo storage backend to partially diagonalize a large sparse
// symmetric matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/armadillo.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace arma;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Armadillo storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, armadillo_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, armadillo_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, armadillo_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vec local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.zeros();
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in[i] += double(std::abs(i - j)) / (1 + i + j) * in[j_local];
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.memptr(), &out[0], block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues().t() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vec lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v.col(i);
    matrix_op(eigenvec, lhs(span::all)); // calculate the local block of A*v
    rhs = lambda[i] * eigenvec;          // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm(rhs - lhs) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Boost uBLAS

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the uBLAS storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/ublas.hpp>
#include <ezarpack/version.hpp>

#include <boost/numeric/ublas/io.hpp>

using namespace ezarpack;
using namespace boost::numeric::ublas;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the uBLAS storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, ublas_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, ublas_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, ublas_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    std::fill(local_op_in.begin(), local_op_in.end(), .0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(&local_op_in(0), &out(0), block_sizes.data(), MPI_DOUBLE,
                       MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const vector<double> eigenvec = column(v, i);
    // calculate the local block of A*v
    matrix_op(subrange(eigenvec, 0, block_size), subrange(lhs, 0, block_size));
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm_2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

TRIQS arrays

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the TRIQS storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/triqs.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace triqs::arrays;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = ezarpack::mpi::size(MPI_COMM_WORLD);
  const int comm_rank = ezarpack::mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the TRIQS storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, triqs_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, triqs_storage>' for general
  //   complex matrices.
  using solver_t = ezarpack::mpi::arpack_solver<Symmetric, triqs_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] =
        ezarpack::mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in() = 0;
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data_start(), out.data_start(),
                       block_sizes.data(), MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v(range(), i);
    matrix_op(eigenvec, lhs()); // calculate the local block of A*v
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

TRIQS/nda

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the TRIQS/nda storage backend to partially diagonalize a large sparse
// symmetric matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/nda.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace nda;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the nda storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, nda_storage>' for general real matrices;
  // * `mpi::arpack_solver<Complex, nda_storage>' for general complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, nda_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in() = 0;
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v(range::all, i);
    matrix_op(eigenvec, lhs()); // calculate the local block of A*v
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1) << "]: deviation = "
              << std::sqrt(sum(abs2(rhs - lhs))) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

xtensor

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the xtensor storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/xtensor.hpp>
#include <ezarpack/version.hpp>

#include <xtensor/xio.hpp>
#include <xtensor/xnorm.hpp>

using namespace ezarpack;
using namespace xt;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the xtensor storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, xtensor_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, xtensor_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, xtensor_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  auto local_op_in = xt::xtensor<double, 1>::from_shape({N});

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.fill(.0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), &out(0), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();

  auto lhs = xt::xtensor<double, 1>::from_shape({(unsigned long)block_size});
  auto rhs = xt::xtensor<double, 1>::from_shape({(unsigned long)block_size});

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = xt::view(v, xt::all(), i);
    matrix_op(eigenvec, // calculate the local block of A*v
              xt::view(lhs, xt::all()));
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << xt::norm_l2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}