Usage examples: MPI-parallelized solvers

These examples show how to find a few of the smallest eigenvalues of a real symmetric matrix, together with the corresponding eigenvectors, using the MPI-parallelized (PARPACK) versions of ezARPACK’s eigensolvers. The matrix is banded with a bandwidth of 5. Note that it is never stored in memory: it is defined only by the rule of how it acts on a vector.
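All of the examples below diagonalize the same test matrix and differ only in the storage backend they use. For reference, the matrix element rule that each example encodes inside its `matrix_op` lambda can be written as the following serial sketch; the helper name `matrix_element` is hypothetical and does not appear in the examples themselves.

#include <cmath>

// Bandwidth used throughout the examples
const int bandwidth = 5;

// Hypothetical helper illustrating the matrix being diagonalized:
// A_{ij} = |i-j| / (1 + i + j) if |i-j| <= bandwidth, and zero otherwise.
double matrix_element(int i, int j) {
  if(std::abs(i - j) > bandwidth) return 0;
  return double(std::abs(i - j)) / (1 + i + j);
}

In the MPI-parallelized examples, each process applies this rule only to the columns of A that fall into its local block; the partial results are then combined and redistributed with MPI_Reduce_scatter.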

Eigen 3

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Eigen3 storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/eigen.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace Eigen;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Eigen3 storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, eigen_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, eigen_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, eigen_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  VectorXd local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.fill(0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues().transpose() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  VectorXd lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const VectorXd eigenvec_block = v.col(i);
    matrix_op(eigenvec_block.head(block_size),
              lhs.head(block_size));  // calculate the local block of A*v
    rhs = lambda(i) * eigenvec_block; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << (rhs - lhs).norm() / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Blaze

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Blaze storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/blaze.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace blaze;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Blaze storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, blaze_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, blaze_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, blaze_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  DynamicVector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.reset();
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in[i] += double(std::abs(i - j)) / (1 + i + j) * in[j_local];
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << trans(solver.eigenvalues()) << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  DynamicVector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const DynamicVector<double> eigenvec = column(v, i);
    // calculate the local block of A*v
    matrix_op(subvector(eigenvec, 0, block_size),
              subvector(lhs, 0, block_size));
    rhs = lambda[i] * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm(rhs - lhs) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Armadillo

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the Armadillo storage backend to partially diagonalize a large sparse
// symmetric matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/armadillo.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace arma;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the Armadillo storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, armadillo_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, armadillo_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, armadillo_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vec local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.zeros();
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in[i] += double(std::abs(i - j)) / (1 + i + j) * in[j_local];
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.memptr(), &out[0], block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues().t() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vec lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v.col(i);
    matrix_op(eigenvec, lhs(span::all)); // calculate the local block of A*v
    rhs = lambda[i] * eigenvec;          // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm(rhs - lhs) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

Boost uBLAS

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the uBLAS storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/ublas.hpp>
#include <ezarpack/version.hpp>

#include <boost/numeric/ublas/io.hpp>

using namespace ezarpack;
using namespace boost::numeric::ublas;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the uBLAS storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, ublas_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, ublas_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, ublas_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](solver_t::vector_const_view_t in,
                       solver_t::vector_view_t out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    std::fill(local_op_in.begin(), local_op_in.end(), .0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(&local_op_in(0), &out(0), block_sizes.data(), MPI_DOUBLE,
                       MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    const vector<double> eigenvec = column(v, i);
    // calculate the local block of A*v
    matrix_op(subrange(eigenvec, 0, block_size), subrange(lhs, 0, block_size));
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm_2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

TRIQS arrays

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the TRIQS storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/triqs.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace triqs::arrays;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = ezarpack::mpi::size(MPI_COMM_WORLD);
  const int comm_rank = ezarpack::mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the TRIQS storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, triqs_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<Complex, triqs_storage>' for general
  //   complex matrices.
  using solver_t = ezarpack::mpi::arpack_solver<Symmetric, triqs_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] =
        ezarpack::mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in() = 0;
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data_start(), out.data_start(),
                       block_sizes.data(), MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v(range(), i);
    matrix_op(eigenvec, lhs()); // calculate the local block of A*v
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << norm2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

TRIQS/nda

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the TRIQS/nda storage backend to partially diagonalize a large sparse
// symmetric matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/nda.hpp>
#include <ezarpack/version.hpp>

using namespace ezarpack;
using namespace nda;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the nda storage backend, other options would be
  // * `mpi::arpack_solver<Asymmetric, nda_storage>' for general real matrices;
  // * `mpi::arpack_solver<Complex, nda_storage>' for general complex matrices.
  using solver_t = mpi::arpack_solver<Symmetric, nda_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  vector<double> local_op_in(N);

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in() = 0;
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), out.data(), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();
  vector<double> lhs(block_size), rhs(block_size);

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = v(range::all, i);
    matrix_op(eigenvec, lhs()); // calculate the local block of A*v
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1) << "]: deviation = "
              << std::sqrt(sum(abs2(rhs - lhs))) / block_size << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}

xtensor

#include <cmath>
#include <iostream>
#include <vector>

// This example shows how to use an MPI-parallelized solver of ezARPACK and
// the xtensor storage backend to partially diagonalize a large sparse symmetric
// matrix and find a number of its low-lying eigenvalues.

#include <ezarpack/mpi/arpack_solver.hpp>
#include <ezarpack/storages/xtensor.hpp>
#include <ezarpack/version.hpp>

#include <xtensor/xio.hpp>
#include <xtensor/xnorm.hpp>

using namespace ezarpack;
using namespace xt;

// Size of the matrix
const int N = 10000;

// We are going to use a band matrix with this bandwidth
const int bandwidth = 5;

// The number of low-lying eigenvalues we want to compute
const int N_ev = 10;

int main(int argc, char* argv[]) {

  // Initialize MPI environment
  MPI_Init(&argc, &argv);

  // Call utility functions from namespace 'ezarpack::mpi' to find out
  // the world communicator size and the rank of the calling process.
  const int comm_size = mpi::size(MPI_COMM_WORLD);
  const int comm_rank = mpi::rank(MPI_COMM_WORLD);

  // Print ezARPACK version
  if(comm_rank == 0)
    std::cout << "Using ezARPACK version " << EZARPACK_VERSION << std::endl;

  // Construct an MPI-parallelized solver object for the symmetric case.
  // For the xtensor storage backend, other options would be
  // * `mpi::arpack_solver<ezarpack::Asymmetric, xtensor_storage>' for general
  //   real matrices;
  // * `mpi::arpack_solver<ezarpack::Complex, xtensor_storage>' for general
  //   complex matrices.
  using solver_t = mpi::arpack_solver<ezarpack::Symmetric, xtensor_storage>;
  solver_t solver(N, MPI_COMM_WORLD);

  // Specify parameters for the solver
  using params_t = solver_t::params_t;
  params_t params(N_ev,               // Number of low-lying eigenvalues
                  params_t::Smallest, // We want the smallest eigenvalues
                  true);              // Yes, we want the eigenvectors
                                      // (Ritz vectors) as well

  // Vectors from the N-dimensional space of the problem are partitioned
  // into contiguous blocks. These blocks are distributed among all
  // MPI processes in the communicator used to construct 'solver'.
  int block_start = solver.local_block_start();
  int block_size = solver.local_block_size();
  // Block owned by the calling process covers the index range
  // [block_start; block_start + block_size - 1] within a full vector.

  // Compute and collect sizes of all rank-local blocks for later use.
  std::vector<int> block_sizes(comm_size);
  for(int rank = 0; rank < comm_size; ++rank)
    block_sizes[rank] = mpi::compute_local_block_size(N, comm_size, rank);

  // Temporary vector used in distributed matrix-vector multiplication
  auto local_op_in = xt::xtensor<double, 1>::from_shape({N});

  // Linear operator representing multiplication of a given vector by our matrix
  // The matrix to be diagonalized is defined as
  // A_{ij} = |i-j| / (1 + i + j), if |i-j| <= bandwidth, zero otherwise
  auto matrix_op = [&](auto in, auto out) {
    // 'in' and 'out' are views of the locally stored blocks of their respective
    // distributed N-dimensional vectors. Therefore, matrix-vector
    // multiplication has to be performed in two steps.

    // 1. Local multiplication of A's columns
    // [block_start; block_start + block_size - 1] by 'in'. The result is an
    // N-dimensional vector stored in 'local_op_in'.
    local_op_in.fill(.0);
    for(int i = 0; i < N; ++i) {
      int j_min = std::max(block_start, i - bandwidth);
      int j_max = std::min(block_start + block_size - 1, i + bandwidth);
      for(int j = j_min; j <= j_max; ++j) {
        int j_local = j - block_start;
        local_op_in(i) += double(std::abs(i - j)) / (1 + i + j) * in(j_local);
      }
    }

    // 2. Sum up (MPI reduce) results from step 1 and scatter the sum over
    // 'out' blocks stored on different MPI ranks.
    MPI_Reduce_scatter(local_op_in.data(), &out(0), block_sizes.data(),
                       MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  };

  // Run diagonalization!
  solver(matrix_op, params);

  if(comm_rank == 0) {
    // Number of converged eigenvalues
    std::cout << solver.nconv() << " out of " << params.n_eigenvalues
              << " eigenvalues have converged" << std::endl;

    // Print found eigenvalues
    std::cout << "Eigenvalues (Ritz values):" << std::endl;
    std::cout << solver.eigenvalues() << std::endl;
  }

  // Check A*v = \lambda*v
  auto const& lambda = solver.eigenvalues();
  auto const& v = solver.eigenvectors();

  auto lhs = xt::xtensor<double, 1>::from_shape({(unsigned long)block_size});
  auto rhs = xt::xtensor<double, 1>::from_shape({(unsigned long)block_size});

  for(int i = 0; i < N_ev; ++i) { // For each eigenpair ...
    auto const eigenvec = xt::view(v, xt::all(), i);
    matrix_op(eigenvec, // calculate the local block of A*v
              xt::view(lhs, xt::all()));
    rhs = lambda(i) * eigenvec; // and the local block of \lambda*v

    std::cout << i << ", block [" << block_start << ", "
              << (block_start + block_size - 1)
              << "]: deviation = " << xt::norm_l2(rhs - lhs) / block_size
              << std::endl;
  }

  // Print some computation statistics
  if(comm_rank == 0) {
    auto stats = solver.stats();

    std::cout << "Number of Lanczos update iterations: " << stats.n_iter
              << std::endl;
    std::cout << "Total number of OP*x operations: " << stats.n_op_x_operations
              << std::endl;
    std::cout << "Total number of steps of re-orthogonalization: "
              << stats.n_reorth_steps << std::endl;
  }

  // Terminate MPI execution environment
  MPI_Finalize();

  return 0;
}