StarNEig User's Guide  v0.1.5
A task-based library for solving dense nonsymmetric eigenvalue problems
gep_dm_full_chain.c
#include "validate.h"
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <mpi.h>
// a predicate function that selects all finate eigenvalues that have positive
// a real part
static int predicate(double real, double imag, double beta, void *arg)
{
if (0.0 < real && beta != 0.0)
return 1;
return 0;
}
int main(int argc, char **argv)
{
const int n = 3000; // matrix dimension
const int root = 0; // root rank
// initialize MPI
int thread_support;
MPI_Init_thread(
&argc, (char ***)&argv, MPI_THREAD_MULTIPLE, &thread_support);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
// the root node initializes the matrices locally
int ldA = 0, ldB = 0, ldQ = 0, ldZ = 0, ldC = 0, ldD = 0;
double *A = NULL, *B = NULL, *Q = NULL, *Z = NULL, *C = NULL, *D = NULL;
if (world_rank == root) {
srand((unsigned) time(NULL));
// generate a full random matrix A and a copy C
ldA = ((n/8)+1)*8, ldC = ((n/8)+1)*8;
A = malloc(n*ldA*sizeof(double));
C = malloc(n*ldC*sizeof(double));
for (int j = 0; j < n; j++)
for (int i = 0; i < n; i++)
A[j*ldA+i] = C[j*ldC+i] = 2.0*rand()/RAND_MAX - 1.0;
// generate a full random matrix B and a copy D
ldB = ((n/8)+1)*8, ldD = ((n/8)+1)*8;
B = malloc(n*ldB*sizeof(double));
D = malloc(n*ldD*sizeof(double));
for (int j = 0; j < n; j++)
for (int i = 0; i < n; i++)
B[j*ldB+i] = D[j*ldD+i] = 2.0*rand()/RAND_MAX - 1.0;
// generate an identity matrix Q
ldQ = ((n/8)+1)*8;
Q = malloc(n*ldA*sizeof(double));
for (int j = 0; j < n; j++)
for (int i = 0; i < n; i++)
Q[j*ldQ+i] = i == j ? 1.0 : 0.0;
// generate an identity matrix Z
ldZ = ((n/8)+1)*8;
Z = malloc(n*ldZ*sizeof(double));
for (int j = 0; j < n; j++)
for (int i = 0; i < n; i++)
Z[j*ldZ+i] = i == j ? 1.0 : 0.0;
}
// allocate space for the eigenvalues and the eigenvalue selection vector
double *real = malloc(n*sizeof(double));
double *imag = malloc(n*sizeof(double));
double *beta = malloc(n*sizeof(double));
int *select = malloc(n*sizeof(int));
// Initialize the StarNEig library using all available CPU cores and
// GPUs. The STARNEIG_FAST_DM flag indicates that the library should
// initialize itself for distributed memory computations and keep StarPU
// worker threads and StarPU-MPI communication thread awake between
// interface function calls.
// create a two-dimensional block cyclic distribution with column-major
// ordering
// Convert the local matrix A to a distributed matrix lA that is owned by
// the root node. This is done in-place, i.e., the matrices A and lA point
// to the same data.
n, n, STARNEIG_REAL_DOUBLE, root, A, ldA);
// create a distributed matrix dA using the earlier created data
// distribution and default distributed block size
// copy the local matrix lA to the distributed matrix dA (scatter)
// scatter the matrix B
n, n, STARNEIG_REAL_DOUBLE, root, B, ldB);
// scatter the matrix Q
n, n, STARNEIG_REAL_DOUBLE, root, Q, ldQ);
// scatter the matrix Z
n, n, STARNEIG_REAL_DOUBLE, root, Z, ldZ);
// reduce the dense-dense matrix pair (A,B) to Hessenberg-triangular form
printf("Hessenberg-triangular reduction...\n");
// reduce the Hessenberg-triangular matrix pair (A,B) to generalized Schur
// form
printf("Schur reduction...\n");
starneig_GEP_DM_Schur(dA, dB, dQ, dZ, real, imag, beta);
// select eigenvalues that have positive a real part
int num_selected;
starneig_GEP_DM_Select(dA, dB, &predicate, NULL, select, &num_selected);
printf("Selected %d eigenvalues out of %d.\n", num_selected, n);
// reorder selected eigenvalues to the upper left corner of the generalized
// Schur form (A,B)
printf("Reordering...\n");
starneig_GEP_DM_ReorderSchur(select, dA, dB, dQ, dZ, real, imag, beta);
// copy the distributed matrix dA back to the local matrix lA (gather)
// free the distributed matrix lA (matrix A is not freed)
// free the distributed matrix dA (all local resources are freed)
// gather the matrix B
// gather the matrix Q
// gather the matrix Z
// free the data distribution
// de-initialize the StarNEig library
// de-initialize MPI
MPI_Finalize();
if (world_rank == root) {
// check residual || Q A Z^T - C ||_F / || C ||_F
check_residual(n, ldQ, ldA, ldZ, ldC, Q, A, Z, C);
// check residual || Q B Z^T - D ||_F / || D ||_F
check_residual(n, ldQ, ldB, ldZ, ldD, Q, B, Z, D);
// check residual || Q Q^T - I ||_F / || I ||_F
check_orthogonality(n, ldQ, Q);
// check residual || Z Z^T - I ||_F / || I ||_F
check_orthogonality(n, ldZ, Z);
}
// cleanup
free(A);
free(C);
free(B);
free(D);
free(Q);
free(Z);
free(real);
free(imag);
free(beta);
free(select);
return 0;
}