diff options
author | spiros <andyspiros@gmail.com> | 2011-07-23 13:45:29 +0200 |
---|---|---|
committer | spiros <andyspiros@gmail.com> | 2011-07-23 13:45:29 +0200 |
commit | c51172bc46f4b95af6282d8782e4b145911c7afe (patch) | |
tree | 324fdb1ec70ca8a50f7bd6ffa09fa2658dd3d98a | |
parent | Added cholesky. Solved some issues. (diff) | |
download | auto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.tar.gz auto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.tar.bz2 auto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.zip |
Added working QR decomposition; added working symm_ev (but some negative
MFlops).
-rw-r--r-- | btl/actions/action_parallel_cholesky.hh | 3 | ||||
-rw-r--r-- | btl/actions/action_parallel_lu_decomp.hh | 3 | ||||
-rw-r--r-- | btl/actions/action_parallel_qr_decomp.hh | 100 | ||||
-rw-r--r-- | btl/actions/action_parallel_symm_ev.hh | 121 | ||||
-rw-r--r-- | btl/libs/PBLAS/main.cpp | 14 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas.h | 8 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface_impl.hh | 54 | ||||
-rw-r--r-- | pblas.py | 3 |
8 files changed, 295 insertions, 11 deletions
diff --git a/btl/actions/action_parallel_cholesky.hh b/btl/actions/action_parallel_cholesky.hh index f89eb98..05ef3ef 100644 --- a/btl/actions/action_parallel_cholesky.hh +++ b/btl/actions/action_parallel_cholesky.hh @@ -39,7 +39,8 @@ public : Global_A_stl.push_back(temp_stl[r][c]); } - Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64); + const int blocksize = std::max(std::min(size/4, 64), 2); + Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); LocalRows = desc[8]; LocalCols = Local_A_stl.size()/desc[8]; diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh index 18b4ac7..d3dc620 100644 --- a/btl/actions/action_parallel_lu_decomp.hh +++ b/btl/actions/action_parallel_lu_decomp.hh @@ -29,7 +29,8 @@ public : init_vector<pseudo_random>(Global_A_stl, size*size); } - Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64); + const int blocksize = std::max(std::min(size/4, 64), 2); + Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); LocalRows = desc[8]; LocalCols = Local_A_stl.size()/desc[8]; diff --git a/btl/actions/action_parallel_qr_decomp.hh b/btl/actions/action_parallel_qr_decomp.hh new file mode 100644 index 0000000..a41414c --- /dev/null +++ b/btl/actions/action_parallel_qr_decomp.hh @@ -0,0 +1,100 @@ +#ifndef ACTION_PARALLEL_QR_DECOMP_HH_ +#define ACTION_PARALLEL_QR_DECOMP_HH_ + +#include "utilities.h" +#include "init/init_function.hh" +#include "init/init_vector.hh" + +#include "lapack_interface.hh" +#include "STL_interface.hh" + +#include <string> +#include <algorithm> + +template<class Interface> +class Action_parallel_qr_decomp { + +public : + + // Constructor + BTL_DONT_INLINE Action_parallel_qr_decomp( int size ) : _size(size) + { + MESSAGE("Action_parallel_qr_decomp Ctor"); + + int myid, procnum; + blacs_pinfo_(&myid, &procnum); + iamroot = (myid == 0); + + // STL matrix and vector initialization + if (iamroot) { + init_vector<pseudo_random>(Global_A_stl, size*size); + } + + const int blocksize = std::max(std::min(size/4, 64), 2); + Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); + LocalRows = desc[8]; + LocalCols = Local_A_stl.size()/desc[8]; + + // Generic local matrix and vectors initialization + Interface::matrix_from_stl(Local_A_ref, Local_A_stl); + Interface::matrix_from_stl(Local_A , Local_A_stl); + + _cost = 2.0*size*size*size; + } + + + // Invalidate copy constructor + Action_parallel_qr_decomp(const Action_parallel_qr_decomp&) + { + INFOS("illegal call to Action_parallel_qr_decomp copy constructor"); + exit(1); + } + + // Destructor + ~Action_parallel_qr_decomp() + { + MESSAGE("Action_parallel_qr_decomp destructor"); + + // Deallocation + Interface::free_matrix(Local_A_ref, Local_A_stl.size()); + Interface::free_matrix(Local_A , Local_A_stl.size()); + } + + // Action name + static inline std::string name() + { + return "qr_decomp_" + Interface::name(); + } + + double nb_op_base() + { + return _cost; + } + + BTL_DONT_INLINE void initialize() + { + Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); + } + + BTL_DONT_INLINE void calculate() + { + Interface::parallel_qr_decomp(Local_A, desc); + } + + BTL_DONT_INLINE void check_result() + { + } + +private: + int _size, desc[9], LocalRows, LocalCols; + double _cost; + bool iamroot; + + typename Interface::stl_matrix Global_A_stl; + typename Interface::stl_matrix Local_A_stl; + typename Interface::gene_matrix Local_A_ref; + typename Interface::gene_matrix Local_A; +}; + + +#endif /* ACTION_PARALLEL_QR_DECOMP_HH_ */ diff --git a/btl/actions/action_parallel_symm_ev.hh b/btl/actions/action_parallel_symm_ev.hh new file mode 100644 index 0000000..f0af0e3 --- /dev/null +++ b/btl/actions/action_parallel_symm_ev.hh @@ -0,0 +1,121 @@ +#ifndef ACTION_PARALLEL_SYMM_EV_HH_ +#define ACTION_PARALLEL_SYMM_EV_HH_ + +#include "utilities.h" +#include "init/init_function.hh" +#include "init/init_vector.hh" + +#include "lapack_interface.hh" +#include "STL_interface.hh" + +#include <string> + +template<class Interface> +class Action_parallel_symm_ev { + +public : + + // Constructor + BTL_DONT_INLINE Action_parallel_symm_ev( int size ) : _size(size) + { + MESSAGE("Action_parallel_symm_ev constructor"); + + int myid, procnum; + blacs_pinfo_(&myid, &procnum); + iamroot = (myid == 0); + + // STL matrix and vector initialization + if (iamroot) { + init_vector<pseudo_random>(Global_A_stl, size*size); + init_vector<null_function>(Global_Z_stl, size*size); + } + init_vector<null_function>(Local_w_stl, size); + + const int blocksize = std::max(std::min(size/4, 64), 2); + Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, size, size, blocksize, blocksize); + Interface::scatter_matrix(Global_Z_stl, Local_Z_stl, descZ, size, size, blocksize, blocksize); + LocalRows = descA[8]; + LocalCols = Local_A_stl.size()/descA[8]; + + // Generic local matrix and vectors initialization + Interface::matrix_from_stl(Local_A_ref, Local_A_stl); + Interface::matrix_from_stl(Local_A , Local_A_stl); + Interface::matrix_from_stl(Local_Z_ref, Local_Z_stl); + Interface::matrix_from_stl(Local_Z , Local_Z_stl); + Interface::vector_from_stl(Local_w , Local_w_stl); + Interface::vector_from_stl(Local_w_ref, Local_w_stl); + + _cost = size*size*size; + } + + + // Invalidate copy constructor + Action_parallel_symm_ev(const Action_parallel_symm_ev&) + { + INFOS("illegal call to Action_parallel_symm_ev copy constructor"); + exit(1); + } + + // Destructor + ~Action_parallel_symm_ev() + { + MESSAGE("Action_parallel_lu_decomp destructor"); + + // Deallocation + Interface::free_matrix(Local_A_ref, Local_A_stl.size()); + Interface::free_matrix(Local_A , Local_A_stl.size()); + Interface::free_matrix(Local_Z_ref, Local_Z_stl.size()); + Interface::free_matrix(Local_Z , Local_Z_stl.size()); + Interface::free_vector(Local_w_ref); + Interface::free_vector(Local_w ); + } + + // Action name + static inline std::string name() + { + return "symm_ev_" + Interface::name(); + } + + double nb_op_base() + { + return _cost; + } + + BTL_DONT_INLINE void initialize() + { + Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); + Interface::copy_matrix(Local_Z_ref, Local_Z, Local_Z_stl.size()); + Interface::copy_vector(Local_w_ref, Local_w, Local_w_stl.size()); + } + + BTL_DONT_INLINE void calculate() + { + Interface::parallel_symm_ev(Local_A, descA, Local_w, Local_Z, descZ); + } + + BTL_DONT_INLINE void check_result() + { + } + +private: + int _size, descA[9], descZ[9], LocalRows, LocalCols; + double _cost; + bool iamroot; + + typename Interface::stl_matrix Global_A_stl; + typename Interface::stl_matrix Local_A_stl; + typename Interface::gene_matrix Local_A_ref; + typename Interface::gene_matrix Local_A; + + typename Interface::stl_matrix Global_Z_stl; + typename Interface::stl_matrix Local_Z_stl; + typename Interface::gene_matrix Local_Z_ref; + typename Interface::gene_matrix Local_Z; + + typename Interface::stl_vector Local_w_stl; + typename Interface::gene_vector Local_w_ref; + typename Interface::gene_vector Local_w; +}; + + +#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */ diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp index e7b636b..c209afe 100644 --- a/btl/libs/PBLAS/main.cpp +++ b/btl/libs/PBLAS/main.cpp @@ -14,6 +14,8 @@ #include "action_parallel_matrix_vector_product.hh" #include "action_parallel_lu_decomp.hh" #include "action_parallel_cholesky.hh" +#include "action_parallel_qr_decomp.hh" +#include "action_parallel_symm_ev.hh" #include <string> @@ -24,7 +26,7 @@ int main(int argc, char **argv) bool iamroot = blacsinit(&argc, &argv); bool - general_solve=false, least_squares=false, lu_decomp=false, cholesky=false, + general_solve=false, qr_decomp=false, lu_decomp=false, cholesky=false, symm_ev=false ; @@ -32,7 +34,7 @@ int main(int argc, char **argv) for (int i = 1; i < argc; ++i) { std::string arg = argv[i]; if (arg == "general_solve") general_solve = true; - else if (arg == "least_squares") least_squares = true; + else if (arg == "qr_decomp") qr_decomp = true; else if (arg == "lu_decomp") lu_decomp = true; else if (arg == "cholesky") cholesky = true; else if (arg == "symm_ev") symm_ev = true; @@ -42,8 +44,8 @@ int main(int argc, char **argv) // if (general_solve) // distr_bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); -// if (least_squares) -// distr_bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); + if (qr_decomp) + distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); if (lu_decomp) distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); @@ -51,8 +53,8 @@ int main(int argc, char **argv) if (cholesky) distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); -// if (symm_ev) -// distr_bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); + if (symm_ev) + distr_bench<Action_parallel_symm_ev<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); int iZERO = 0; diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h index 973b91c..a6cbeb2 100644 --- a/btl/libs/PBLAS/pblas.h +++ b/btl/libs/PBLAS/pblas.h @@ -54,6 +54,14 @@ extern "C" { void pspotrf_(const char*, const int*, float*, const int*, const int*, const int*, int*); void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*); + // qr_decomp + void psgeqpf_(const int*, const int*, float*, const int*, const int*, const int*, int*, float*, float*, const int*, int*); + void pdgeqpf_(const int*, const int*, double*, const int*, const int*, const int*, int*, double*, double*, const int*, int*); + + // symm_ev + void pssyevd_(const char*, const char*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, int*, const int*, int*); + void pdsyevd_(const char*, const char*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, int*, const int*, int*); + #ifdef __cplusplus } diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh index 1dbf3b9..4522946 100644 --- a/btl/libs/PBLAS/pblas_interface_impl.hh +++ b/btl/libs/PBLAS/pblas_interface_impl.hh @@ -61,8 +61,58 @@ public: const char UPLO = 'U'; int info; PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info); + if (info != 0) + cerr << " { cholesky error : " << info << " } "; + } + + static inline void parallel_qr_decomp(gene_matrix& X, const int* desc) + { + const int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + ctxt = desc[1]; + + int myrow, mycol, nprow, npcol, lwork; + SCALAR lworkd; + blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol); + + const int iONE = 1, iZERO = 0, imONE = -1, + ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol); + int info; + std::vector<int> ipiv(ipivdim); + std::vector<SCALAR> tau(ipivdim); + + // Retrieve LWORK + PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info); + lwork = static_cast<int>(lworkd); +// if (info != 0) +// cerr << " { qr_decomp lwork error } "; + + std::vector<SCALAR> work(lwork); + PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info); // if (info != 0) -// cerr << " { cholesky error : " << info << " } "; +// cerr << " { qr_decomp computation error } "; } -}; + static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ) + { + const char jobz = 'V', uplo = 'u'; + const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1; + std::vector<SCALAR> work; + std::vector<int> iwork; + int lwork, liwork, info; + SCALAR lworkd; + + // Retrieve l(i)work + PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w, + Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info); + lwork = static_cast<int>(lworkd); + work.resize(lwork); iwork.resize(liwork); +// if (info != 0) +// cerr << " { symm_ev l(i)work error } "; + + PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w, + Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info); +// if (info != 0) +// cerr << " { symm_ev computation error } "; + } +}; @@ -5,7 +5,8 @@ numproc = 4 class Module(btlbase.BTLBase): def _initialize(self): self.libname = "scalapack" - self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky'] + self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky', + 'qr_decomp', 'symm_ev'] def _parse_args(self, args): # Parse arguments |