From 69ad6a79043e656812e70271d79e2adae292809f Mon Sep 17 00:00:00 2001 From: zhangzhihao <1900017707@pku.edu.cn> Date: Mon, 1 May 2023 01:27:12 +0800 Subject: [PATCH 01/44] feat pexsi --- source/Makefile | 6 + source/Makefile.Objects | 7 + source/Makefile.vars | 21 +- source/module_base/parallel_global.cpp | 2 + source/module_base/parallel_global.h | 1 + source/module_hsolver/diago_pexsi.cpp | 48 + source/module_hsolver/diago_pexsi.h | 36 + source/module_hsolver/hsolver_lcao.cpp | 18 + source/module_hsolver/pexsi/DistBCDMatrix.cpp | 163 ++ source/module_hsolver/pexsi/DistBCDMatrix.h | 63 + source/module_hsolver/pexsi/DistCCSMatrix.cpp | 112 ++ source/module_hsolver/pexsi/DistCCSMatrix.h | 46 + .../pexsi/DistMatrixTransformer.cpp | 1438 +++++++++++++++++ .../pexsi/DistMatrixTransformer.h | 20 + source/module_hsolver/pexsi/pexsi_solver.cpp | 59 + source/module_hsolver/pexsi/pexsi_solver.h | 30 + source/module_hsolver/pexsi/simplePEXSI.cpp | 697 ++++++++ source/module_hsolver/pexsi/simplePEXSI.h | 8 + source/module_io/input.cpp | 14 + 19 files changed, 2786 insertions(+), 3 deletions(-) create mode 100644 source/module_hsolver/diago_pexsi.cpp create mode 100644 source/module_hsolver/diago_pexsi.h create mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.cpp create mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.h create mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.cpp create mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.h create mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.cpp create mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.h create mode 100644 source/module_hsolver/pexsi/pexsi_solver.cpp create mode 100644 source/module_hsolver/pexsi/pexsi_solver.h create mode 100644 source/module_hsolver/pexsi/simplePEXSI.cpp create mode 100644 source/module_hsolver/pexsi/simplePEXSI.h diff --git a/source/Makefile b/source/Makefile index 822d8487aa..faaf6697b8 100644 --- a/source/Makefile +++ b/source/Makefile @@ -138,6 
+138,12 @@ ifdef LIBTORCH_DIR endif endif +ifdef PEXSI_DIR + INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE} + LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB} + HONG += -D__PEXSI +endif + ifdef DeePMD_DIR HONG += -D__DPMD -DHIGH_PREC OPTS += -Wl,--no-as-needed diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 2acb7d866a..97b591d444 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -24,6 +24,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ +./module_hsolver/pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -238,6 +239,12 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ + diago_pexsi.o\ + DistBCDMatrix.o\ + DistCCSMatrix.o\ + DistMatrixTransformer.o\ + pexsi_solver.o\ + simplePEXSI.o\ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 7c510d4ed3..d19109fc96 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -29,10 +29,19 @@ OPENMP = OFF ## CEREAL_DIR should contain an include folder. 
#---------------------------------------------------------------------- -ELPA_DIR = /usr/local/include/elpa-2021.05.002 -ELPA_INCLUDE_DIR = ${ELPA_DIR}/elpa +ELPA_DIR = /root/lib/ELPA +ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/ + +CEREAL_DIR = /root/lib/cereal +DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0 +DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include +DSUPERLU_LIB = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a + +SCOTCH_INCLUDE = -I/usr/local/include +PTSCOTCH_DIR = /root/workspace/scotch_6.0.0 +PTSCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a +SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a -CEREAL_DIR = /usr/local/include/cereal ##------------------- FOR GNU COMPILER ------------------------------ @@ -59,8 +68,14 @@ CEREAL_DIR = /usr/local/include/cereal ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR +## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a ##--------------------------------------------------------------------- +PEXSI_DIR = /root/workspace/pexsi_v2.0.0 +PEXSI_LIB_DIR = ${PEXSI_DIR}/src +PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include + + # LIBTORCH_DIR = /usr/local # LIBNPY_DIR = /usr/local diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp index 1542382fe1..954b0662cd 100644 --- a/source/module_base/parallel_global.cpp +++ b/source/module_base/parallel_global.cpp @@ -22,6 +22,7 @@ MPI_Comm STO_WORLD; MPI_Comm PARAPW_WORLD; // qianrui add it for sto-dft 2021-4-14 MPI_Comm GRID_WORLD; // mohan add 2012-01-13z MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 +MPI_Group GRID_GROUP; void Parallel_Global::myProd(std::complex *in,std::complex *inout,int 
*len,MPI_Datatype *dptr) { @@ -80,6 +81,7 @@ void Parallel_Global::split_diag_world(const int &diag_np) } MPI_Comm_split(MPI_COMM_WORLD, color, key, &DIAG_WORLD); + MPI_Comm_group(DIAG_WORLD, &GRID_GROUP); MPI_Comm_rank(DIAG_WORLD, &GlobalV::DRANK); MPI_Comm_size(DIAG_WORLD, &GlobalV::DSIZE); GlobalV::DCOLOR=color; diff --git a/source/module_base/parallel_global.h b/source/module_base/parallel_global.h index d0e3456822..2b6d59c6c0 100644 --- a/source/module_base/parallel_global.h +++ b/source/module_base/parallel_global.h @@ -16,6 +16,7 @@ extern MPI_Comm STO_WORLD; extern MPI_Comm PARAPW_WORLD; extern MPI_Comm GRID_WORLD; //mohan add 2012-01-13 extern MPI_Comm DIAG_WORLD; //mohan add 2012-01-13 +extern MPI_Group GRID_GROUP; #endif //void myProd(std::complex *in,std::complex *inout,int *len,MPI_Datatype *dptr); diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp new file mode 100644 index 0000000000..8a7257d747 --- /dev/null +++ b/source/module_hsolver/diago_pexsi.cpp @@ -0,0 +1,48 @@ +#ifdef __PEXSI +#include "diago_pexsi.h" + +#include "c_pexsi_interface.h" +#include "module_base/global_variable.h" +#include "module_base/lapack_connector.h" +#include "module_base/timer.h" +#include "module_base/tool_quit.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "pexsi/pexsi_solver.h" + +typedef hamilt::MatrixBlock matd; +typedef hamilt::MatrixBlock> matcd; + +namespace hsolver +{ + +void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + matd h_mat, s_mat; + phm_in->matrix(h_mat, s_mat); + std::vector eigen(GlobalV::NLOCAL, 0.0); + MPI_Comm COMM_DIAG = MPI_COMM_WORLD; + this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); + this->ps->solve(); + std::cout << 
this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy + << std::endl; + ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed"); +} +void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi>& psi, double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed"); +} + +} // namespace hsolver +#endif \ No newline at end of file diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h new file mode 100644 index 0000000000..a48b92b867 --- /dev/null +++ b/source/module_hsolver/diago_pexsi.h @@ -0,0 +1,36 @@ +#ifndef DIGAOPEXSI_H +#define DIGAOPEXSI_H + +#ifdef __PEXSI + +#define DIGAOPEXSI_H +#endif + +#include "module_basis/module_ao/parallel_orbitals.h" +#include "diagh.h" +#include "pexsi/pexsi_solver.h" + +namespace hsolver +{ + +class DiagoPexsi : public DiagH +{ + public: + DiagoPexsi(const Parallel_Orbitals* ParaV_in) + { + this->ParaV = ParaV_in; + } + void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) override; + void diag(hamilt::Hamilt* phm_in, psi::Psi> &psi, double *eigenvalue_in) override; + const Parallel_Orbitals* ParaV; + double* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + PEXSI_Solver* ps; +}; + +} + +#endif diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 0c964ec501..e870e4f191 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -7,6 +7,7 @@ #ifdef __ELPA #include "diago_elpa.h" #endif +#include "diago_pexsi.h" namespace hsolver { @@ -79,6 +80,23 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, */ ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); } + else if (this->method == "pexsi") + { + if (pdiagh != nullptr) + { + if (pdiagh->method != this->method) + { + delete[] pdiagh; + 
pdiagh = nullptr; + } + } + if (pdiagh == nullptr) + { + DiagoPexsi* tem = new DiagoPexsi(this->ParaV); + this->pdiagh = tem; + pdiagh->method = this->method; + } + } else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp new file mode 100644 index 0000000000..8a3f2740e2 --- /dev/null +++ b/source/module_hsolver/pexsi/DistBCDMatrix.cpp @@ -0,0 +1,163 @@ +#include +#include "DistBCDMatrix.h" +extern "C" +{ + void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol); + int Cblacs_pnum(int blacs_ctxt, int prow, int pcol); +}; + +/* +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol) +{ + this->comm=comm; + this->group=group; + MPI_Comm_rank(comm, &this->myproc); + this->nprows=nprow; + this->npcols=npcol; + this->size=size; + this->nblk=nblk; + this->nrow=nrow; + this->ncol=ncol; + this->LAYOUT='R'; +} + +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT) +{ + this->comm=comm; + this->group=group; + MPI_Comm_rank(comm, &this->myproc); + this->nprows=nprow; + this->npcols=npcol; + this->size=size; + this->nblk=nblk; + this->nrow=nrow; + this->ncol=ncol; + if(LAYOUT == 'R' || + LAYOUT == 'r' || + LAYOUT == 'C' || + LAYOUT == 'c') + { + this->LAYOUT=LAYOUT; + } else + { + throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); + } +} + +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol) +{ + this->comm=comm; + this->group=group; + this->blacs_ctxt=blacs_ctxt; + this->size=size; + this->nblk=nblk; + this->nrow=nrow; + this->ncol=ncol; + this->LAYOUT='R'; + Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); + if(comm != MPI_COMM_NULL) + { + 
MPI_Comm_rank(comm, &this->myproc); + MPI_Comm_size(comm, &this->nprocs); + }else + { + this->myproc=-1; + this->nprocs=-1; + } +} +*/ + +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT) +{ + this->comm=comm; + this->group=group; + this->blacs_ctxt=blacs_ctxt; + this->size=size; + this->nblk=nblk; + this->nrow=nrow; + this->ncol=ncol; + if(LAYOUT == 'R' || + LAYOUT == 'r' || + LAYOUT == 'C' || + LAYOUT == 'c') + { + this->LAYOUT=LAYOUT; + } else + { + throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); + } + + if(comm != MPI_COMM_NULL) + { + MPI_Comm_rank(comm, &this->myproc); + Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); + }else + { + this->myproc=-1; + this->myprow=-1; + this->mypcol=-1; + } + + // synchronize matrix parameters to all processes, including those are not in bcd group + int myid_in_comm_world; + MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world); + if(myid_in_comm_world == 0) + { + MPI_Comm_size(comm, &this->nprocs); + int PARA_BCAST[4]={this->nblk, this->nprocs, this->nprows, this->npcols}; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + } + else + { + int PARA_BCAST[4]; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + this->nblk=PARA_BCAST[0]; + this->nprocs=PARA_BCAST[1]; + this->nprows=PARA_BCAST[2]; + this->npcols=PARA_BCAST[3]; + } + this->prowpcol2pnum=new int[this->nprocs]; + if(myid_in_comm_world == 0) + { + for(int i=0; inprows; ++i) + { + for(int j=0; jnpcols; ++j) + { + this->prowpcol2pnum[i*this->npcols+j]=Cblacs_pnum(this->blacs_ctxt, i, j); + } + } + } + MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD); +} + +DistBCDMatrix::~DistBCDMatrix() +{ + delete[] prowpcol2pnum; +} + +int DistBCDMatrix::globalRow(const int localRow) +{ + return (localRow/nblk*nprows+myprow)*nblk+localRow%nblk; +} + +int DistBCDMatrix::globalCol(const int localCol) +{ + return 
(localCol/nblk*npcols+mypcol)*nblk+localCol%nblk; +} + +int DistBCDMatrix::localRow(const int globalRow, int& myprow) +{ + myprow=int((globalRow%(nblk*nprows))/nblk); + return int(globalRow/(nblk*nprows))*nblk+globalRow%nblk; +} + +int DistBCDMatrix::localCol(const int globalCol, int& mypcol) +{ + mypcol=int((globalCol%(nblk*npcols))/nblk); + return int(globalCol/(nblk*npcols))*nblk+globalCol%nblk; +} + +int DistBCDMatrix::pnum(const int prow, const int pcol) +{ + return this->prowpcol2pnum[prow*this->npcols+pcol]; +} diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h new file mode 100644 index 0000000000..a0b8c7a907 --- /dev/null +++ b/source/module_hsolver/pexsi/DistBCDMatrix.h @@ -0,0 +1,63 @@ +// a Block Cyclic Data Distribution matrix +// http://www.netlib.org/utk/papers/factor/node3.html +// local matrix elements is stored in column major +// used for pexsi +class DistBCDMatrix { + + public: + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol); + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT); + + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol); + DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT); + ~DistBCDMatrix(); + + int globalRow(const int localRow); + int globalCol(const int localCol); + int localRow(const int globalRow, int& myprow); + int localCol(const int globalCol, int& mypcol); + int pnum(const int prow, const int pcol); + //~DistBCDMatrix(); + + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // blacs context + int blacs_ctxt; + + // row and column of process grid + int nprows; + int npcols; + + // total number of processes + int nprocs; + + // Matrix size + int size; + + // block size + int nblk; + + // row and c0lumn of Local 
matrix part + int nrow; + int ncol; + + + // protected: + + // private: + + // current process row and column + int myprow; + int mypcol; + + // current process id + int myproc; + + int *prowpcol2pnum; + // the local data layout + // 'R' or 'r' for row-major, which is used in C/C++ + // 'C' or 'c' for column-major, which is used in Fortran + char LAYOUT; +}; diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp new file mode 100644 index 0000000000..45a14d6ac7 --- /dev/null +++ b/source/module_hsolver/pexsi/DistCCSMatrix.cpp @@ -0,0 +1,112 @@ +#include +#include "DistCCSMatrix.h" + +DistCCSMatrix::DistCCSMatrix(void) +{ + this->comm=MPI_COMM_WORLD; + this->size=0; + this->nnz=0; + this->nnzLocal=0; + this->numColLocal=0; + this->colptrLocal=NULL; + this->rowindLocal=NULL; +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) +{ + this->comm=comm_in; + this->size=0; + this->nnz=0; + this->nnzLocal=0; + this->numColLocal=0; + this->colptrLocal=NULL; + this->rowindLocal=NULL; +} + +DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) +{ + this->comm=MPI_COMM_WORLD; + this->size=size_in; + this->nnzLocal=nnzLocal_in; + MPI_Request req; + MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req); + this->numColLocal=0; + this->colptrLocal=new int[size]; + this->rowindLocal=new int[nnzLocal]; + + MPI_Status req_status; + MPI_Wait(&req, &req_status); +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) +{ + this->comm=comm_in; + this->nproc_data=nproc_data_in; + int nproc_data_range[3]={0, this->nproc_data-1, 1}; + // create processes group with data: this->group_data and associated communicator + MPI_Comm_group(this->comm, &this->group); + MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data); + this->comm_data=MPI_COMM_NULL; + MPI_Comm_create(this->comm, this->group_data, &this->comm_data); + this->size=size_in; + this->nnz=0; + 
this->nnzLocal=0; + int myproc; + if(comm != MPI_COMM_NULL) + { + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &myproc); + if(myprocnumColLocal=size/nproc_data; + this->firstCol=size/nproc_data*myproc; + this->colptrLocal=new int[this->numColLocal+1]; + this->rowindLocal=NULL; + } + else if(myproc==nproc_data-1) + { + this->numColLocal=size-myproc*(size/nproc_data); + this->firstCol=size/nproc_data*myproc; + this->colptrLocal=new int[this->numColLocal+1]; + this->rowindLocal=NULL; + } + else + { + this->numColLocal=0; + this->firstCol=size-1; + this->colptrLocal=new int[this->numColLocal+1]; + this->rowindLocal=NULL; + } + } +} + +int DistCCSMatrix::globalCol(int localCol) +{ + return this->firstCol+localCol; +} + + +// NOTE: the process id is 0-based +int DistCCSMatrix::localCol(int globalCol, int& mypcol) +{ + mypcol=int(globalCol/int(this->size/this->nproc_data)); + if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1; + return mypcol>0 ? globalCol-(this->size/this->nproc_data)*mypcol : globalCol; +} + +void DistCCSMatrix::setnnz(int nnzLocal_in) +{ + if(this->comm_data != MPI_COMM_NULL) + { + MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data); + this->nnzLocal=nnzLocal_in; + this->rowindLocal=new int[nnzLocal]; + this->colptrLocal[this->numColLocal]=nnzLocal_in+1; + } +} + +DistCCSMatrix::~DistCCSMatrix() +{ + delete[] colptrLocal; + delete[] rowindLocal; +} diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h new file mode 100644 index 0000000000..43d1126bf6 --- /dev/null +++ b/source/module_hsolver/pexsi/DistCCSMatrix.h @@ -0,0 +1,46 @@ +// Distributed Compressed Column Storage Matrix format +// used for PEXSI +class DistCCSMatrix { + + public: + DistCCSMatrix(); + DistCCSMatrix(MPI_Comm comm); + DistCCSMatrix(int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* 
index); + + int globalCol(int localCol); + int localCol(int globalCol, int& mypcol); + void setnnz(int nnzLocal); + ~DistCCSMatrix(); + + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // total number of processes and the processes with data in + int nprocs; + int nproc_data; + MPI_Group group_data; + MPI_Comm comm_data; + + // Matrix size + int size; + + // Number of non-zero values in the matrix + int nnz; + + // Number of non-zero values in the matrix of the local process + int nnzLocal; + + // number of columns in current process + int numColLocal; + + // the first column index in current process + int firstCol; + + // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal + int* colptrLocal; + int* rowindLocal; +}; + diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp new file mode 100644 index 0000000000..285d3cdd94 --- /dev/null +++ b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp @@ -0,0 +1,1438 @@ +#include +#include +#include +#include +#include +#include +#include "DistBCDMatrix.h" +#include "DistCCSMatrix.h" + + +// for debug +#ifdef _DEBUG +#include +#include +#include +#include "src_pw/global.h" +#endif +// end debug + +// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a negtive value +// the size_process and displacement_process array will be changed after the index is found +// isFirst: wether this function is called for the first time for a index array; +// nprocs: total number of processes +// size_process: the number of indices in each process +// displacement_process: the start position in each process +// index: the array contains the indices +inline int MinimumIndexPosition(const bool isFirst, const int nprocs, + int* size_process, int* displacement_process, const int* index) +{ + // usually the minimum index is continuous, so it will be a good idea to + // 
check the one next to the previous index first. + static int pre_position; // previous position in index array of minimum index, + static int pre_process; // the process contains previous index + + int minimum_index=INT_MAX; // the minimum index, initial value is a large number which is larger than any other index; + int minimum_position=-1; + int minimum_process=-1; + + if(isFirst) + { + for(int i=0; i0) + { + if(minimum_index>index[displacement_process[i]]) // find a smaller index + { + minimum_position=displacement_process[i]; + minimum_index=index[minimum_position]; + minimum_process=i; + } + } + } + if(minimum_process>=0) // find it! + { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position=minimum_position; + pre_process=minimum_process; + return minimum_position; + } + else + { + // check the next one of pre_position + if(size_process[pre_process]>0 && // the previous process still has elements + index[pre_position+1]==index[pre_position]+1) // find it! + { + ++displacement_process[pre_process]; + --size_process[pre_process]; + ++pre_position; // new pre_position is the next one + // new pre_process keeps the same + return pre_position; // current position is the new pre_position + } + + // if the next one of pre_position is not the minimum one + for(int i=0; i0) + { + if(minimum_index>index[displacement_process[i]]) + { + minimum_position=displacement_process[i]; + minimum_index=index[minimum_position]; + minimum_process=i; + } + } + } + if(minimum_process>=0) // find it! 
+ { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position=minimum_position; + pre_process=minimum_process; + return minimum_position; + } +} + +inline void buildCCSParameter(const int size, const int nprocs, + std::vector size_process, std::vector displacement_process, + const int* position_index, DistCCSMatrix &DST_Matrix, int* buffer2ccsIndex) +{ + // find the minimum one from left buffer index + if(DST_Matrix.nnzLocal<=0) return; + + int pre_col=-1; + int nnz_now=0; + int p_mini; + p_mini=MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index); + while(p_mini>=0) + { + int index_mini=position_index[p_mini]; + int col_mini=index_mini/DST_Matrix.size; //-DST_Matrix.firstCol; + int row_mini=index_mini%DST_Matrix.size; + if(col_mini>pre_col) // a new column starts, column pointer is a 1-based array + { + pre_col=col_mini; + DST_Matrix.colptrLocal[col_mini]=nnz_now+1; + } + DST_Matrix.rowindLocal[nnz_now]=row_mini+1; // setup row index array, which is also 1-based + // copy data from buffer to M, be careful M is a 0-based array + buffer2ccsIndex[nnz_now]=p_mini; + ++nnz_now; + p_mini=MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); + } + // The last element of colptrLocal is nnzLocal+1 + DST_Matrix.colptrLocal[DST_Matrix.numColLocal]=nnz_now+1; +} + +inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, + double* nzvalLocal) +{ + for(int i=0; i& P) +{ + for(int i=0; i &rowidx, std::vector &colidx) +{ + #ifdef _DEBUG + char f_log[80]; + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + std::ofstream log; + if(myproc<100) + { + sprintf(f_log, "transformer_%2.2d.log", myproc); + log.open(f_log, std::ios::app); + log<<"start count nnz"< pH; + countMatrixDistribution(nrow*ncol, H_2d, pH); + std::map pS; + countMatrixDistribution(nrow*ncol, H_2d, pS); + plog<<"Element in H distribution:\n"; + // std::stringstream 
ss; + // ss.str(""); + for(auto iter=pH.begin(); iter!=pH.end(); ++iter) + { + // ss<<"p["<first<<"] : "<second<first<<"] : "<second<first<<"] : "<second<first<<"] : "<second< ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } else if(LAYOUT == 'R' || LAYOUT == 'r') + { + for(int i=0; i ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } else + { + #ifdef _DEBUG + if(myproc<100) log<<"unknown LAYOUT: "< &rowidx, std::vector &colidx, + int &sender_size, std::vector &sender_size_process, std::vector &sender_displacement_process, + int &receiver_size, std::vector &receiver_size_process, std::vector &receiver_displacement_process, + std::vector &buffer2ccsIndex) +{ + // debug + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + #ifdef _DEBUG + std::ofstream log; + if(myproc<100) + { + char f_log[80]; + sprintf(f_log, "transformer_%2.2d.log", myproc); + log.open(f_log, std::ios::app); + log<<"enter buildTransformParameter"< receiver_index(receiver_size); + MPI_Alltoallv(&sender_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT, + &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS); + // debug + #ifdef _DEBUG + if(myproc<100) log<<"receiver_index is got"< sender_size_process(NPROC_TRANS); + std::vector sender_displacement_process(NPROC_TRANS); + int receiver_size; + std::vector receiver_size_process(NPROC_TRANS); + std::vector receiver_displacement_process(NPROC_TRANS); + + #ifdef _DEBUG + if(myproc<100) + { + log<<"nprocs: "< rowidx; + std::vector colidx; + int nnz=0; + #ifdef _DEBUG + if(myproc<100) log<<"start counting nnz..."< buffer2ccsIndex; + buildTransformParameter(SRC_Matrix, DST_Matrix, + NPROC_TRANS, GROUP_TRANS, COMM_TRANS, + nnz, rowidx, colidx, + sender_size, sender_size_process, sender_displacement_process, + receiver_size, 
receiver_size_process, receiver_displacement_process, buffer2ccsIndex); + // Do transformation + #ifdef _DEBUG + if(myproc<100) log<<"Parameters are built"< sender_buffer(sender_size); + std::vector receiver_buffer(receiver_size); + // put H to sender buffer + if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + { + for(int i=0; i sender_size_process(NPROC_TRANS); + // std::vector sender_displacement_process(NPROC_TRANS); + // std::vector receiver_size_process(NPROC_TRANS); + // std::vector receiver_displacement_process(NPROC_TRANS); + int sender_size_process[NPROC_TRANS]; + int sender_displacement_process[NPROC_TRANS]; + int receiver_size_process[NPROC_TRANS]; + int receiver_displacement_process[NPROC_TRANS]; + #ifdef _DEBUG + if(myproc<100) log<<"NPROC_TRANS = "< proc_map_bcd_trans; + int myproc_trans; + MPI_Comm_rank(COMM_TRANS, &myproc_trans); + if(myproc_trans == 0) + { + MPI_Group_size(DST_Matrix.group, &nproc_bcd); + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + for(int i=0; i 0) + { + sender_index=new int[sender_size]; + for(int i=0; i 0) + { + receiver_index=new int[2*receiver_size]; + receiver_buffer=new double[receiver_size]; + for(int i=0; i<2*receiver_size; ++i) + { + receiver_index[i]=-1; + } + for(int i=0; i p(sender_displacement_process); + int p[NPROC_TRANS]; + for(int i=0; i= DST_Matrix.nprows || recv_prow_bcd < 0) + { + log<<"ERROR: recv_prow_bcd error! recv_prow_bcd is "< NPROC_TRANS || recv_proc_bcd < 0) + { + log<<"ERROR: recv_proc_bcd outbound! recv_proc_bcd is "<= sender_size || p[recv_proc] < 0) + { + log<<"ERROR: sender_index's index outbound! "<= (2*sender_size)|| (p[recv_proc]*2+1) < 0) + { + log<<"ERROR: dst_index's index outbound! recv_proc:"<SRC_Matrix.nnzLocal) + { + log<<"ERROR! sender_index outbound: "<DST_Matrix.size) + { + log<<"ERROR! 
dst_index outbound: "< 0) + { + sprintf(f_log, "sender_index_from_%2.2d_to_%2.2d.log", myproc_trans, i); + log_sender_index.open(f_log, std::ios::app); + for(int j=sender_displacement_process[i]; j 0) + { + sprintf(f_log, "dst_index_from_%2.2d_to_%2.2d.log", myproc_trans, i); + log_dst_index.open(f_log, std::ios::app); + for(int j=sender_displacement_process[i]; j 0) + { + sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans); + log_rcv_index.open(f_log, std::ios::app); + for(int j=receiver_displacement_process[i]; jDST_Matrix.nrow) + { + log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "<DST_Matrix.ncol) + { + log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "<DST_Matrix.nrow) + { + log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "<DST_Matrix.ncol) + { + log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "<=DST_Matrix_elem) + { + log<<"idx for DM ERROR: idx is "<=DST_Matrix_elem) + { + log<<"idx for EDM ERROR: idx is "< +#include + +PEXSI_Solver::PEXSI_Solver(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double* DM, + double* EDM, + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy) +{ + this->blacs_text = blacs_text; + this->nb = nb; + this->nrow = nrow; + this->ncol = ncol; + this->h = new double[nrow * ncol]; + this->s = new double[nrow * ncol]; + std::memcpy(this->h, h, nrow * ncol * sizeof(double)); + std::memcpy(this->s, s, nrow * ncol * sizeof(double)); + this->DM = new double[nrow * ncol]; + this->EDM = new double[nrow * ncol]; + this->totalEnergyH = 0.0; + this->totalEnergyS = 0.0; + this->totalFreeEnergy = 0.0; +} + +int PEXSI_Solver::solve() +{ + extern MPI_Comm DIAG_WORLD; + extern MPI_Comm GRID_WORLD; + extern MPI_Group GRID_GROUP; + return simplePEXSI(MPI_COMM_WORLD, + MPI_COMM_WORLD, + GRID_GROUP, + this->blacs_text, + GlobalV::NLOCAL, + this->nb, + this->nrow, + this->ncol, + 'C', + this->h, + this->s, + GlobalV::nelec, + "PEXSIOPTION", 
+ this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); +} \ No newline at end of file diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/pexsi/pexsi_solver.h new file mode 100644 index 0000000000..95ade7c15f --- /dev/null +++ b/source/module_hsolver/pexsi/pexsi_solver.h @@ -0,0 +1,30 @@ +#ifndef PEXSI_Solver_H +#define PEXSI_Solver_H +class PEXSI_Solver +{ + public: + PEXSI_Solver(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double* DM, + double* EDM, + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy); + int solve(); + int blacs_text; + int nb; + int nrow; + int ncol; + double* h; + double* s; + double* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; +}; +#endif \ No newline at end of file diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp new file mode 100644 index 0000000000..6f1d2d1afe --- /dev/null +++ b/source/module_hsolver/pexsi/simplePEXSI.cpp @@ -0,0 +1,697 @@ +// use PEXSI to solve a Kohn-Sham equation +// the H and S matrices are given by 2D block cyclic distribution +// the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution +// #include "mpi.h" +#include +#include +#include +#include +#include +#include +#include +#include "c_pexsi_interface.h" +#include "module_base/lapack_connector.h" +#include "module_base/timer.h" +#include "module_base/tool_quit.h" +#include "DistCCSMatrix.h" +#include "DistBCDMatrix.h" +#include "DistMatrixTransformer.h" + +inline void strtolower(char *sa, char *sb) +{ + char c; + int len = strlen(sa); + for (int i = 0; i < len; i++) + { + c = sa[i]; + sb[i] = tolower(c); + } + sb[len] = '\0'; +} + +inline void setDefaultOption(int* int_para, double* double_para) +{ + // options.spin=2; + double_para[0]=2; + // 
options.gap=0; + double_para[2]=0; + // ZERO_Limit=DBL_MIN; + double_para[11]=DBL_MIN; + // options.matrixType=0; + int_para[3]=0; + // options.solver=1; + int_para[6]=1; + // options.ordering=0; + int_para[8]=0; + // options.rowOrdering=0; + int_para[9]=0; + // options.symmetric=0; + int_para[11]=0; + // options.transpose=0; + int_para[12]=0; + // options.nPoints=2; + int_para[14]=2; + // options.verbosity=1; + int_para[15]=1; +} + +int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOptions& options, int& numProcessPerPole, double& ZERO_Limit) +{ + + // temp variable arrays read from conf file and will be bcast to all processors + + // parameter array of type int, + // 0: numPole + // 1: isInertiaCount + // 2: maxPEXSIIter + // 3: matrixType + // 4: isSymbolicFactorize + // 5: isConstructCommPattern + // 6: solver + // 7: symmetricStorage + // 8: ordering + // 9: rowOrdering + // 10: npSymbFact + // 11: symmetric + // 12: transpose + // 13: method + // 14: nPoints + // 15: verbosity + // 16: numProcessPerPole + int int_para[17]; + + // parameter array of type double + // 0: spin + // 1: temperature + // 2: gap + // 3: deltaE + // 4: muMin0 + // 5: muMax0 + // 6: mu0 + // 7: muInertiaTolerance + // 8: muInertiaExpansion + // 9: muPEXSISafeGuard + // 10: numElectronPEXSITolerance + // 11: ZERO_Limit + double double_para[12]; + int myid; + MPI_Comm_rank(comm, &myid); + if(myid==0) + { + std::ifstream ifs(PexsiOptionFile.c_str()); + if(! 
ifs) + { + return 1; + } + setDefaultOption(int_para, double_para); + + ifs.clear(); + ifs.seekg(0); + + char key[128]; + char lowercase_key[128]; + const int LINE_LINGTH=1024; + char unused_string[LINE_LINGTH]; + + while(ifs.good()) + { + ifs >> key; + //~ cout<<"readin word is: "<>options.spin; + ifs>>double_para[0]; + //~ cout<<"double_para[0]: "<>options.temperature; + ifs>>double_para[1]; + //~ cout<<"double_para[1]: "<>options.gap; + ifs>>double_para[2]; + //~ cout<<"double_para[2]: "<>options.deltaE; + ifs>>double_para[3]; + //~ cout<<"double_para[3]: "<>options.numPole; + ifs>>int_para[0]; + //~ cout<<"int_para[0]: "<>options.isInertiaCount; + ifs>>int_para[1]; + //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; + ifs>>int_para[2]; + //~ cout<<"int_para[2]: "<>options.muMin0; + ifs>>double_para[4]; + //~ cout<<"double_para[4]: "<>options.muMax0; + ifs>>double_para[5]; + //~ cout<<"double_para[5]: "<>options.mu0; + ifs>>double_para[6]; + //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; + ifs>>double_para[7]; + //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; + ifs>>double_para[8]; + //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; + ifs>>double_para[9]; + //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; + ifs>>double_para[10]; + //~ cout<<"double_para[10]: "<>double_para[11]; + } + else if(strcmp("matrixtype", lowercase_key)==0) + { + //~ ifs>>options.matrixType; + ifs>>int_para[3]; + //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; + ifs>>int_para[4]; + //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; + ifs>>int_para[5]; + //~ cout<<"int_para[5]: "<>options.solver; + ifs>>int_para[6]; + //~ cout<<"int_para[6]: "<>options.symmetricStorage; + ifs>>int_para[7]; + //~ cout<<"int_para[7]: "<>options.ordering; + ifs>>int_para[8]; + //~ cout<<"int_para[8]: "<>options.rowOrdering; + ifs>>int_para[9]; + //~ cout<<"int_para[9]: "<>options.npSymbFact; + ifs>>int_para[10]; + //~ cout<<"int_para[10]: 
"<>options.symmetric; + ifs>>int_para[11]; + //~ cout<<"int_para[11]: "<>options.transpose; + ifs>>int_para[12]; + //~ cout<<"int_para[12]: "<>options.method; + ifs>>int_para[13]; + //~ cout<<"int_para[13]: "<>options.nPoints; + ifs>>int_para[14]; + //~ cout<<"int_para[14]: "<>options.verbosity; + ifs>>int_para[15]; + //~ cout<<"int_para[15]: "<>options.verbosity; + ifs>>int_para[16]; + //~ cout<<"int_para[16]: "<= (high-NPROC)) + { + flag=integral_part+1; + } + else + { + flag=integral_part; + } + for(i=flag; i>0; ++i) + { + if(NPROC%i == 0) break; + } + nprow=i; + npcol=NPROC/i; + } +} + +int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt, // communicator parameters + const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // matrix parameters + double* H, double* S, // input matrices + const double numElectronExact, const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, double*& EDM, // output matrices + double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy) // output energy +{ + int out_log=0; + if(out_log == 1) + { + std::stringstream ss; + int nproc_2D, nproc_PEXSI; + int myid_2D, myid_PEXSI; + if(comm_2D != MPI_COMM_NULL) + { + MPI_Comm_size(comm_2D, &nproc_2D); + MPI_Comm_rank(comm_2D, &myid_2D); + ss.str(""); + ss<<"\tIn 2D comm, myid = "< +// a simple interface for calling pexsi with 2D block cyclic distributed matrix +int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt, // communicator parameters + const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // input matrix parameters + double* H, double* S, // input matrices + const double nElectronExact, const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, double*& EDM, // output matrices + double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); \ No newline at end of file diff --git 
a/source/module_io/input.cpp b/source/module_io/input.cpp index 6cb48947d7..34807f17cc 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -3356,6 +3356,10 @@ void Input::Check(void) { ModuleBase::WARNING_QUIT("Input", "lapack can not be used with plane wave basis."); } + else if (ks_solver == "pexsi") + { + ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis."); + } else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav") { ModuleBase::WARNING_QUIT("Input", "please check the ks_solver parameter!"); @@ -3416,6 +3420,16 @@ void Input::Check(void) #ifndef __MPI ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); #endif + } + else if (ks_solver == "pexsi") + { +#ifndef __MPI + ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); +#else + GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; +#endif + + } else if (ks_solver != "default") { From 3c57992b1143f044b599a3c04a62fe2494cfe96d Mon Sep 17 00:00:00 2001 From: zhangzhihao <1900017707@pku.edu.cn> Date: Mon, 1 May 2023 02:29:35 +0800 Subject: [PATCH 02/44] fix : diag not completed --- source/module_base/global_function.h | 2 +- source/module_basis/module_ao/ORB_control.cpp | 2 +- .../module_ao/parallel_orbitals.cpp | 4 ++-- .../module_esolver/esolver_ks_lcao_elec.cpp | 2 +- .../hamilt_lcaodft/DM_gamma.cpp | 4 ++-- .../module_deepks/LCAO_deepks_odelta.cpp | 2 +- source/module_hsolver/diago_pexsi.cpp | 20 +++++++++++++++++-- 7 files changed, 26 insertions(+), 10 deletions(-) diff --git a/source/module_base/global_function.h b/source/module_base/global_function.h index fa77c76b26..c9bcef7914 100644 --- a/source/module_base/global_function.h +++ b/source/module_base/global_function.h @@ -352,7 +352,7 @@ double ddot_real( //========================================================== static inline bool IS_COLUMN_MAJOR_KS_SOLVER() { - return GlobalV::KS_SOLVER=="genelpa" || 
GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver"; + return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="pexsi"; } }//namespace GlobalFunc diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp index f3fbec9518..0439c57de1 100644 --- a/source/module_basis/module_ao/ORB_control.cpp +++ b/source/module_basis/module_ao/ORB_control.cpp @@ -183,7 +183,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, ofs_running << "\n SETUP THE DIVISION OF H/S MATRIX" << std::endl; // (1) calculate nrow, ncol, nloc. - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { ofs_running << " divide the H&S matrix using 2D block algorithms." << std::endl; #ifdef __MPI diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp index 1c02dc4bb6..c1d530cb84 100644 --- a/source/module_basis/module_ao/parallel_orbitals.cpp +++ b/source/module_basis/module_ao/parallel_orbitals.cpp @@ -80,7 +80,7 @@ void ORB_control::set_trace(std::ofstream& ofs_running) pv->ncol = nlocal; } #ifdef __MPI - else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") // xiaohui add 2013-09-02 + else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") // xiaohui add 2013-09-02 { // ofs_running << " nrow=" << nrow << std::endl; for (int irow = 0; irow < pv->nrow; irow++) @@ -245,7 +245,7 @@ void ORB_control::divide_HS_2d( pv->nloc = pv->MatrixInfo.col_num * pv->MatrixInfo.row_num; // init blacs context for genelpa - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "genelpa" || ks_solver == 
"scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { pv->blacs_ctxt = cart2blacs(pv->comm_2D, pv->dim0, diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index f016d778c9..f6b6545b96 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -100,7 +100,7 @@ namespace ModuleESolver if(this->psid==nullptr) { int ncol = this->LOWF.ParaV->ncol_bands; - if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" + if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER=="pexsi" #ifdef __CUSOLVER_LCAO ||GlobalV::KS_SOLVER=="cusolver" #endif diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp index c57e463fc7..079d0600a7 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp @@ -29,7 +29,7 @@ int Local_Orbital_Charge::setAlltoallvParameter(MPI_Comm comm_2D, int blacs_ctxt int myproc=0; Cblacs_gridinfo(blacs_ctxt, &nprows, &npcols, &myprow, &mypcol); - + std::cout << "testing" << "npcols:" << npcols << " nprows:" << nprows << endl; Cblacs_pinfo(&myproc, &nprocs); // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nprocs",nprocs); @@ -288,7 +288,7 @@ void Local_Orbital_Charge::gamma_file(psi::Psi* psid, Local_Orbital_wfc //allocate psi int ncol = this->ParaV->ncol_bands; - if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx" + if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi" #ifdef __CUSOLVER_LCAO ||GlobalV::KS_SOLVER=="cusolver" #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp index ecdbfb6dab..351681edf9 100644 
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp @@ -64,7 +64,7 @@ void LCAO_Deepks::cal_o_delta_k(const std::vector= 0 && nu >= 0) { int iic; - if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx") // save the matrix as column major format + if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi") // save the matrix as column major format { iic = mu + nu*ParaO.nrow; } diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 8a7257d747..cf37092a73 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -36,12 +36,28 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, dou this->ps->solve(); std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy << std::endl; - ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed"); } void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi>& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); - ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed"); + matd h_mat, s_mat; + phm_in->matrix(h_mat, s_mat); + std::vector eigen(GlobalV::NLOCAL, 0.0); + MPI_Comm COMM_DIAG = MPI_COMM_WORLD; + this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); + this->ps->solve(); + std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy + << std::endl; } } // namespace hsolver From c3babb7948453edd014a817cc983d7645db0b66a Mon Sep 17 00:00:00 2001 From: zhangzhihao <1900017707@pku.edu.cn> Date: Thu, 4 May 2023 00:19:34 +0800 Subject: [PATCH 03/44] feat --- 
source/module_elecstate/elecstate_lcao.cpp | 23 +++++++++------ source/module_elecstate/elecstate_lcao.h | 3 ++ .../module_elecstate/elecstate_lcao_tddft.cpp | 2 ++ source/module_elecstate/energy.cpp | 4 +++ .../module_elecstate/module_charge/charge.cpp | 5 +--- source/module_esolver/esolver_ks.cpp | 9 ++---- source/module_esolver/esolver_ks_lcao.cpp | 5 ++-- .../hamilt_lcaodft/local_orbital_charge.h | 1 + source/module_hsolver/diago_pexsi.cpp | 28 ++++++------------- source/module_hsolver/hsolver_lcao.cpp | 11 +++++++- source/module_hsolver/pexsi/simplePEXSI.cpp | 3 ++ source/module_relax/relax_driver.cpp | 5 +--- 12 files changed, 53 insertions(+), 46 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 3134c248c7..d985fb53e8 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -25,13 +25,13 @@ void ElecStateLCAO::psiToRho(const psi::Psi>& psi) // this part for calculating dm_k in 2d-block format, not used for charge now // psi::Psi> dm_k_2d(); - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "pexsi" || GlobalV::KS_SOLVER == "lapack") // Peize Lin test 2019-05-15 { cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_k); } - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack") + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi") { for (int ik = 0; ik < psi.get_nk(); ik++) { @@ -60,7 +60,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi>& psi) Gint_inout inout1(this->loc->DM_R, this->charge, Gint_Tools::job_type::tau); this->uhm->GK.cal_gint(&inout1); } - this->charge->renormalize_rho(); ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); @@ -73,23 +72,26 @@ 
void ElecStateLCAO::psiToRho(const psi::Psi& psi) ModuleBase::TITLE("ElecStateLCAO", "psiToRho"); ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); - this->calculate_weights(); - this->calEBand(); + if (GlobalV::KS_SOLVER != "pexsi") // pexsi useless + { + this->calculate_weights(); + this->calEBand(); + } - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack") + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi") { ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); // psi::Psi dm_gamma_2d; // caution:wfc and dm - cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); + if (GlobalV::KS_SOLVER != "pexsi") cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); // pexsi has done this ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); for (int ik = 0; ik < psi.get_nk(); ++ik) { // for gamma_only case, no convertion occured, just for print. 
- if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx") + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx") { psi.fix_k(ik); this->print_psi(psi); @@ -183,4 +185,9 @@ void ElecStateLCAO::print_psi(const psi::Psi>& psi_in) return; } +void ElecStateLCAO::get_DM_from_pexsi(double* DM) +{ + this->loc->dm_gamma[0].c = DM; +} + } // namespace elecstate \ No newline at end of file diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index c606e8b5fe..356d437a40 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -43,6 +43,9 @@ class ElecStateLCAO : public ElecState static int out_wfc_flag; static bool need_psi_grid; + //use for pexsi + void get_DM_from_pexsi(double* DM); + protected: // calculate electronic charge density on grid points or density matrix in real space // the consequence charge density rho saved into rho_out, preparing for charge mixing. 
diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp index 86f27b0fa3..e53137fdef 100644 --- a/source/module_elecstate/elecstate_lcao_tddft.cpp +++ b/source/module_elecstate/elecstate_lcao_tddft.cpp @@ -51,7 +51,9 @@ void ElecStateLCAO_TDDFT::psiToRho_td(const psi::Psi>& psi) Gint_inout inout(this->loc->DM_R, this->charge, Gint_Tools::job_type::rho); this->uhm->GK.cal_gint(&inout); + std::cout << "this->charge->renormalize_rho(); 1" << std::endl; this->charge->renormalize_rho(); + std::cout << "this->charge->renormalize_rho(); 1 done" << std::endl; ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); return; diff --git a/source/module_elecstate/energy.cpp b/source/module_elecstate/energy.cpp index 56aa7cac47..188418946c 100644 --- a/source/module_elecstate/energy.cpp +++ b/source/module_elecstate/energy.cpp @@ -253,6 +253,10 @@ void energy::print_etot( { label = "CU"; } + else if(GlobalV::KS_SOLVER=="pexsi") + { + label = "PE"; + } else { ModuleBase::WARNING_QUIT("Energy","print_etot"); diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp index 7d06b656db..52a09b0664 100644 --- a/source/module_elecstate/module_charge/charge.cpp +++ b/source/module_elecstate/module_charge/charge.cpp @@ -293,7 +293,7 @@ double Charge::sum_rho(void) const sum_rho += this->rho[is][ir]; } } - + std::cout << "sum_rho: " << sum_rho <( GlobalC::rhopw->nxyz ); Parallel_Reduce::reduce_double_pool( sum_rho ); @@ -313,7 +313,6 @@ double Charge::sum_rho(void) const void Charge::renormalize_rho(void) { ModuleBase::TITLE("Charge","renormalize_rho"); - const double sr = this->sum_rho(); GlobalV::ofs_warning << std::setprecision(15); ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning,"charge before normalized",sr); @@ -326,9 +325,7 @@ void Charge::renormalize_rho(void) rho[is][ir] *= normalize_factor; } } - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning,"charge after 
normalized",this->sum_rho()); - GlobalV::ofs_running << std::setprecision(6); return; } diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index f094a2162a..284ed06603 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -183,7 +183,6 @@ namespace ModuleESolver else { ModuleBase::timer::tick(this->classname, "Run"); - this->beforescf(istep); //Something else to do before the iter loop ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF"); if(this->maxniter > 0) this->printhead(); //print the headline on the screen. @@ -199,10 +198,10 @@ namespace ModuleESolver #else auto iterstart = std::chrono::system_clock::now(); #endif + std::cout << "drho: " << drho << " iter: " << iter << std::endl; FPTYPE diag_ethr = this->phsol->set_diagethr(istep, iter, drho); eachiterinit(istep, iter); this->hamilt2density(istep, iter, diag_ethr); - // It may be changed when more clever parallel algorithm is put forward. //When parallel algorithm for bands are adopted. Density will only be treated in the first group. //(Different ranks should have abtained the same, but small differences always exist in practice.) @@ -212,7 +211,6 @@ namespace ModuleESolver { // FPTYPE drho = this->estate.caldr2(); // EState should be used after it is constructed. 
- drho = GlobalC::CHR_MIX.get_drho(pelec->charge, GlobalV::nelec); FPTYPE hsolver_error = 0.0; if (firstscf) @@ -278,16 +276,15 @@ namespace ModuleESolver printiter(iter, drho, duration, diag_ethr); if (this->conv_elec) { + std::cout << "this->conv_elec" << std::endl; this->niter = iter; bool stop = this->do_after_converge(iter); - if(stop) break; + if(stop) {std::cout << "break\n"; break;} } } afterscf(istep); - ModuleBase::timer::tick(this->classname, "Run"); } - return; }; diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 89e867ec8b..86055d7bb5 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -519,7 +519,6 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr) { // save input rho pelec->charge->save_rho_before_sum_band(); - // using HSolverLCAO::solve() if (this->phsol != nullptr) { @@ -537,7 +536,7 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr) { this->phsol->solve(this->p_hamilt, this->psid[0], this->pelec, GlobalV::KS_SOLVER); } - + // transform energy for print GlobalC::en.eband = this->pelec->eband; GlobalC::en.demet = this->pelec->demet; @@ -1242,7 +1241,7 @@ void ESolver_KS_LCAO::afterscf(const int istep) bool ESolver_KS_LCAO::do_after_converge(int& iter) { #ifdef __EXX - + std::cout << "test exx" << std::endl; // Add EXX operator auto add_exx_operator = [&]() { if (GlobalV::GAMMA_ONLY_LOCAL) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h index 60ce6f3e43..5160af4803 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h @@ -81,6 +81,7 @@ class Local_Orbital_Charge std::map, std::map>> DMR_sparse; + private: // whether the DM array has been allocated diff --git a/source/module_hsolver/diago_pexsi.cpp 
b/source/module_hsolver/diago_pexsi.cpp index cf37092a73..cca6de9c1b 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -34,30 +34,18 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, dou this->totalEnergyS, this->totalFreeEnergy); this->ps->solve(); - std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy - << std::endl; + this->EDM = this->ps->EDM; + this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]? + this->totalFreeEnergy = this->ps->totalFreeEnergy; + this->totalEnergyH = this->ps->totalEnergyH; + this->totalEnergyS = this->ps->totalEnergyS; + std::cout << "this->totalEnergyH: " << this->ps->totalEnergyH << std::endl << "this->totalEnergyS: " << this->ps->totalEnergyS << std::endl << "this->totalFreeEnergy" << this->ps->totalFreeEnergy << std::endl; } void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi>& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); - matd h_mat, s_mat; - phm_in->matrix(h_mat, s_mat); - std::vector eigen(GlobalV::NLOCAL, 0.0); - MPI_Comm COMM_DIAG = MPI_COMM_WORLD; - this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->DM, - this->EDM, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); - this->ps->solve(); - std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy - << std::endl; + ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); + } } // namespace hsolver diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index e870e4f191..2a61637dbe 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -8,6 +8,7 @@ #include "diago_elpa.h" #endif #include "diago_pexsi.h" +#include "module_elecstate/elecstate_lcao.h" 
namespace hsolver { @@ -122,7 +123,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, } } - if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack") + if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack" && this->method != "pexsi") { delete pdiagh; pdiagh = nullptr; @@ -137,6 +138,14 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, // calculate charge by psi // called in scf calculation + if (this->method == "pexsi") + { + DiagoPexsi* tem = dynamic_cast(this->pdiagh); + if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); + elecstate::ElecStateLCAO* _pes = dynamic_cast(pes); + pes->eband = tem->totalFreeEnergy; + _pes->get_DM_from_pexsi(tem->DM); + } pes->psiToRho(psi); ModuleBase::timer::tick("HSolverLCAO", "solve"); } diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp index 6f1d2d1afe..438936280f 100644 --- a/source/module_hsolver/pexsi/simplePEXSI.cpp +++ b/source/module_hsolver/pexsi/simplePEXSI.cpp @@ -383,6 +383,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy) // output energy { int out_log=0; + std::cout << "nrow: " << nrow << std::endl; + std::cout << "ncol: " << ncol << std::endl; if(out_log == 1) { std::stringstream ss; @@ -614,6 +616,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const if(myid<100) log_DM(DST_Matrix, DMnzvalLocal, f_log); #endif } + std::cout << "totalEnergyH:" << totalEnergyH << "\ntotalEnergyS:" << totalEnergyS << "\ntotalFreeEnergy:" << totalFreeEnergy << std::endl; // clean PEXSI PPEXSIPlanFinalize(plan, &info); #ifdef _DEBUG diff --git a/source/module_relax/relax_driver.cpp b/source/module_relax/relax_driver.cpp index 173101036e..b821ba8bcf 100644 --- a/source/module_relax/relax_driver.cpp +++ 
b/source/module_relax/relax_driver.cpp @@ -29,7 +29,6 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver) while (istep <= GlobalV::RELAX_NMAX && !stop) { time_t estart = time(NULL); - if (GlobalV::OUT_LEVEL == "ie" && (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax" || GlobalV::CALCULATION == "scf" || GlobalV::CALCULATION == "nscf")) @@ -38,6 +37,7 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver) } // mohan added eiter to count for the electron iteration number, 2021-01-28 + p_esolver->Run(istep - 1, GlobalC::ucell); time_t eend = time(NULL); @@ -80,7 +80,6 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver) force_step, stress_step); // pengfei Li 2018-05-14 } - if (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax") { // print structure @@ -99,10 +98,8 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver) } } time_t fend = time(NULL); - ++istep; } - if (GlobalV::OUT_LEVEL == "i") { std::cout << " ION DYNAMICS FINISHED :)" << std::endl; From 43d743d7995cc20596c8ddf0c487b647e99f5a32 Mon Sep 17 00:00:00 2001 From: zhangzhihao <1900017707@pku.edu.cn> Date: Thu, 11 May 2023 14:37:03 +0800 Subject: [PATCH 04/44] feat: pexsi hsolver --- source/Makefile | 6 +- source/Makefile.vars | 2 +- source/module_base/parallel_global.cpp | 63 +- source/module_elecstate/elecstate_lcao.cpp | 20 +- source/module_elecstate/elecstate_lcao.h | 2 +- .../module_elecstate/module_charge/charge.cpp | 1 - source/module_esolver/esolver_ks.cpp | 1 - .../hamilt_lcaodft/DM_gamma.cpp | 2 - source/module_hsolver/diago_elpa.cpp | 32 + source/module_hsolver/diago_pexsi.cpp | 1 - source/module_hsolver/hsolver_lcao.cpp | 2 +- source/module_hsolver/pexsi/DistBCDMatrix.cpp | 1 + source/module_hsolver/pexsi/DistCCSMatrix.cpp | 1 + .../pexsi/DistMatrixTransformer.cpp | 1538 +++++++++-------- source/module_hsolver/pexsi/pexsi_solver.cpp | 44 +- source/module_hsolver/pexsi/simplePEXSI.cpp | 680 
++++---- 16 files changed, 1292 insertions(+), 1104 deletions(-) diff --git a/source/Makefile b/source/Makefile index faaf6697b8..8f3db96941 100644 --- a/source/Makefile +++ b/source/Makefile @@ -5,7 +5,7 @@ include Makefile.vars #========================== INCLUDES = -I. -Icommands -I../ LIBS = -lm -lpthread -OPTS = -std=c++14 -pedantic -m64 ${INCLUDES} +OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic HONG = -D__LCAO HONG += -D__ELPA ifeq ($(OPENMP), ON) @@ -43,7 +43,7 @@ ifeq ($(DEBUG), ON) endif OPTS += -O0 -fsanitize=address -fno-omit-frame-pointer -Wall -g #It can check segmental defaults else - HONG += -Ofast -march=native -DNDEBUG + HONG += -O0 -march=native -DNDEBUG endif ifeq ($(INTEL), ON) @@ -73,7 +73,7 @@ else FFTW_INCLUDE_DIR = ${FFTW_DIR}/include FFTW_LIB_DIR = ${FFTW_DIR}/lib HONG += -D__FFTW3 - LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} + LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl INCLUDES += -I${FFTW_INCLUDE_DIR} #========================== diff --git a/source/Makefile.vars b/source/Makefile.vars index d19109fc96..fb01878d5f 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -53,7 +53,7 @@ SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc ## CEREAL_DIR should contain an include folder. 
##--------------------------------------------------------------------- -# FFTW_DIR = /public/soft/fftw_3.3.8 +# FFTW_DIR = /root/lib/FFTW3 # OPENBLAS_LIB_DIR = /public/soft/openblas/lib # SCALAPACK_LIB_DIR = /public/soft/openblas/lib diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp index 954b0662cd..9245cc72ff 100644 --- a/source/module_base/parallel_global.cpp +++ b/source/module_base/parallel_global.cpp @@ -98,49 +98,30 @@ void Parallel_Global::split_diag_world(const int &diag_np) -void Parallel_Global::split_grid_world(const int &diag_np) +void Parallel_Global::split_grid_world(const int &grid_np) { #ifdef __MPI - assert(diag_np>0); - // number of processors in each 'grid group'. - int* group_grid_np = new int[diag_np]; - ModuleBase::GlobalFunc::ZEROS(group_grid_np, diag_np); - // average processors in each 'grid group' - int ave = GlobalV::NPROC/diag_np; - // remain processors. - int remain = GlobalV::NPROC - ave * diag_np; - - for(int i=0; i0); //LiuXh, 2020-12-14, diag_np --> grid_np + int myid; + MPI_Group WORLD_GROUP; + //MPI_Comm_rank(MPI_COMM_WORLD, &key); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); //LiuXh, 2020-12-14, key --> myid + MPI_Comm_group(MPI_COMM_WORLD, &WORLD_GROUP); + + int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np}; + MPI_Group_range_incl(WORLD_GROUP, 1, &grid_proc_range, &GRID_GROUP); + + GRID_WORLD=MPI_COMM_NULL; + MPI_Comm_create(MPI_COMM_WORLD, GRID_GROUP, &GRID_WORLD); + if(GRID_WORLD != MPI_COMM_NULL) + { + MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD + MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD + }else + { + GlobalV::GRANK=-1; + GlobalV::GSIZE=-1; + } #else GlobalV::GRANK=0; //mohan fix bug 2012-02-04 GlobalV::GSIZE=1; diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index d985fb53e8..d9fcc93648 100644 --- 
a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -108,6 +108,23 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) //------------------------------------------------------------ // calculate the charge density on real space grid. //------------------------------------------------------------ + // print matrix zzh + // GlobalV::ofs_running << "dm_gamma print\n"; + // for(int i=0; i< this->loc->dm_gamma[0].nc; i++) + // { + // for(int j=0; jloc->dm_gamma[0].nr; j++) + // { + // if (std::abs(this->loc->dm_gamma[0](i, j)) < 0.00000001) + // { + // GlobalV::ofs_running << "0 "; + // } + // else + // { + // GlobalV::ofs_running << this->loc->dm_gamma[0](i, j) << " "; + // } + // } + // GlobalV::ofs_running << std::endl; + // } ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); Gint_inout inout(this->loc->DM, this->charge, Gint_Tools::job_type::rho); this->uhm->GG.cal_gint(&inout); @@ -185,8 +202,9 @@ void ElecStateLCAO::print_psi(const psi::Psi>& psi_in) return; } -void ElecStateLCAO::get_DM_from_pexsi(double* DM) +void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) { + this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow); this->loc->dm_gamma[0].c = DM; } diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index 356d437a40..51cb164187 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -44,7 +44,7 @@ class ElecStateLCAO : public ElecState static bool need_psi_grid; //use for pexsi - void get_DM_from_pexsi(double* DM); + void get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV); protected: // calculate electronic charge density on grid points or density matrix in real space diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp index 52a09b0664..cec15dbc01 100644 --- 
a/source/module_elecstate/module_charge/charge.cpp +++ b/source/module_elecstate/module_charge/charge.cpp @@ -293,7 +293,6 @@ double Charge::sum_rho(void) const sum_rho += this->rho[is][ir]; } } - std::cout << "sum_rho: " << sum_rho <( GlobalC::rhopw->nxyz ); Parallel_Reduce::reduce_double_pool( sum_rho ); diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 284ed06603..eb82ccba8d 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -198,7 +198,6 @@ namespace ModuleESolver #else auto iterstart = std::chrono::system_clock::now(); #endif - std::cout << "drho: " << drho << " iter: " << iter << std::endl; FPTYPE diag_ethr = this->phsol->set_diagethr(istep, iter, drho); eachiterinit(istep, iter); this->hamilt2density(istep, iter, diag_ethr); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp index 079d0600a7..2e96ed2bf8 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp @@ -29,7 +29,6 @@ int Local_Orbital_Charge::setAlltoallvParameter(MPI_Comm comm_2D, int blacs_ctxt int myproc=0; Cblacs_gridinfo(blacs_ctxt, &nprows, &npcols, &myprow, &mypcol); - std::cout << "testing" << "npcols:" << npcols << " nprows:" << nprows << endl; Cblacs_pinfo(&myproc, &nprocs); // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nprocs",nprocs); @@ -338,7 +337,6 @@ void Local_Orbital_Charge::gamma_file(psi::Psi* psid, Local_Orbital_wfc void Local_Orbital_Charge::cal_dk_gamma_from_2D_pub(void) { ModuleBase::TITLE("Local_Orbital_Charge","cal_dk_gamma_from_2D_pub"); - cal_dk_gamma_from_2D(); } // calculate the grid distributed DM matrix from 2D block-cyclic distributed DM matrix diff --git a/source/module_hsolver/diago_elpa.cpp b/source/module_hsolver/diago_elpa.cpp index a47c363fe2..19b641e353 100644 --- a/source/module_hsolver/diago_elpa.cpp +++ 
b/source/module_hsolver/diago_elpa.cpp @@ -31,7 +31,24 @@ void DiagoElpa::diag(hamilt::Hamilt *phm_in, psi::PsiDecomposedState=0; // for k pointer, the decomposed s_mat can not be reused ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); + std::cout << "???" << std::endl; + GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n"; + GlobalV::ofs_running << "print H" << std::endl; + for (int i = 0; i < h_mat.col; i++) + { + for (int j = 0; j < h_mat.row; j++) + { + if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001) + { + GlobalV::ofs_running << "0 "; + } + else + GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " "; + } + GlobalV::ofs_running << std::endl; + } es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer()); + ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); es.exit(); @@ -56,6 +73,21 @@ void DiagoElpa::diag(hamilt::Hamilt *phm_in, psi::Psi &psi, doub //ELPA_Solver es(isReal, COMM_DIAG, GlobalV::NBANDS, h_mat.row, h_mat.col, h_mat.desc); ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc); ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); + GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n"; + GlobalV::ofs_running << "print H" << std::endl; + for (int i = 0; i < h_mat.col; i++) + { + for (int j = 0; j < h_mat.row; j++) + { + if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001) + { + GlobalV::ofs_running << "0 "; + } + else + GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " "; + } + GlobalV::ofs_running << std::endl; + } es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer()); ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); es.exit(); diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index cca6de9c1b..c0b4e3e24c 100644 --- a/source/module_hsolver/diago_pexsi.cpp 
+++ b/source/module_hsolver/diago_pexsi.cpp @@ -39,7 +39,6 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, dou this->totalFreeEnergy = this->ps->totalFreeEnergy; this->totalEnergyH = this->ps->totalEnergyH; this->totalEnergyS = this->ps->totalEnergyS; - std::cout << "this->totalEnergyH: " << this->ps->totalEnergyH << std::endl << "this->totalEnergyS: " << this->ps->totalEnergyS << std::endl << "this->totalFreeEnergy" << this->ps->totalFreeEnergy << std::endl; } void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi>& psi, double* eigenvalue_in) { diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 2a61637dbe..242bd5d5a8 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -144,7 +144,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast(pes); pes->eband = tem->totalFreeEnergy; - _pes->get_DM_from_pexsi(tem->DM); + _pes->get_DM_from_pexsi(tem->DM, tem->ParaV); } pes->psiToRho(psi); ModuleBase::timer::tick("HSolverLCAO", "solve"); diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp index 8a3f2740e2..383875dc87 100644 --- a/source/module_hsolver/pexsi/DistBCDMatrix.cpp +++ b/source/module_hsolver/pexsi/DistBCDMatrix.cpp @@ -142,6 +142,7 @@ int DistBCDMatrix::globalRow(const int localRow) int DistBCDMatrix::globalCol(const int localCol) { + return (localCol/nblk*npcols+mypcol)*nblk+localCol%nblk; } diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp index 45a14d6ac7..9e3fc728fa 100644 --- a/source/module_hsolver/pexsi/DistCCSMatrix.cpp +++ b/source/module_hsolver/pexsi/DistCCSMatrix.cpp @@ -91,6 +91,7 @@ int DistCCSMatrix::localCol(int globalCol, int& mypcol) { mypcol=int(globalCol/int(this->size/this->nproc_data)); 
if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1; + return mypcol>0 ? globalCol-(this->size/this->nproc_data)*mypcol : globalCol; } diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp index 285d3cdd94..1eec8dca12 100644 --- a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp +++ b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp @@ -1,233 +1,252 @@ #include -#include + #include #include -#include +#include +#include #include +#include + #include "DistBCDMatrix.h" #include "DistCCSMatrix.h" - // for debug #ifdef _DEBUG +#include + #include #include -#include + #include "src_pw/global.h" #endif // end debug -// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a negtive value -// the size_process and displacement_process array will be changed after the index is found -// isFirst: wether this function is called for the first time for a index array; -// nprocs: total number of processes -// size_process: the number of indices in each process -// displacement_process: the start position in each process -// index: the array contains the indices -inline int MinimumIndexPosition(const bool isFirst, const int nprocs, - int* size_process, int* displacement_process, const int* index) +// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a +// negtive value the size_process and displacement_process array will be changed after the index is found isFirst: +// wether this function is called for the first time for a index array; nprocs: total number of processes size_process: +// the number of indices in each process displacement_process: the start position in each process index: the array +// contains the indices +inline int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) { // usually 
the minimum index is continuous, so it will be a good idea to // check the one next to the previous index first. static int pre_position; // previous position in index array of minimum index, - static int pre_process; // the process contains previous index + static int pre_process; // the process contains previous index - int minimum_index=INT_MAX; // the minimum index, initial value is a large number which is larger than any other index; - int minimum_position=-1; - int minimum_process=-1; + int minimum_index + = INT_MAX; // the minimum index, initial value is a large number which is larger than any other index; + int minimum_position = -1; + int minimum_process = -1; - if(isFirst) + if (isFirst) { - for(int i=0; i0) + if (size_process[i] > 0) { - if(minimum_index>index[displacement_process[i]]) // find a smaller index + if (minimum_index > index[displacement_process[i]]) // find a smaller index { - minimum_position=displacement_process[i]; - minimum_index=index[minimum_position]; - minimum_process=i; + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; } } } - if(minimum_process>=0) // find it! + if (minimum_process >= 0) // find it! { ++displacement_process[minimum_process]; --size_process[minimum_process]; } - pre_position=minimum_position; - pre_process=minimum_process; + pre_position = minimum_position; + pre_process = minimum_process; return minimum_position; } else { // check the next one of pre_position - if(size_process[pre_process]>0 && // the previous process still has elements - index[pre_position+1]==index[pre_position]+1) // find it! + if (size_process[pre_process] > 0 && // the previous process still has elements + index[pre_position + 1] == index[pre_position] + 1) // find it! 
{ ++displacement_process[pre_process]; --size_process[pre_process]; - ++pre_position; // new pre_position is the next one - // new pre_process keeps the same + ++pre_position; // new pre_position is the next one + // new pre_process keeps the same return pre_position; // current position is the new pre_position } // if the next one of pre_position is not the minimum one - for(int i=0; i0) + if (size_process[i] > 0) { - if(minimum_index>index[displacement_process[i]]) + if (minimum_index > index[displacement_process[i]]) { - minimum_position=displacement_process[i]; - minimum_index=index[minimum_position]; - minimum_process=i; + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; } } } - if(minimum_process>=0) // find it! + if (minimum_process >= 0) // find it! { ++displacement_process[minimum_process]; --size_process[minimum_process]; } - pre_position=minimum_position; - pre_process=minimum_process; + pre_position = minimum_position; + pre_process = minimum_process; return minimum_position; } } -inline void buildCCSParameter(const int size, const int nprocs, - std::vector size_process, std::vector displacement_process, - const int* position_index, DistCCSMatrix &DST_Matrix, int* buffer2ccsIndex) +inline void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) { // find the minimum one from left buffer index - if(DST_Matrix.nnzLocal<=0) return; + if (DST_Matrix.nnzLocal <= 0) + return; - int pre_col=-1; - int nnz_now=0; + int pre_col = -1; + int nnz_now = 0; int p_mini; - p_mini=MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index); - while(p_mini>=0) + p_mini = MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index); + while (p_mini >= 0) { - int index_mini=position_index[p_mini]; - 
int col_mini=index_mini/DST_Matrix.size; //-DST_Matrix.firstCol; - int row_mini=index_mini%DST_Matrix.size; - if(col_mini>pre_col) // a new column starts, column pointer is a 1-based array + int index_mini = position_index[p_mini]; + int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.size; + if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array { - pre_col=col_mini; - DST_Matrix.colptrLocal[col_mini]=nnz_now+1; + pre_col = col_mini; + DST_Matrix.colptrLocal[col_mini] = nnz_now + 1; } - DST_Matrix.rowindLocal[nnz_now]=row_mini+1; // setup row index array, which is also 1-based + DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based // copy data from buffer to M, be careful M is a 0-based array - buffer2ccsIndex[nnz_now]=p_mini; + buffer2ccsIndex[nnz_now] = p_mini; ++nnz_now; - p_mini=MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); + p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); } // The last element of colptrLocal is nnzLocal+1 - DST_Matrix.colptrLocal[DST_Matrix.numColLocal]=nnz_now+1; + DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1; } -inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, - double* nzvalLocal) +inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal) { - for(int i=0; i& P) { - for(int i=0; i &rowidx, std::vector &colidx) +inline int getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) { - #ifdef _DEBUG +#ifdef _DEBUG char f_log[80]; int myproc; MPI_Comm_rank(MPI_COMM_WORLD, &myproc); std::ofstream log; - if(myproc<100) + if (myproc < 100) { sprintf(f_log, "transformer_%2.2d.log", myproc); log.open(f_log, 
std::ios::app); - log<<"start count nnz"< pH; - countMatrixDistribution(nrow*ncol, H_2d, pH); + countMatrixDistribution(nrow * ncol, H_2d, pH); std::map pS; - countMatrixDistribution(nrow*ncol, H_2d, pS); - plog<<"Element in H distribution:\n"; + countMatrixDistribution(nrow * ncol, H_2d, pS); + plog << "Element in H distribution:\n"; // std::stringstream ss; // ss.str(""); - for(auto iter=pH.begin(); iter!=pH.end(); ++iter) + for (auto iter = pH.begin(); iter != pH.end(); ++iter) { // ss<<"p["<first<<"] : "<second<first<<"] : "<second<first << "] : " << iter->second << std::endl; } - //OUT(ofs_running,ss.str()); - //OUT(ofs_running, "Element in S distribution:"); - plog<<"Element in S distribution:\n"; - //ss.str(""); - for(auto iter=pS.begin(); iter!=pS.end(); ++iter) + // OUT(ofs_running,ss.str()); + // OUT(ofs_running, "Element in S distribution:"); + plog << "Element in S distribution:\n"; + // ss.str(""); + for (auto iter = pS.begin(); iter != pS.end(); ++iter) { - //ss<<"p["<first<<"] : "<second<first<<"] : "<second<first<<"] : "<second<first << "] : " << iter->second << std::endl; } // OUT(ofs_running,ss.str()); plog.close(); } - #endif +#endif - int idx=0; - nnz=0; + int idx = 0; + nnz = 0; colidx.clear(); rowidx.clear(); - #ifdef _DEBUG - if(myproc<100) log<<"rowidx and colidx cleared"< ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + idx = i * nrow + j; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) { - ++nnz; + ++nnz; colidx.push_back(i); rowidx.push_back(j); } } } - } else if(LAYOUT == 'R' || LAYOUT == 'r') + } + else if (LAYOUT == 'R' || LAYOUT == 'r') { - for(int i=0; i ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + idx = j * ncol + i; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) { ++nnz; colidx.push_back(i); @@ -235,68 +254,78 @@ inline int getNonZeroIndex(char LAYOUT, const int nrow, const int ncol, double* } } } - } else + } + else { - #ifdef _DEBUG - if(myproc<100) log<<"unknown LAYOUT: "< &rowidx, 
std::vector &colidx, - int &sender_size, std::vector &sender_size_process, std::vector &sender_displacement_process, - int &receiver_size, std::vector &receiver_size_process, std::vector &receiver_displacement_process, - std::vector &buffer2ccsIndex) +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) { // debug int myproc; MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - #ifdef _DEBUG +#ifdef _DEBUG std::ofstream log; - if(myproc<100) + if (myproc < 100) { char f_log[80]; sprintf(f_log, "transformer_%2.2d.log", myproc); log.open(f_log, std::ios::app); - log<<"enter buildTransformParameter"< receiver_index(receiver_size); - MPI_Alltoallv(&sender_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT, - &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS); - // debug - #ifdef _DEBUG - if(myproc<100) log<<"receiver_index is got"< receiver_size_process(NPROC_TRANS); std::vector receiver_displacement_process(NPROC_TRANS); - #ifdef _DEBUG - if(myproc<100) +#ifdef _DEBUG + if (myproc < 100) { - log<<"nprocs: "< rowidx; std::vector colidx; - int nnz=0; - #ifdef _DEBUG - if(myproc<100) log<<"start counting nnz..."< buffer2ccsIndex; - buildTransformParameter(SRC_Matrix, DST_Matrix, - NPROC_TRANS, GROUP_TRANS, COMM_TRANS, - nnz, rowidx, colidx, - sender_size, sender_size_process, sender_displacement_process, - receiver_size, receiver_size_process, receiver_displacement_process, buffer2ccsIndex); - // Do transformation - #ifdef _DEBUG - if(myproc<100) log<<"Parameters are built"< sender_buffer(sender_size); 
std::vector receiver_buffer(receiver_size); // put H to sender buffer - if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') { - for(int i=0; i proc_map_bcd_trans; int myproc_trans; MPI_Comm_rank(COMM_TRANS, &myproc_trans); - if(myproc_trans == 0) + if (myproc_trans == 0) { MPI_Group_size(DST_Matrix.group, &nproc_bcd); MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); proc_map_bcd_trans.resize(nproc_bcd, 0); - for(int i=0; i 0) +#endif + if (sender_size > 0) { - sender_index=new int[sender_size]; - for(int i=0; i 0) + if (myproc < 100) + log << "; receiver_index size: "; +#endif + if (receiver_size > 0) { - receiver_index=new int[2*receiver_size]; - receiver_buffer=new double[receiver_size]; - for(int i=0; i<2*receiver_size; ++i) + receiver_index = new int[2 * receiver_size]; + receiver_buffer = new double[receiver_size]; + for (int i = 0; i < 2 * receiver_size; ++i) { - receiver_index[i]=-1; + receiver_index[i] = -1; } - for(int i=0; i p(sender_displacement_process); int p[NPROC_TRANS]; - for(int i=0; i= DST_Matrix.nprows || recv_prow_bcd < 0) + if (recv_prow_bcd >= DST_Matrix.nprows || recv_prow_bcd < 0) { - log<<"ERROR: recv_prow_bcd error! recv_prow_bcd is "< NPROC_TRANS || recv_proc_bcd < 0) + if (recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0) { - log<<"ERROR: recv_proc_bcd outbound! recv_proc_bcd is "<= sender_size || p[recv_proc] < 0) + if (p[recv_proc] >= sender_size || p[recv_proc] < 0) { - log<<"ERROR: sender_index's index outbound! "<= (2*sender_size)|| (p[recv_proc]*2+1) < 0) + if ((p[recv_proc] * 2 + 1) >= (2 * sender_size) || (p[recv_proc] * 2 + 1) < 0) { - log<<"ERROR: dst_index's index outbound! recv_proc:"<SRC_Matrix.nnzLocal) + if (sender_index[i] < 0 || sender_index[i] > SRC_Matrix.nnzLocal) { - log<<"ERROR! sender_index outbound: "<DST_Matrix.size) + if (dst_index[i] < 0 || dst_index[i] > DST_Matrix.size) { - log<<"ERROR! 
dst_index outbound: "< 0) - { - sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans); - log_rcv_index.open(f_log, std::ios::app); - for(int j=receiver_displacement_process[i]; j 0) + { + sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans); + log_rcv_index.open(f_log, std::ios::app); + for(int j=receiver_displacement_process[i]; jDST_Matrix.nrow) - { - log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "<DST_Matrix.ncol) - { - log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "<DST_Matrix.nrow) + { + log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "<DST_Matrix.ncol) + { + log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "<DST_Matrix.nrow) + else if (receiver_index[i * 2] > DST_Matrix.nrow) { - log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "< " + << DST_Matrix.nrow << std::endl; log.flush(); } - if(receiver_index[i*2+1]<0) + if (receiver_index[i * 2 + 1] < 0) { - log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<DST_Matrix.ncol) + else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol) { - log<<"ERROR! 
receiver_index(BCD)["<<2*i+1<<"] = "< "< " + << DST_Matrix.ncol << std::endl; log.flush(); } } - log<<"receiver_index values are checked"<=DST_Matrix_elem) + if (idx < 0 || idx >= DST_Matrix_elem) { - log<<"idx for DM ERROR: idx is "<=DST_Matrix_elem) + if (idx < 0 || idx >= DST_Matrix_elem) { - log<<"idx for EDM ERROR: idx is "< #include -#include + +#include "module_base/global_variable.h" +#include "simplePEXSI.h" PEXSI_Solver::PEXSI_Solver(const int blacs_text, const int nb, @@ -38,22 +39,23 @@ int PEXSI_Solver::solve() extern MPI_Comm DIAG_WORLD; extern MPI_Comm GRID_WORLD; extern MPI_Group GRID_GROUP; - return simplePEXSI(MPI_COMM_WORLD, - MPI_COMM_WORLD, - GRID_GROUP, - this->blacs_text, - GlobalV::NLOCAL, - this->nb, - this->nrow, - this->ncol, - 'C', - this->h, - this->s, - GlobalV::nelec, - "PEXSIOPTION", - this->DM, - this->EDM, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + simplePEXSI(DIAG_WORLD, + GRID_WORLD, + GRID_GROUP, + this->blacs_text, + GlobalV::NLOCAL, + this->nb, + this->nrow, + this->ncol, + 'C', + this->h, + this->s, + GlobalV::nelec, + "PEXSIOPTION", + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); + return 0; } \ No newline at end of file diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp index 438936280f..0fbeb4e0a8 100644 --- a/source/module_hsolver/pexsi/simplePEXSI.cpp +++ b/source/module_hsolver/pexsi/simplePEXSI.cpp @@ -2,22 +2,24 @@ // the H and S matrices are given by 2D block cyclic distribution // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution // #include "mpi.h" -#include -#include -#include -#include +#include + #include +#include +#include +#include +#include #include -#include + +#include "DistBCDMatrix.h" +#include "DistCCSMatrix.h" +#include "DistMatrixTransformer.h" #include "c_pexsi_interface.h" #include 
"module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" -#include "DistCCSMatrix.h" -#include "DistBCDMatrix.h" -#include "DistMatrixTransformer.h" -inline void strtolower(char *sa, char *sb) +inline void strtolower(char* sa, char* sb) { char c; int len = strlen(sa); @@ -32,30 +34,34 @@ inline void strtolower(char *sa, char *sb) inline void setDefaultOption(int* int_para, double* double_para) { // options.spin=2; - double_para[0]=2; + double_para[0] = 2; // options.gap=0; - double_para[2]=0; + double_para[2] = 0; // ZERO_Limit=DBL_MIN; - double_para[11]=DBL_MIN; + double_para[11] = DBL_MIN; // options.matrixType=0; - int_para[3]=0; + int_para[3] = 0; // options.solver=1; - int_para[6]=1; + int_para[6] = 1; // options.ordering=0; - int_para[8]=0; + int_para[8] = 0; // options.rowOrdering=0; - int_para[9]=0; + int_para[9] = 0; // options.symmetric=0; - int_para[11]=0; + int_para[11] = 0; // options.transpose=0; - int_para[12]=0; + int_para[12] = 0; // options.nPoints=2; - int_para[14]=2; + int_para[14] = 2; // options.verbosity=1; - int_para[15]=1; + int_para[15] = 1; } -int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOptions& options, int& numProcessPerPole, double& ZERO_Limit) +int loadPEXSIOption(MPI_Comm comm, + const std::string PexsiOptionFile, + PPEXSIOptions& options, + int& numProcessPerPole, + double& ZERO_Limit) { // temp variable arrays read from conf file and will be bcast to all processors @@ -96,10 +102,10 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti double double_para[12]; int myid; MPI_Comm_rank(comm, &myid); - if(myid==0) + if (myid == 0) { std::ifstream ifs(PexsiOptionFile.c_str()); - if(! 
ifs) + if (!ifs) { return 1; } @@ -110,195 +116,195 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti char key[128]; char lowercase_key[128]; - const int LINE_LINGTH=1024; + const int LINE_LINGTH = 1024; char unused_string[LINE_LINGTH]; - while(ifs.good()) + while (ifs.good()) { ifs >> key; //~ cout<<"readin word is: "<>options.spin; - ifs>>double_para[0]; + ifs >> double_para[0]; //~ cout<<"double_para[0]: "<>options.temperature; - ifs>>double_para[1]; + ifs >> double_para[1]; //~ cout<<"double_para[1]: "<>options.gap; - ifs>>double_para[2]; + ifs >> double_para[2]; //~ cout<<"double_para[2]: "<>options.deltaE; - ifs>>double_para[3]; + ifs >> double_para[3]; //~ cout<<"double_para[3]: "<>options.numPole; - ifs>>int_para[0]; + ifs >> int_para[0]; //~ cout<<"int_para[0]: "<>options.isInertiaCount; - ifs>>int_para[1]; + ifs >> int_para[1]; //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; - ifs>>int_para[2]; + ifs >> int_para[2]; //~ cout<<"int_para[2]: "<>options.muMin0; - ifs>>double_para[4]; + ifs >> double_para[4]; //~ cout<<"double_para[4]: "<>options.muMax0; - ifs>>double_para[5]; + ifs >> double_para[5]; //~ cout<<"double_para[5]: "<>options.mu0; - ifs>>double_para[6]; + ifs >> double_para[6]; //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; - ifs>>double_para[7]; + ifs >> double_para[7]; //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; - ifs>>double_para[8]; + ifs >> double_para[8]; //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; - ifs>>double_para[9]; + ifs >> double_para[9]; //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; - ifs>>double_para[10]; + ifs >> double_para[10]; //~ cout<<"double_para[10]: "<>double_para[11]; + ifs >> double_para[11]; } - else if(strcmp("matrixtype", lowercase_key)==0) + else if (strcmp("matrixtype", lowercase_key) == 0) { //~ ifs>>options.matrixType; - ifs>>int_para[3]; + ifs >> int_para[3]; //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; - 
ifs>>int_para[4]; + ifs >> int_para[4]; //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; - ifs>>int_para[5]; + ifs >> int_para[5]; //~ cout<<"int_para[5]: "<>options.solver; - ifs>>int_para[6]; + ifs >> int_para[6]; //~ cout<<"int_para[6]: "<>options.symmetricStorage; - ifs>>int_para[7]; + ifs >> int_para[7]; //~ cout<<"int_para[7]: "<>options.ordering; - ifs>>int_para[8]; + ifs >> int_para[8]; //~ cout<<"int_para[8]: "<>options.rowOrdering; - ifs>>int_para[9]; + ifs >> int_para[9]; //~ cout<<"int_para[9]: "<>options.npSymbFact; - ifs>>int_para[10]; + ifs >> int_para[10]; //~ cout<<"int_para[10]: "<>options.symmetric; - ifs>>int_para[11]; + ifs >> int_para[11]; //~ cout<<"int_para[11]: "<>options.transpose; - ifs>>int_para[12]; + ifs >> int_para[12]; //~ cout<<"int_para[12]: "<>options.method; - ifs>>int_para[13]; + ifs >> int_para[13]; //~ cout<<"int_para[13]: "<>options.nPoints; - ifs>>int_para[14]; + ifs >> int_para[14]; //~ cout<<"int_para[14]: "<>options.verbosity; - ifs>>int_para[15]; + ifs >> int_para[15]; //~ cout<<"int_para[15]: "<>options.verbosity; - ifs>>int_para[16]; + ifs >> int_para[16]; //~ cout<<"int_para[16]: "<= (high-NPROC)) + int low = pow(integral_part, 2); + int high = pow(integral_part + 1, 2); + if ((NPROC - low) >= (high - NPROC)) { - flag=integral_part+1; + flag = integral_part + 1; } else { - flag=integral_part; + flag = integral_part; } - for(i=flag; i>0; ++i) + for (i = flag; i > 0; ++i) { - if(NPROC%i == 0) break; + if (NPROC % i == 0) + break; } - nprow=i; - npcol=NPROC/i; + nprow = i; + npcol = NPROC / i; } } -int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt, // communicator parameters - const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // matrix parameters - double* H, double* S, // input matrices - const double numElectronExact, const std::string PexsiOptionFile, // pexsi parameters file - double*& DM, double*& EDM, // output matrices - double& 
totalEnergyH, double& totalEnergyS, double& totalFreeEnergy) // output energy +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int nblk, + const int nrow, + const int ncol, + char LAYOUT, // matrix parameters + double* H, + double* S, // input matrices + const double numElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy) // output energy { - int out_log=0; - std::cout << "nrow: " << nrow << std::endl; - std::cout << "ncol: " << ncol << std::endl; - if(out_log == 1) - { - std::stringstream ss; - int nproc_2D, nproc_PEXSI; - int myid_2D, myid_PEXSI; - if(comm_2D != MPI_COMM_NULL) - { - MPI_Comm_size(comm_2D, &nproc_2D); - MPI_Comm_rank(comm_2D, &myid_2D); - ss.str(""); - ss<<"\tIn 2D comm, myid = "< Date: Fri, 17 Nov 2023 17:07:23 +0800 Subject: [PATCH 05/44] CMake building implemented --- CMakeLists.txt | 9 +++++ cmake/FindPEXSI.cmake | 27 ++++++++++++++ cmake/FindParMETIS.cmake | 37 +++++++++++++++++++ cmake/FindSuperLU.cmake | 27 ++++++++++++++ source/module_hsolver/CMakeLists.txt | 14 +++++++ source/module_hsolver/pexsi/CMakeLists.txt | 5 +++ source/module_hsolver/test/CMakeLists.txt | 14 +++++++ .../module_hsolver/test/diago_lcao_test.cpp | 7 ++++ 8 files changed, 140 insertions(+) create mode 100644 cmake/FindPEXSI.cmake create mode 100644 cmake/FindParMETIS.cmake create mode 100644 cmake/FindSuperLU.cmake create mode 100644 source/module_hsolver/pexsi/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 697b90d33c..41a8f777a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ option(DEBUG_INFO "Print message for developers to debug." 
OFF) option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF) # Do not use the new container by default. option(ENABLE_CONTAINER "Enable the new multi-device container." OFF) +option(USE_PEXSI "Enable support to PEXSI." ON) if (NOT ENABLE_MPI) set (ENABLE_LCAO OFF) @@ -132,6 +133,14 @@ if(ENABLE_LCAO) target_link_libraries(${ABACUS_BIN_NAME} ELPA::ELPA) add_compile_definitions(__ELPA) endif() + if(USE_PEXSI) + find_package(PEXSI REQUIRED) + find_package(SuperLU REQUIRED) + find_package(ParMETIS REQUIRED) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) + add_compile_definitions(__PEXSI) + endif() else() set(ENABLE_DEEPKS OFF) set(ENABLE_LIBRI OFF) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake new file mode 100644 index 0000000000..6a1d699a81 --- /dev/null +++ b/cmake/FindPEXSI.cmake @@ -0,0 +1,27 @@ +############################################################################### +# - Find cereal +# Find the native cereal headers. +# +# CEREAL_FOUND - True if cereal is found. +# CEREAL_INCLUDE_DIR - Where to find cereal headers. + +find_path(PEXSI_INCLUDE_DIR + NAMES c_pexsi_interface.h + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "include" +) + +find_library(PEXSI_LIBRARY + NAMES pexsi + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "lib" +) + +# Handle the QUIET and REQUIRED arguments and +# set Cereal_FOUND to TRUE if all variables are non-zero. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR) + +# Copy the results to the output variables and target. 
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR) + diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake new file mode 100644 index 0000000000..b350f107f5 --- /dev/null +++ b/cmake/FindParMETIS.cmake @@ -0,0 +1,37 @@ +############################################################################### +# - Find cereal +# Find the native cereal headers. +# +# CEREAL_FOUND - True if cereal is found. +# CEREAL_INCLUDE_DIR - Where to find cereal headers. + +find_path(ParMETIS_INCLUDE_DIR + NAMES metis.h parmetis.h + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "include" +) + +find_library(METIS_LIBRARY + NAMES metis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(ParMETIS_LIBRARY + NAMES parmetis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +# print libs +# message(STATUS "ParMETIS_INCLUDE_DIR: ${ParMETIS_INCLUDE_DIR}") +# message(STATUS "ParMETIS_LIBRARY: ${ParMETIS_LIBRARY}") + +# Handle the QUIET and REQUIRED arguments and +# set Cereal_FOUND to TRUE if all variables are non-zero. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ParMETIS DEFAULT_MSG ParMETIS_LIBRARY METIS_LIBRARY) + +# Copy the results to the output variables and target. +mark_as_advanced(ParMETIS_LIBRARY) + diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake new file mode 100644 index 0000000000..ac1d5b4fab --- /dev/null +++ b/cmake/FindSuperLU.cmake @@ -0,0 +1,27 @@ +############################################################################### +# - Find cereal +# Find the native cereal headers. +# +# CEREAL_FOUND - True if cereal is found. +# CEREAL_INCLUDE_DIR - Where to find cereal headers. + +# find_path(SuperLU_INCLUDE_DIR +# NAMES *.h +# HINTS ${SuperLU_DIR} +# PATH_SUFFIXES "include" +# ) + +find_library(SuperLU_LIBRARY + NAMES libsuperlu_dist.a + HINTS ${SuperLU_DIR} + PATH_SUFFIXES "lib" +) + +# Handle the QUIET and REQUIRED arguments and +# set Cereal_FOUND to TRUE if all variables are non-zero. 
+include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(SuperLU DEFAULT_MSG SuperLU_LIBRARY) + +# Copy the results to the output variables and target. +mark_as_advanced(SuperLU_LIBRARY) + diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index c52a3d3773..9855dd3f2f 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -31,6 +31,19 @@ if(ENABLE_LCAO) add_coverage(diag_cusolver) endif() endif() + + if(USE_PEXSI) + list(APPEND objects + diago_pexsi.cpp + ) + # add_library( + # pexsi + # OBJECT + # ${objects} + # ) + add_subdirectory(pexsi) + endif() + endif() add_library( @@ -50,4 +63,5 @@ endif() IF (BUILD_TESTING) add_subdirectory(test) add_subdirectory(kernels/test) + message(STATUS "Building tests") endif() diff --git a/source/module_hsolver/pexsi/CMakeLists.txt b/source/module_hsolver/pexsi/CMakeLists.txt new file mode 100644 index 0000000000..8faab8b4b4 --- /dev/null +++ b/source/module_hsolver/pexsi/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp) + +if(ENABLE_COVERAGE) + add_coverage(pexsi) +endif() diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index ce124c5a5f..94cf3f0ea3 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -52,12 +52,26 @@ AddTest( ) if(ENABLE_LCAO) + # if(USE_ELPA and USE_PEXSI) + # AddTest( + # TARGET HSolver_LCAO + # LIBS ${math_libs} ELPA::ELPA base genelpa psi device + # SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp + # ) if(USE_ELPA) AddTest( TARGET HSolver_LCAO LIBS ${math_libs} ELPA::ELPA base genelpa psi device SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp ) + # elseif(USE_PEXSI) + # AddTest( + # TARGET HSolver_LCAO + # LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} 
${METIS_LIBRARY} base psi device + # SOURCES diago_lcao_test.cpp ../diago_pexsi.cpp ../diago_blas.cpp + # ) + # # print out the PEXSI library path + # message(STATUS "PEXSI_LIBRARY: ${PEXSI_LIBRARY}") else() AddTest( TARGET HSolver_LCAO diff --git a/source/module_hsolver/test/diago_lcao_test.cpp b/source/module_hsolver/test/diago_lcao_test.cpp index 1cfdf2cae9..5bfa7fc060 100644 --- a/source/module_hsolver/test/diago_lcao_test.cpp +++ b/source/module_hsolver/test/diago_lcao_test.cpp @@ -7,6 +7,9 @@ #ifdef __ELPA #include "module_hsolver/diago_elpa.h" #endif +#ifdef __PEXSI +#include "module_hsolver/diago_pexsi.h" +#endif #define PASSTHRESHOLD 1e-10 #define DETAILINFO false @@ -64,6 +67,10 @@ template class DiagoPrepare #ifdef __ELPA else if(ks_solver == "genelpa") dh = new hsolver::DiagoElpa; +#endif +#ifdef __PEXSI + else if(ks_solver == "pexsi") + dh = new hsolver::DiagoPexsi; #endif else { From 2529afe9d3862b269a6e97828eaf79744f29b23d Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Thu, 7 Dec 2023 16:13:32 +0800 Subject: [PATCH 06/44] Works --- CMakeLists.txt | 5 +++ deps/LibRI | 2 +- source/module_basis/module_ao/ORB_control.cpp | 4 +-- source/module_elecstate/elecstate.cpp | 33 ++++++++++------- source/module_elecstate/elecstate_lcao.cpp | 23 +++++++++--- source/module_elecstate/elecstate_print.cpp | 4 +++ source/module_esolver/esolver_ks.cpp | 2 +- .../module_gint/gint_rho.cpp | 2 +- source/module_hsolver/diago_pexsi.cpp | 8 +++-- source/module_hsolver/diago_pexsi.h | 35 ++++++++++--------- source/module_hsolver/hsolver_lcao.cpp | 23 ++++++------ source/module_hsolver/pexsi/DistBCDMatrix.h | 6 ++++ source/module_hsolver/pexsi/DistCCSMatrix.h | 5 +++ .../pexsi/DistMatrixTransformer.h | 6 ++++ source/module_hsolver/pexsi/pexsi_solver.h | 4 ++- source/module_hsolver/pexsi/simplePEXSI.h | 7 +++- source/module_io/write_input.cpp | 2 +- 17 files changed, 117 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
e4e3f85265..ecf4eb7b0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,11 @@ project(ABACUS LANGUAGES CXX ) +# private options, should not be pushed to master +set(PEXSI_DIR "~/projects/pexsi-build/pexsi") +set(SuperLU_DIR "~/projects/pexsi-build/superlu") +set(ParMETIS_DIR "~/projects/pexsi-build/parmetis") + option(ENABLE_LCAO "Enable LCAO calculation." ON) option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF) option(ENABLE_LIBXC "Enable LibXC functionality" OFF) diff --git a/deps/LibRI b/deps/LibRI index 553c91c0be..b321b71a86 160000 --- a/deps/LibRI +++ b/deps/LibRI @@ -1 +1 @@ -Subproject commit 553c91c0be1d60a86e7666f0502ef866c366c600 +Subproject commit b321b71a8677a88a42bbb78e6d31c10073454e14 diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp index c2e6c9b9a1..580a99d7d0 100644 --- a/source/module_basis/module_ao/ORB_control.cpp +++ b/source/module_basis/module_ao/ORB_control.cpp @@ -205,7 +205,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, bool div_2d; if (ks_solver == "lapack" || ks_solver == "cg" || ks_solver == "dav") div_2d = false; #ifdef __MPI - else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") div_2d = true; + else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") div_2d = true; #endif else { @@ -382,7 +382,7 @@ assert(nb2d > 0); } // init blacs context for genelpa - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { pv->set_desc(nlocal, nlocal, pv->nrow); pv->set_desc_wfc_Eij(nlocal, nbands, pv->nrow); diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp index 393c2d07d5..7e09f0f509 100644 --- a/source/module_elecstate/elecstate.cpp +++ 
b/source/module_elecstate/elecstate.cpp @@ -174,26 +174,33 @@ void ElecState::calEBand() ModuleBase::TITLE("ElecState", "calEBand"); // calculate ebands using wg and ekb double eband = 0.0; + // if (GlobalV::KS_SOLVER == "pexsi") + // { + // // tbd + // } + // else + { #ifdef _OPENMP #pragma omp parallel for collapse(2) reduction(+:eband) #endif - for (int ik = 0; ik < this->ekb.nr; ++ik) - { - for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++) + for (int ik = 0; ik < this->ekb.nr; ++ik) { - eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd); + for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++) + { + eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd); + } } - } - this->f_en.eband = eband; - if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft") - { - //================================== - // Reduce all the Energy in each cpu - //================================== - this->f_en.eband /= GlobalV::NPROC_IN_POOL; + this->f_en.eband = eband; + if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft") + { + //================================== + // Reduce all the Energy in each cpu + //================================== + this->f_en.eband /= GlobalV::NPROC_IN_POOL; #ifdef __MPI - Parallel_Reduce::reduce_all(this->f_en.eband); + Parallel_Reduce::reduce_all(this->f_en.eband); #endif + } } return; } diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 9e8e732553..c43cdb9fd2 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -181,6 +181,7 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); this->DM->cal_DMR(); + if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready { this->loc->dm_gamma.resize(GlobalV::NSPIN); @@ -189,7 +190,11 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) 
this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is)); } } + } + + + ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); for (int ik = 0; ik < psi.get_nk(); ++ik) @@ -273,13 +278,23 @@ double ElecStateLCAO>::get_spin_constrain_energy() return sc.cal_escon(); } -template class ElecStateLCAO; // Gamma_only case -template class ElecStateLCAO>; // multi-k case - -void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) +template<> +void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) { this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow); this->loc->dm_gamma[0].c = DM; + this->loc->out_dm = 1; } +template<> +void ElecStateLCAO>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) +{ + ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); +} + +template class ElecStateLCAO; // Gamma_only case +template class ElecStateLCAO>; // multi-k case + + + } // namespace elecstate \ No newline at end of file diff --git a/source/module_elecstate/elecstate_print.cpp b/source/module_elecstate/elecstate_print.cpp index 1cc402ede5..e0da5777b5 100644 --- a/source/module_elecstate/elecstate_print.cpp +++ b/source/module_elecstate/elecstate_print.cpp @@ -295,6 +295,10 @@ void ElecState::print_etot(const bool converged, { label = "BP"; } + else if (ks_solver_type == "pexsi") + { + label = "PE"; + } else { ModuleBase::WARNING_QUIT("Energy", "print_etot found unknown ks_solver_type"); diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 63592d9b66..2876c60f79 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -423,7 +423,7 @@ namespace ModuleESolver double duration = (std::chrono::duration_cast(std::chrono::system_clock::now() - iterstart)).count() / static_cast(1e6); #endif printiter(iter, drho, duration, diag_ethr); - if (this->conv_elec) + if (this->conv_elec && iter >= 5) { std::cout << 
"this->conv_elec" << std::endl; this->niter = iter; diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp index fe40162b0c..6d791f985f 100644 --- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp @@ -36,7 +36,7 @@ void Gint::gint_kernel_rho( ModuleBase::GlobalFunc::ZEROS(psir_DM.ptr_1D, this->bxyz*LD_pool); if(GlobalV::GAMMA_ONLY_LOCAL) { - if (GlobalV::CALCULATION == "get_pchg") + if (GlobalV::CALCULATION == "get_pchg" || GlobalV::KS_SOLVER == "pexsi") { Gint_Tools::mult_psi_DM( *this->gridt, this->bxyz, na_grid, LD_pool, diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index c0b4e3e24c..21750c13e8 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,3 +1,4 @@ +#include #ifdef __PEXSI #include "diago_pexsi.h" @@ -15,7 +16,8 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { -void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) +template<> +void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); matd h_mat, s_mat; @@ -40,7 +42,9 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, dou this->totalEnergyH = this->ps->totalEnergyH; this->totalEnergyS = this->ps->totalEnergyS; } -void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi>& psi, double* eigenvalue_in) + +template<> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, psi::Psi>& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index a48b92b867..802ec51e97 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -13,24 +13,25 @@ namespace hsolver { -class 
DiagoPexsi : public DiagH -{ + template + class DiagoPexsi : public DiagH + { + private: + using Real = typename GetTypeReal::type; public: - DiagoPexsi(const Parallel_Orbitals* ParaV_in) - { - this->ParaV = ParaV_in; - } - void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) override; - void diag(hamilt::Hamilt* phm_in, psi::Psi> &psi, double *eigenvalue_in) override; - const Parallel_Orbitals* ParaV; - double* DM; - double* EDM; - double totalEnergyH; - double totalEnergyS; - double totalFreeEnergy; - PEXSI_Solver* ps; -}; - + DiagoPexsi(const Parallel_Orbitals* ParaV_in) + { + this->ParaV = ParaV_in; + } + void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; + const Parallel_Orbitals* ParaV; + double* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + PEXSI_Solver* ps; + }; } #endif diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 0494e5897a..6ae226268e 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -81,23 +81,26 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, */ ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); } +#ifdef __PEXSI else if (this->method == "pexsi") { - if (pdiagh != nullptr) + if (this->pdiagh != nullptr) { - if (pdiagh->method != this->method) + if (this->pdiagh->method != this->method) { - delete[] pdiagh; - pdiagh = nullptr; + delete[] this->pdiagh; + this->pdiagh = nullptr; } } - if (pdiagh == nullptr) + if (this->pdiagh == nullptr) { - DiagoPexsi* tem = new DiagoPexsi(this->ParaV); + DiagoPexsi* tem = new DiagoPexsi(this->ParaV); this->pdiagh = tem; - pdiagh->method = this->method; + // this->pdiagh = dynamic_cast*>(tem); + this->pdiagh->method = this->method; } } +#endif else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); @@ -140,10 +143,10 @@ void 
HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, // called in scf calculation if (this->method == "pexsi") { - DiagoPexsi* tem = dynamic_cast(this->pdiagh); + DiagoPexsi* tem = dynamic_cast*>(this->pdiagh); if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); - elecstate::ElecStateLCAO* _pes = dynamic_cast(pes); - pes->eband = tem->totalFreeEnergy; + elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); + pes->f_en.eband = tem->totalFreeEnergy; _pes->get_DM_from_pexsi(tem->DM, tem->ParaV); } pes->psiToRho(psi); diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h index a0b8c7a907..97c5e8652f 100644 --- a/source/module_hsolver/pexsi/DistBCDMatrix.h +++ b/source/module_hsolver/pexsi/DistBCDMatrix.h @@ -1,3 +1,7 @@ +#ifndef DISTBCDMATRIX_H +#define DISTBCDMATRIX_H + +#include // a Block Cyclic Data Distribution matrix // http://www.netlib.org/utk/papers/factor/node3.html // local matrix elements is stored in column major @@ -61,3 +65,5 @@ class DistBCDMatrix { // 'C' or 'c' for column-major, which is used in Fortran char LAYOUT; }; + +#endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h index 43d1126bf6..48ec95d0fc 100644 --- a/source/module_hsolver/pexsi/DistCCSMatrix.h +++ b/source/module_hsolver/pexsi/DistCCSMatrix.h @@ -1,3 +1,7 @@ +#ifndef DISTCCSMATRIX_H +#define DISTCCSMATRIX_H + +#include // Distributed Compressed Column Storage Matrix format // used for PEXSI class DistCCSMatrix { @@ -44,3 +48,4 @@ class DistCCSMatrix { int* rowindLocal; }; +#endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.h b/source/module_hsolver/pexsi/DistMatrixTransformer.h index be92935642..cdc0a53f74 100644 --- a/source/module_hsolver/pexsi/DistMatrixTransformer.h +++ b/source/module_hsolver/pexsi/DistMatrixTransformer.h @@ -1,3 +1,8 @@ +#ifndef 
DISTMATRIXTRANSFORMER_H +#define DISTMATRIXTRANSFORMER_H + +#include "DistBCDMatrix.h" +#include "DistCCSMatrix.h" // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution // they should have same MPI communicator // The local matrix of BCD is column-major order @@ -18,3 +23,4 @@ int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, con int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal, DistBCDMatrix& DST_Matrix, double* DM_2d, double* END_2d); +#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/pexsi/pexsi_solver.h index 95ade7c15f..52f23b663b 100644 --- a/source/module_hsolver/pexsi/pexsi_solver.h +++ b/source/module_hsolver/pexsi/pexsi_solver.h @@ -1,5 +1,6 @@ #ifndef PEXSI_Solver_H #define PEXSI_Solver_H + class PEXSI_Solver { public: @@ -27,4 +28,5 @@ class PEXSI_Solver double totalEnergyS; double totalFreeEnergy; }; -#endif \ No newline at end of file + +#endif // PEXSI_Solver_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/simplePEXSI.h b/source/module_hsolver/pexsi/simplePEXSI.h index 5bdf8d8bbb..6a23ba6600 100644 --- a/source/module_hsolver/pexsi/simplePEXSI.h +++ b/source/module_hsolver/pexsi/simplePEXSI.h @@ -1,3 +1,6 @@ +#ifndef SIMPLE_PEXSI_H +#define SIMPLE_PEXSI_H + #include // a simple interface for calling pexsi with 2D block cyclic distributed matrix int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt, // communicator parameters @@ -5,4 +8,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const double* H, double* S, // input matrices const double nElectronExact, const std::string PexsiOptionFile, // pexsi parameters file double*& DM, double*& EDM, // output matrices - double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); \ No 
newline at end of file + double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); + +#endif // SIMPLE_PEXSI_H \ No newline at end of file diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index 703549a0be..addc18a976 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -208,7 +208,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ofs << "\n#Parameters (5.LCAO)" << std::endl; ModuleBase::GlobalFunc::OUTP(ofs, "basis_type", basis_type, "PW; LCAO in pw; LCAO"); - if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { ModuleBase::GlobalFunc::OUTP(ofs, "nb2d", nb2d, "2d distribution of atoms"); } From 09da6b1a31231a23280531ead84e612482df3d1d Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Mon, 15 Jan 2024 17:11:02 +0800 Subject: [PATCH 07/44] adapt to the new container --- CMakeLists.txt | 11 ++- cmake/FindPEXSI.cmake | 33 +++++++- cmake/FindParMETIS.cmake | 37 --------- cmake/FindSuperLU.cmake | 27 ------- source/module_elecstate/elecstate.cpp | 5 -- source/module_elecstate/elecstate_lcao.cpp | 78 +++++++++++++------ source/module_elecstate/elecstate_lcao.h | 4 +- .../module_gint/gint_rho.cpp | 2 +- source/module_hsolver/diago_pexsi.h | 7 +- source/module_hsolver/hsolver_lcao.cpp | 13 +++- 10 files changed, 105 insertions(+), 112 deletions(-) delete mode 100644 cmake/FindParMETIS.cmake delete mode 100644 cmake/FindSuperLU.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ecf4eb7b0b..d6d270a9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,9 +11,10 @@ project(ABACUS ) # private options, should not be pushed to master -set(PEXSI_DIR "~/projects/pexsi-build/pexsi") -set(SuperLU_DIR "~/projects/pexsi-build/superlu") -set(ParMETIS_DIR 
"~/projects/pexsi-build/parmetis") +# set(PEXSI_DIR "~/Documents/projects/abacus-dependencies/pexsi") +# set(SuperLU_DIR "~/Documents/projects/abacus-dependencies/superlu") +# set(ParMETIS_DIR "~/Documents/projects/abacus-dependencies/parmetis") +# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001") option(ENABLE_LCAO "Enable LCAO calculation." ON) option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF) @@ -39,7 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF) option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF) option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) -option(USE_PEXSI "Enable support to PEXSI." ON) +option(USE_PEXSI "Enable support for PEXSI." ON) # get commit info if(COMMIT_INFO) @@ -179,8 +180,6 @@ if(ENABLE_LCAO) if(USE_PEXSI) find_package(PEXSI REQUIRED) - find_package(SuperLU REQUIRED) - find_package(ParMETIS REQUIRED) target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index 6a1d699a81..22fe4dd01c 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -2,8 +2,8 @@ # - Find cereal # Find the native cereal headers. # -# CEREAL_FOUND - True if cereal is found. -# CEREAL_INCLUDE_DIR - Where to find cereal headers. +# PEXSI_FOUND - True if cereal is found. +# PEXSI_INCLUDE_DIR - Where to find cereal headers. 
find_path(PEXSI_INCLUDE_DIR NAMES c_pexsi_interface.h @@ -17,11 +17,36 @@ find_library(PEXSI_LIBRARY PATH_SUFFIXES "lib" ) +find_path(ParMETIS_INCLUDE_DIR + NAMES metis.h parmetis.h + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "include" +) + +find_library(METIS_LIBRARY + NAMES metis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(ParMETIS_LIBRARY + NAMES parmetis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(SuperLU_LIBRARY + NAMES libsuperlu_dist.a + HINTS ${SuperLU_DIR} + PATH_SUFFIXES "lib" +) + # Handle the QUIET and REQUIRED arguments and # set Cereal_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY) + # Copy the results to the output variables and target. -mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR) +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY) diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake deleted file mode 100644 index b350f107f5..0000000000 --- a/cmake/FindParMETIS.cmake +++ /dev/null @@ -1,37 +0,0 @@ -############################################################################### -# - Find cereal -# Find the native cereal headers. -# -# CEREAL_FOUND - True if cereal is found. -# CEREAL_INCLUDE_DIR - Where to find cereal headers. 
- -find_path(ParMETIS_INCLUDE_DIR - NAMES metis.h parmetis.h - HINTS ${ParMETIS_DIR} - PATH_SUFFIXES "include" -) - -find_library(METIS_LIBRARY - NAMES metis - HINTS ${ParMETIS_DIR} - PATH_SUFFIXES "lib" -) - -find_library(ParMETIS_LIBRARY - NAMES parmetis - HINTS ${ParMETIS_DIR} - PATH_SUFFIXES "lib" -) - -# print libs -# message(STATUS "ParMETIS_INCLUDE_DIR: ${ParMETIS_INCLUDE_DIR}") -# message(STATUS "ParMETIS_LIBRARY: ${ParMETIS_LIBRARY}") - -# Handle the QUIET and REQUIRED arguments and -# set Cereal_FOUND to TRUE if all variables are non-zero. -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ParMETIS DEFAULT_MSG ParMETIS_LIBRARY METIS_LIBRARY) - -# Copy the results to the output variables and target. -mark_as_advanced(ParMETIS_LIBRARY) - diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake deleted file mode 100644 index ac1d5b4fab..0000000000 --- a/cmake/FindSuperLU.cmake +++ /dev/null @@ -1,27 +0,0 @@ -############################################################################### -# - Find cereal -# Find the native cereal headers. -# -# CEREAL_FOUND - True if cereal is found. -# CEREAL_INCLUDE_DIR - Where to find cereal headers. - -# find_path(SuperLU_INCLUDE_DIR -# NAMES *.h -# HINTS ${SuperLU_DIR} -# PATH_SUFFIXES "include" -# ) - -find_library(SuperLU_LIBRARY - NAMES libsuperlu_dist.a - HINTS ${SuperLU_DIR} - PATH_SUFFIXES "lib" -) - -# Handle the QUIET and REQUIRED arguments and -# set Cereal_FOUND to TRUE if all variables are non-zero. -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SuperLU DEFAULT_MSG SuperLU_LIBRARY) - -# Copy the results to the output variables and target. 
-mark_as_advanced(SuperLU_LIBRARY) - diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp index 7e09f0f509..e2a4c3eec6 100644 --- a/source/module_elecstate/elecstate.cpp +++ b/source/module_elecstate/elecstate.cpp @@ -174,11 +174,6 @@ void ElecState::calEBand() ModuleBase::TITLE("ElecState", "calEBand"); // calculate ebands using wg and ekb double eband = 0.0; - // if (GlobalV::KS_SOLVER == "pexsi") - // { - // // tbd - // } - // else { #ifdef _OPENMP #pragma omp parallel for collapse(2) reduction(+:eband) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index c43cdb9fd2..8ca7d2b050 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -166,33 +166,28 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) ModuleBase::TITLE("ElecStateLCAO", "psiToRho"); ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); - if (GlobalV::KS_SOLVER != "pexsi") // useless for pexsi - { - this->calculate_weights(); - this->calEBand(); - } + this->calculate_weights(); + this->calEBand(); if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi") { ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); - if (GlobalV::KS_SOLVER != "pexsi") - { - // get DMK in 2d-block format - //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); - elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); - this->DM->cal_DMR(); - if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready + // get DMK in 2d-block format + //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); + elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); + this->DM->cal_DMR(); + + if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready + { + this->loc->dm_gamma.resize(GlobalV::NSPIN); + 
for (int is = 0; is < GlobalV::NSPIN; ++is) { - this->loc->dm_gamma.resize(GlobalV::NSPIN); - for (int is = 0; is < GlobalV::NSPIN; ++is) - { - this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is)); - } + this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is)); } - } + ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); @@ -278,20 +273,57 @@ double ElecStateLCAO>::get_spin_constrain_energy() return sc.cal_escon(); } +#ifdef __PEXSI template<> -void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) +void ElecStateLCAO::dmToRho(double* pexsi_DM) { - this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow); - this->loc->dm_gamma[0].c = DM; - this->loc->out_dm = 1; + ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + + this->loc->set_dm_gamma(0, pexsi_DM); + + // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 + if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready + { + this->loc->cal_dk_gamma_from_2D_pub(); + } + + auto DM = this->get_DM(); + DM->set_DMK_pointer(0, pexsi_DM); + DM->cal_DMR(); + + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); // mohan 2009-11-10 + } + + ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); + this->uhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint + Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho); + this->uhm->GG.cal_gint(&inout); + if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) + { + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx); + } + Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau); + this->uhm->GG.cal_gint(&inout1); + } + + this->charge->renormalize_rho(); + + ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + return; } 
template<> -void ElecStateLCAO>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV) +void ElecStateLCAO>::dmToRho(std::complex* DM) { ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); } +#endif + template class ElecStateLCAO; // Gamma_only case template class ElecStateLCAO>; // multi-k case diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index 6adefc6cda..8c86844486 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -59,8 +59,10 @@ class ElecStateLCAO : public ElecState double get_spin_constrain_energy() override; +#ifdef __PEXSI //use for pexsi - void get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV); + void dmToRho(TK* DM); +#endif protected: // calculate electronic charge density on grid points or density matrix in real space diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp index 6d791f985f..fe40162b0c 100644 --- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp @@ -36,7 +36,7 @@ void Gint::gint_kernel_rho( ModuleBase::GlobalFunc::ZEROS(psir_DM.ptr_1D, this->bxyz*LD_pool); if(GlobalV::GAMMA_ONLY_LOCAL) { - if (GlobalV::CALCULATION == "get_pchg" || GlobalV::KS_SOLVER == "pexsi") + if (GlobalV::CALCULATION == "get_pchg") { Gint_Tools::mult_psi_DM( *this->gridt, this->bxyz, na_grid, LD_pool, diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 802ec51e97..486b3a90b1 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -1,11 +1,6 @@ #ifndef DIGAOPEXSI_H #define DIGAOPEXSI_H -#ifdef __PEXSI - -#define DIGAOPEXSI_H -#endif - #include "module_basis/module_ao/parallel_orbitals.h" #include "diagh.h" #include "pexsi/pexsi_solver.h" @@ -25,7 +20,7 @@ namespace hsolver } void diag(hamilt::Hamilt* phm_in, 
psi::Psi& psi, Real* eigenvalue_in) override; const Parallel_Orbitals* ParaV; - double* DM; + T* DM; double* EDM; double totalEnergyH; double totalEnergyS; diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 6ae226268e..259e043e4b 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -7,7 +7,11 @@ #ifdef __ELPA #include "diago_elpa.h" #endif + +#ifdef __PEXSI #include "diago_pexsi.h" +#endif + #include "module_elecstate/elecstate_lcao.h" namespace hsolver @@ -141,15 +145,20 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, // calculate charge by psi // called in scf calculation +#ifdef __PEXSI if (this->method == "pexsi") { DiagoPexsi* tem = dynamic_cast*>(this->pdiagh); if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); pes->f_en.eband = tem->totalFreeEnergy; - _pes->get_DM_from_pexsi(tem->DM, tem->ParaV); + _pes->dmToRho(tem->DM); + } + else +#endif + { + pes->psiToRho(psi); } - pes->psiToRho(psi); ModuleBase::timer::tick("HSolverLCAO", "solve"); } template From f52bd9909789c71b8ca3a49bfaf5e4ca530067cc Mon Sep 17 00:00:00 2001 From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com> Date: Mon, 15 Jan 2024 17:13:55 +0800 Subject: [PATCH 08/44] Turn off USE_PEXSI --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6d270a9bb..f00594b3b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF) option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF) option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) -option(USE_PEXSI "Enable support for PEXSI." 
ON) +option(USE_PEXSI "Enable support for PEXSI." OFF) # get commit info if(COMMIT_INFO) @@ -651,4 +651,4 @@ install(PROGRAMS ${ABACUS_BIN_PATH} if(ENABLE_COVERAGE) coverage_evaluate() -endif() \ No newline at end of file +endif() From 682d19baab43329caee5d0070691a3fc1c48d572 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Tue, 16 Jan 2024 21:14:37 +0800 Subject: [PATCH 09/44] Update LibRI to 553c91c --- deps/LibRI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/LibRI b/deps/LibRI index b321b71a86..553c91c0be 160000 --- a/deps/LibRI +++ b/deps/LibRI @@ -1 +1 @@ -Subproject commit b321b71a8677a88a42bbb78e6d31c10073454e14 +Subproject commit 553c91c0be1d60a86e7666f0502ef866c366c600 From 90f600e57a744506118eecf65614a648b2c96540 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Wed, 17 Jan 2024 16:21:44 +0800 Subject: [PATCH 10/44] modify include files --- CMakeLists.txt | 6 +++--- source/module_hsolver/hsolver_lcao.cpp | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e3f6b4ac8..8440662355 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,9 +11,9 @@ project(ABACUS ) # private options, should not be pushed to master -# set(PEXSI_DIR "~/Documents/projects/abacus-dependencies/pexsi") -# set(SuperLU_DIR "~/Documents/projects/abacus-dependencies/superlu") -# set(ParMETIS_DIR "~/Documents/projects/abacus-dependencies/parmetis") +# set(PEXSI_DIR "~/projects/pexsi-build/pexsi") +# set(SuperLU_DIR "~/projects/pexsi-build/superlu") +# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis") # set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001") option(ENABLE_LCAO "Enable LCAO calculation." 
ON) diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 085d6ac389..7adea8b91d 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -16,6 +16,10 @@ #ifdef __CUSOLVER_LCAO #include "diago_cusolver.h" #endif +#ifdef __PEXSI +#include "diago_pexsi.h" +#include "module_elecstate/elecstate_lcao.h" +#endif namespace hsolver { From 3d8c3668bf5b5e0f28e14075fbcbf5f8901feb91 Mon Sep 17 00:00:00 2001 From: FlyingDragonBoxing Date: Mon, 22 Jan 2024 14:12:47 +0800 Subject: [PATCH 11/44] namespace-ize --- source/module_hsolver/CMakeLists.txt | 7 +- source/module_hsolver/diago_pexsi.cpp | 2 +- source/module_hsolver/diago_pexsi.h | 39 +++-- .../{pexsi => module_pexsi}/CMakeLists.txt | 0 .../module_pexsi/dist_bcd_matrix.cpp | 113 ++++++++++++ .../module_pexsi/dist_bcd_matrix.h | 73 ++++++++ .../module_pexsi/dist_ccs_matrix.cpp | 117 +++++++++++++ .../module_pexsi/dist_ccs_matrix.h | 55 ++++++ .../dist_matrix_transformer.cpp} | 10 +- .../module_pexsi/dist_matrix_transformer.h | 36 ++++ .../{pexsi => module_pexsi}/pexsi_solver.cpp | 21 ++- .../{pexsi => module_pexsi}/pexsi_solver.h | 9 +- .../simple_pexsi.cpp} | 9 +- .../module_pexsi/simple_pexsi.h | 27 +++ source/module_hsolver/pexsi/DistBCDMatrix.cpp | 164 ------------------ source/module_hsolver/pexsi/DistBCDMatrix.h | 69 -------- source/module_hsolver/pexsi/DistCCSMatrix.cpp | 113 ------------ source/module_hsolver/pexsi/DistCCSMatrix.h | 51 ------ .../pexsi/DistMatrixTransformer.h | 26 --- source/module_hsolver/pexsi/simplePEXSI.h | 13 -- 20 files changed, 483 insertions(+), 471 deletions(-) rename source/module_hsolver/{pexsi => module_pexsi}/CMakeLists.txt (100%) create mode 100644 source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp create mode 100644 source/module_hsolver/module_pexsi/dist_bcd_matrix.h create mode 100644 source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp create mode 100644 
source/module_hsolver/module_pexsi/dist_ccs_matrix.h rename source/module_hsolver/{pexsi/DistMatrixTransformer.cpp => module_pexsi/dist_matrix_transformer.cpp} (99%) create mode 100644 source/module_hsolver/module_pexsi/dist_matrix_transformer.h rename source/module_hsolver/{pexsi => module_pexsi}/pexsi_solver.cpp (87%) rename source/module_hsolver/{pexsi => module_pexsi}/pexsi_solver.h (81%) rename source/module_hsolver/{pexsi/simplePEXSI.cpp => module_pexsi/simple_pexsi.cpp} (99%) create mode 100644 source/module_hsolver/module_pexsi/simple_pexsi.h delete mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.cpp delete mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.h delete mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.cpp delete mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.h delete mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.h delete mode 100644 source/module_hsolver/pexsi/simplePEXSI.h diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index 498380f728..9a023fb5d0 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -42,12 +42,7 @@ if(ENABLE_LCAO) list(APPEND objects diago_pexsi.cpp ) - # add_library( - # pexsi - # OBJECT - # ${objects} - # ) - add_subdirectory(pexsi) + add_subdirectory(module_pexsi) endif() endif() diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 21750c13e8..fbaf7b1806 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -24,7 +24,7 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& phm_in->matrix(h_mat, s_mat); std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; - this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt, + this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, this->ParaV->nb, this->ParaV->nrow, this->ParaV->ncol, diff --git a/source/module_hsolver/diago_pexsi.h 
b/source/module_hsolver/diago_pexsi.h index 486b3a90b1..018397a33d 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -1,32 +1,33 @@ #ifndef DIGAOPEXSI_H #define DIGAOPEXSI_H -#include "module_basis/module_ao/parallel_orbitals.h" #include "diagh.h" +#include "module_basis/module_ao/parallel_orbitals.h" #include "pexsi/pexsi_solver.h" namespace hsolver { - template - class DiagoPexsi : public DiagH - { +template +class DiagoPexsi : public DiagH +{ private: - using Real = typename GetTypeReal::type; + using Real = typename GetTypeReal::type; + public: - DiagoPexsi(const Parallel_Orbitals* ParaV_in) - { - this->ParaV = ParaV_in; - } - void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; - const Parallel_Orbitals* ParaV; - T* DM; - double* EDM; - double totalEnergyH; - double totalEnergyS; - double totalFreeEnergy; - PEXSI_Solver* ps; - }; -} + DiagoPexsi(const Parallel_Orbitals* ParaV_in) + { + this->ParaV = ParaV_in; + } + void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; + const Parallel_Orbitals* ParaV; + T* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + pexsi::PEXSI_Solver* ps; +}; +} // namespace hsolver #endif diff --git a/source/module_hsolver/pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt similarity index 100% rename from source/module_hsolver/pexsi/CMakeLists.txt rename to source/module_hsolver/module_pexsi/CMakeLists.txt diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp new file mode 100644 index 0000000000..cf815bd4ae --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -0,0 +1,113 @@ +#include "dist_bcd_matrix.h" + +#include +extern "C" +{ + void Cblacs_gridinfo(int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol); + int Cblacs_pnum(int blacs_ctxt, int prow, int pcol); +}; + 
+namespace pexsi +{ +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, + MPI_Group group, + int blacs_ctxt, + int size, + int nblk, + int nrow, + int ncol, + char LAYOUT) +{ + this->comm = comm; + this->group = group; + this->blacs_ctxt = blacs_ctxt; + this->size = size; + this->nblk = nblk; + this->nrow = nrow; + this->ncol = ncol; + if (LAYOUT == 'R' || LAYOUT == 'r' || LAYOUT == 'C' || LAYOUT == 'c') + { + this->LAYOUT = LAYOUT; + } + else + { + throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); + } + + if (comm != MPI_COMM_NULL) + { + MPI_Comm_rank(comm, &this->myproc); + Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); + } + else + { + this->myproc = -1; + this->myprow = -1; + this->mypcol = -1; + } + + // synchronize matrix parameters to all processes, including those are not in bcd group + int myid_in_comm_world; + MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world); + if (myid_in_comm_world == 0) + { + MPI_Comm_size(comm, &this->nprocs); + int PARA_BCAST[4] = {this->nblk, this->nprocs, this->nprows, this->npcols}; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + } + else + { + int PARA_BCAST[4]; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + this->nblk = PARA_BCAST[0]; + this->nprocs = PARA_BCAST[1]; + this->nprows = PARA_BCAST[2]; + this->npcols = PARA_BCAST[3]; + } + this->prowpcol2pnum = new int[this->nprocs]; + if (myid_in_comm_world == 0) + { + for (int i = 0; i < this->nprows; ++i) + { + for (int j = 0; j < this->npcols; ++j) + { + this->prowpcol2pnum[i * this->npcols + j] = Cblacs_pnum(this->blacs_ctxt, i, j); + } + } + } + MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD); +} + +DistBCDMatrix::~DistBCDMatrix() +{ + delete[] prowpcol2pnum; +} + +int DistBCDMatrix::globalRow(const int localRow) +{ + return (localRow / nblk * nprows + myprow) * nblk + localRow % nblk; +} + +int DistBCDMatrix::globalCol(const int localCol) +{ + + return (localCol / nblk * npcols + 
mypcol) * nblk + localCol % nblk; +} + +int DistBCDMatrix::localRow(const int globalRow, int& myprow) +{ + myprow = int((globalRow % (nblk * nprows)) / nblk); + return int(globalRow / (nblk * nprows)) * nblk + globalRow % nblk; +} + +int DistBCDMatrix::localCol(const int globalCol, int& mypcol) +{ + mypcol = int((globalCol % (nblk * npcols)) / nblk); + return int(globalCol / (nblk * npcols)) * nblk + globalCol % nblk; +} + +int DistBCDMatrix::pnum(const int prow, const int pcol) +{ + return this->prowpcol2pnum[prow * this->npcols + pcol]; +} +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h new file mode 100644 index 0000000000..7dbddbad7c --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -0,0 +1,73 @@ +#ifndef DISTBCDMATRIX_H +#define DISTBCDMATRIX_H + +#include +// a Block Cyclic Data Distribution matrix +// http://www.netlib.org/utk/papers/factor/node3.html +// local matrix elements is stored in column major +// used for pexsi +namespace pexsi +{ +class DistBCDMatrix +{ + + public: + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol); + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char + // LAYOUT); + + // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol); + DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT); + ~DistBCDMatrix(); + + int globalRow(const int localRow); + int globalCol(const int localCol); + int localRow(const int globalRow, int& myprow); + int localCol(const int globalCol, int& mypcol); + int pnum(const int prow, const int pcol); + //~DistBCDMatrix(); + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // blacs context + int blacs_ctxt; + + // 
row and column of process grid + int nprows; + int npcols; + + // total number of processes + int nprocs; + + // Matrix size + int size; + + // block size + int nblk; + + // row and column of Local matrix part + int nrow; + int ncol; + + // protected: + + // private: + + // current process row and column + int myprow; + int mypcol; + + // current process id + int myproc; + + int* prowpcol2pnum; + // the local data layout + // 'R' or 'r' for row-major, which is used in C/C++ + // 'C' or 'c' for column-major, which is used in Fortran + char LAYOUT; +}; +} // namespace pexsi +#endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp new file mode 100644 index 0000000000..365622d249 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -0,0 +1,117 @@ +#include "dist_ccs_matrix.h" + +#include + +namespace pexsi +{ +DistCCSMatrix::DistCCSMatrix(void) +{ + this->comm = MPI_COMM_WORLD; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = NULL; + this->rowindLocal = NULL; +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) +{ + this->comm = comm_in; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = NULL; + this->rowindLocal = NULL; +} + +DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) +{ + this->comm = MPI_COMM_WORLD; + this->size = size_in; + this->nnzLocal = nnzLocal_in; + MPI_Request req; + MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req); + this->numColLocal = 0; + this->colptrLocal = new int[size]; + this->rowindLocal = new int[nnzLocal]; + + MPI_Status req_status; + MPI_Wait(&req, &req_status); +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) +{ + this->comm = comm_in; + this->nproc_data = nproc_data_in; + int nproc_data_range[3] = {0, 
this->nproc_data - 1, 1}; + // create processes group with data: this->group_data and associated communicator + MPI_Comm_group(this->comm, &this->group); + MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data); + this->comm_data = MPI_COMM_NULL; + MPI_Comm_create(this->comm, this->group_data, &this->comm_data); + this->size = size_in; + this->nnz = 0; + this->nnzLocal = 0; + int myproc; + if (comm != MPI_COMM_NULL) + { + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &myproc); + if (myproc < nproc_data - 1) + { + this->numColLocal = size / nproc_data; + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = NULL; + } + else if (myproc == nproc_data - 1) + { + this->numColLocal = size - myproc * (size / nproc_data); + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = NULL; + } + else + { + this->numColLocal = 0; + this->firstCol = size - 1; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = NULL; + } + } +} + +int DistCCSMatrix::globalCol(int localCol) +{ + return this->firstCol + localCol; +} + +// NOTE: the process id is 0-based +int DistCCSMatrix::localCol(int globalCol, int& mypcol) +{ + mypcol = int(globalCol / int(this->size / this->nproc_data)); + if (mypcol >= this->nproc_data) + mypcol = this->nproc_data - 1; + + return mypcol > 0 ? 
globalCol - (this->size / this->nproc_data) * mypcol : globalCol; +} + +void DistCCSMatrix::setnnz(int nnzLocal_in) +{ + if (this->comm_data != MPI_COMM_NULL) + { + MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data); + this->nnzLocal = nnzLocal_in; + this->rowindLocal = new int[nnzLocal]; + this->colptrLocal[this->numColLocal] = nnzLocal_in + 1; + } +} + +DistCCSMatrix::~DistCCSMatrix() +{ + delete[] colptrLocal; + delete[] rowindLocal; +} +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h new file mode 100644 index 0000000000..aa5e67b6ab --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -0,0 +1,55 @@ +#ifndef DISTCCSMATRIX_H +#define DISTCCSMATRIX_H + +#include +// Distributed Compressed Column Storage Matrix format +// used for PEXSI +namespace pexsi +{ +class DistCCSMatrix +{ + + public: + DistCCSMatrix(); + DistCCSMatrix(MPI_Comm comm); + DistCCSMatrix(int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index); + + int globalCol(int localCol); + int localCol(int globalCol, int& mypcol); + void setnnz(int nnzLocal); + ~DistCCSMatrix(); + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // total number of processes and the processes with data in + int nprocs; + int nproc_data; + MPI_Group group_data; + MPI_Comm comm_data; + + // Matrix size + int size; + + // Number of non-zero values in the matrix + int nnz; + + // Number of non-zero values in the matrix of the local process + int nnzLocal; + + // number of columns in current process + int numColLocal; + + // the first column index in current process + int firstCol; + + // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal + int* colptrLocal; + int* rowindLocal; +}; +} // namespace 
pexsi +#endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp similarity index 99% rename from source/module_hsolver/pexsi/DistMatrixTransformer.cpp rename to source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index 1eec8dca12..01b96f42cc 100644 --- a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -7,8 +7,8 @@ #include #include -#include "DistBCDMatrix.h" -#include "DistCCSMatrix.h" +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" // for debug #ifdef _DEBUG @@ -21,6 +21,8 @@ #endif // end debug +namespace pexsi +{ // find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a // negtive value the size_process and displacement_process array will be changed after the index is found isFirst: // wether this function is called for the first time for a index array; nprocs: total number of processes size_process: @@ -232,7 +234,7 @@ inline int getNonZeroIndex(char LAYOUT, idx = i * nrow + j; if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) { - ++nnz; + ++nnz; colidx.push_back(i); rowidx.push_back(j); } @@ -1592,3 +1594,5 @@ MPI_Barrier(COMM_TRANS); #endif return 0; } + +} // namespace pexsi diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h new file mode 100644 index 0000000000..1d28866c96 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -0,0 +1,36 @@ +#ifndef DISTMATRIXTRANSFORMER_H +#define DISTMATRIXTRANSFORMER_H + +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" +// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// they should have same MPI communicator +// The local matrix of BCD is column-major 
order +// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, +// DistCCSMatrix &DST_Matrix, double*& H_ccs); + +// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// two destination matrices share the same non-zero elements positions +// if either of two elements in source matrices is non-zero, the elements in the destination matrices are non-zero, +// even if one of them is actually zero. All matrices must have the same MPI communicator +namespace pexsi +{ +int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs); + +// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, +// DistBCDMatrix& DST_Matrix, double* DM_2d); + +int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* ENDnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM_2d, + double* END_2d); +} // namespace pexsi +#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp similarity index 87% rename from source/module_hsolver/pexsi/pexsi_solver.cpp rename to source/module_hsolver/module_pexsi/pexsi_solver.cpp index 523e6bb2d5..90d16ae993 100644 --- a/source/module_hsolver/pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -5,8 +5,9 @@ #include #include "module_base/global_variable.h" -#include "simplePEXSI.h" +namespace pexsi +{ PEXSI_Solver::PEXSI_Solver(const int blacs_text, const int nb, const int nrow, @@ -58,4 +59,20 @@ int PEXSI_Solver::solve() this->totalEnergyS, this->totalFreeEnergy); return 0; -} \ No newline at end of file +} + +const double* PEXSI_Solver::get_DM() const +{ + return DM; +} + +const double* PEXSI_Solver::get_EDM() const +{ + return EDM; +} + +const double 
PEXSI_Solver::get_totalFreeEnergy() const +{ + return totalFreeEnergy; +} +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h similarity index 81% rename from source/module_hsolver/pexsi/pexsi_solver.h rename to source/module_hsolver/module_pexsi/pexsi_solver.h index 52f23b663b..0c3164e5f0 100644 --- a/source/module_hsolver/pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -1,6 +1,8 @@ #ifndef PEXSI_Solver_H #define PEXSI_Solver_H +namespace pexsi +{ class PEXSI_Solver { public: @@ -16,6 +18,11 @@ class PEXSI_Solver double& totalEnergyS, double& totalFreeEnergy); int solve(); + const double* get_DM() const; + const double* get_EDM() const; + const double get_totalFreeEnergy() const; + + private: int blacs_text; int nb; int nrow; @@ -28,5 +35,5 @@ class PEXSI_Solver double totalEnergyS; double totalFreeEnergy; }; - +} // namespace pexsi #endif // PEXSI_Solver_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp similarity index 99% rename from source/module_hsolver/pexsi/simplePEXSI.cpp rename to source/module_hsolver/module_pexsi/simple_pexsi.cpp index 0fbeb4e0a8..845beef18c 100644 --- a/source/module_hsolver/pexsi/simplePEXSI.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -11,14 +11,16 @@ #include #include -#include "DistBCDMatrix.h" -#include "DistCCSMatrix.h" -#include "DistMatrixTransformer.h" #include "c_pexsi_interface.h" +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" +#include "dist_matrix_transformer.h" #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" +namespace pexsi +{ inline void strtolower(char* sa, char* sb) { char c; @@ -700,3 +702,4 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // MPI_Barrier(MPI_COMM_WORLD); return 0; } +} // namespace pexsi \ No 
newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h new file mode 100644 index 0000000000..6d569154e9 --- /dev/null +++ b/source/module_hsolver/module_pexsi/simple_pexsi.h @@ -0,0 +1,27 @@ +#ifndef SIMPLE_PEXSI_H +#define SIMPLE_PEXSI_H + +#include +// a simple interface for calling pexsi with 2D block cyclic distributed matrix +namespace pexsi +{ +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int nblk, + const int nrow, + const int ncol, + char LAYOUT, // input matrix parameters + double* H, + double* S, // input matrices + const double nElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy); +} +#endif // SIMPLE_PEXSI_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp deleted file mode 100644 index 383875dc87..0000000000 --- a/source/module_hsolver/pexsi/DistBCDMatrix.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include -#include "DistBCDMatrix.h" -extern "C" -{ - void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol); - int Cblacs_pnum(int blacs_ctxt, int prow, int pcol); -}; - -/* -DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol) -{ - this->comm=comm; - this->group=group; - MPI_Comm_rank(comm, &this->myproc); - this->nprows=nprow; - this->npcols=npcol; - this->size=size; - this->nblk=nblk; - this->nrow=nrow; - this->ncol=ncol; - this->LAYOUT='R'; -} - -DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT) -{ - this->comm=comm; - this->group=group; - 
MPI_Comm_rank(comm, &this->myproc); - this->nprows=nprow; - this->npcols=npcol; - this->size=size; - this->nblk=nblk; - this->nrow=nrow; - this->ncol=ncol; - if(LAYOUT == 'R' || - LAYOUT == 'r' || - LAYOUT == 'C' || - LAYOUT == 'c') - { - this->LAYOUT=LAYOUT; - } else - { - throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); - } -} - -DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol) -{ - this->comm=comm; - this->group=group; - this->blacs_ctxt=blacs_ctxt; - this->size=size; - this->nblk=nblk; - this->nrow=nrow; - this->ncol=ncol; - this->LAYOUT='R'; - Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); - if(comm != MPI_COMM_NULL) - { - MPI_Comm_rank(comm, &this->myproc); - MPI_Comm_size(comm, &this->nprocs); - }else - { - this->myproc=-1; - this->nprocs=-1; - } -} -*/ - -DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT) -{ - this->comm=comm; - this->group=group; - this->blacs_ctxt=blacs_ctxt; - this->size=size; - this->nblk=nblk; - this->nrow=nrow; - this->ncol=ncol; - if(LAYOUT == 'R' || - LAYOUT == 'r' || - LAYOUT == 'C' || - LAYOUT == 'c') - { - this->LAYOUT=LAYOUT; - } else - { - throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); - } - - if(comm != MPI_COMM_NULL) - { - MPI_Comm_rank(comm, &this->myproc); - Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); - }else - { - this->myproc=-1; - this->myprow=-1; - this->mypcol=-1; - } - - // synchronize matrix parameters to all processes, including those are not in bcd group - int myid_in_comm_world; - MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world); - if(myid_in_comm_world == 0) - { - MPI_Comm_size(comm, &this->nprocs); - int PARA_BCAST[4]={this->nblk, this->nprocs, this->nprows, this->npcols}; - MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); - } - else - { - int PARA_BCAST[4]; - 
MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); - this->nblk=PARA_BCAST[0]; - this->nprocs=PARA_BCAST[1]; - this->nprows=PARA_BCAST[2]; - this->npcols=PARA_BCAST[3]; - } - this->prowpcol2pnum=new int[this->nprocs]; - if(myid_in_comm_world == 0) - { - for(int i=0; inprows; ++i) - { - for(int j=0; jnpcols; ++j) - { - this->prowpcol2pnum[i*this->npcols+j]=Cblacs_pnum(this->blacs_ctxt, i, j); - } - } - } - MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD); -} - -DistBCDMatrix::~DistBCDMatrix() -{ - delete[] prowpcol2pnum; -} - -int DistBCDMatrix::globalRow(const int localRow) -{ - return (localRow/nblk*nprows+myprow)*nblk+localRow%nblk; -} - -int DistBCDMatrix::globalCol(const int localCol) -{ - - return (localCol/nblk*npcols+mypcol)*nblk+localCol%nblk; -} - -int DistBCDMatrix::localRow(const int globalRow, int& myprow) -{ - myprow=int((globalRow%(nblk*nprows))/nblk); - return int(globalRow/(nblk*nprows))*nblk+globalRow%nblk; -} - -int DistBCDMatrix::localCol(const int globalCol, int& mypcol) -{ - mypcol=int((globalCol%(nblk*npcols))/nblk); - return int(globalCol/(nblk*npcols))*nblk+globalCol%nblk; -} - -int DistBCDMatrix::pnum(const int prow, const int pcol) -{ - return this->prowpcol2pnum[prow*this->npcols+pcol]; -} diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h deleted file mode 100644 index 97c5e8652f..0000000000 --- a/source/module_hsolver/pexsi/DistBCDMatrix.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef DISTBCDMATRIX_H -#define DISTBCDMATRIX_H - -#include -// a Block Cyclic Data Distribution matrix -// http://www.netlib.org/utk/papers/factor/node3.html -// local matrix elements is stored in column major -// used for pexsi -class DistBCDMatrix { - - public: - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol); - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, 
char LAYOUT); - - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol); - DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT); - ~DistBCDMatrix(); - - int globalRow(const int localRow); - int globalCol(const int localCol); - int localRow(const int globalRow, int& myprow); - int localCol(const int globalCol, int& mypcol); - int pnum(const int prow, const int pcol); - //~DistBCDMatrix(); - - // MPI communicator - MPI_Comm comm; - MPI_Group group; - - // blacs context - int blacs_ctxt; - - // row and column of process grid - int nprows; - int npcols; - - // total number of processes - int nprocs; - - // Matrix size - int size; - - // block size - int nblk; - - // row and c0lumn of Local matrix part - int nrow; - int ncol; - - - // protected: - - // private: - - // current process row and column - int myprow; - int mypcol; - - // current process id - int myproc; - - int *prowpcol2pnum; - // the local data layout - // 'R' or 'r' for row-major, which is used in C/C++ - // 'C' or 'c' for column-major, which is used in Fortran - char LAYOUT; -}; - -#endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp deleted file mode 100644 index 9e3fc728fa..0000000000 --- a/source/module_hsolver/pexsi/DistCCSMatrix.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include -#include "DistCCSMatrix.h" - -DistCCSMatrix::DistCCSMatrix(void) -{ - this->comm=MPI_COMM_WORLD; - this->size=0; - this->nnz=0; - this->nnzLocal=0; - this->numColLocal=0; - this->colptrLocal=NULL; - this->rowindLocal=NULL; -} - -DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) -{ - this->comm=comm_in; - this->size=0; - this->nnz=0; - this->nnzLocal=0; - this->numColLocal=0; - this->colptrLocal=NULL; - this->rowindLocal=NULL; -} - -DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) -{ - 
this->comm=MPI_COMM_WORLD; - this->size=size_in; - this->nnzLocal=nnzLocal_in; - MPI_Request req; - MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req); - this->numColLocal=0; - this->colptrLocal=new int[size]; - this->rowindLocal=new int[nnzLocal]; - - MPI_Status req_status; - MPI_Wait(&req, &req_status); -} - -DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) -{ - this->comm=comm_in; - this->nproc_data=nproc_data_in; - int nproc_data_range[3]={0, this->nproc_data-1, 1}; - // create processes group with data: this->group_data and associated communicator - MPI_Comm_group(this->comm, &this->group); - MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data); - this->comm_data=MPI_COMM_NULL; - MPI_Comm_create(this->comm, this->group_data, &this->comm_data); - this->size=size_in; - this->nnz=0; - this->nnzLocal=0; - int myproc; - if(comm != MPI_COMM_NULL) - { - MPI_Comm_size(comm, &nprocs); - MPI_Comm_rank(comm, &myproc); - if(myprocnumColLocal=size/nproc_data; - this->firstCol=size/nproc_data*myproc; - this->colptrLocal=new int[this->numColLocal+1]; - this->rowindLocal=NULL; - } - else if(myproc==nproc_data-1) - { - this->numColLocal=size-myproc*(size/nproc_data); - this->firstCol=size/nproc_data*myproc; - this->colptrLocal=new int[this->numColLocal+1]; - this->rowindLocal=NULL; - } - else - { - this->numColLocal=0; - this->firstCol=size-1; - this->colptrLocal=new int[this->numColLocal+1]; - this->rowindLocal=NULL; - } - } -} - -int DistCCSMatrix::globalCol(int localCol) -{ - return this->firstCol+localCol; -} - - -// NOTE: the process id is 0-based -int DistCCSMatrix::localCol(int globalCol, int& mypcol) -{ - mypcol=int(globalCol/int(this->size/this->nproc_data)); - if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1; - - return mypcol>0 ? 
globalCol-(this->size/this->nproc_data)*mypcol : globalCol; -} - -void DistCCSMatrix::setnnz(int nnzLocal_in) -{ - if(this->comm_data != MPI_COMM_NULL) - { - MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data); - this->nnzLocal=nnzLocal_in; - this->rowindLocal=new int[nnzLocal]; - this->colptrLocal[this->numColLocal]=nnzLocal_in+1; - } -} - -DistCCSMatrix::~DistCCSMatrix() -{ - delete[] colptrLocal; - delete[] rowindLocal; -} diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h deleted file mode 100644 index 48ec95d0fc..0000000000 --- a/source/module_hsolver/pexsi/DistCCSMatrix.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef DISTCCSMATRIX_H -#define DISTCCSMATRIX_H - -#include -// Distributed Compressed Column Storage Matrix format -// used for PEXSI -class DistCCSMatrix { - - public: - DistCCSMatrix(); - DistCCSMatrix(MPI_Comm comm); - DistCCSMatrix(int size, int nnzLocal); - DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal); - DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index); - - int globalCol(int localCol); - int localCol(int globalCol, int& mypcol); - void setnnz(int nnzLocal); - ~DistCCSMatrix(); - - // MPI communicator - MPI_Comm comm; - MPI_Group group; - - // total number of processes and the processes with data in - int nprocs; - int nproc_data; - MPI_Group group_data; - MPI_Comm comm_data; - - // Matrix size - int size; - - // Number of non-zero values in the matrix - int nnz; - - // Number of non-zero values in the matrix of the local process - int nnzLocal; - - // number of columns in current process - int numColLocal; - - // the first column index in current process - int firstCol; - - // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal - int* colptrLocal; - int* rowindLocal; -}; - -#endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.h 
b/source/module_hsolver/pexsi/DistMatrixTransformer.h deleted file mode 100644 index cdc0a53f74..0000000000 --- a/source/module_hsolver/pexsi/DistMatrixTransformer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef DISTMATRIXTRANSFORMER_H -#define DISTMATRIXTRANSFORMER_H - -#include "DistBCDMatrix.h" -#include "DistCCSMatrix.h" -// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution -// they should have same MPI communicator -// The local matrix of BCD is column-major order -// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, -// DistCCSMatrix &DST_Matrix, double*& H_ccs); - -// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution -// two destination matrices share the same non-zero elements positions -// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, even if -// one of them is acturely zero -// All matrices must have same MPI communicator -int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, const double ZERO_Limit, - DistCCSMatrix &DST_Matrix, double*& H_ccs, double*& S_ccs); - -// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, - // DistBCDMatrix& DST_Matrix, double* DM_2d); - -int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal, - DistBCDMatrix& DST_Matrix, double* DM_2d, double* END_2d); - -#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/pexsi/simplePEXSI.h b/source/module_hsolver/pexsi/simplePEXSI.h deleted file mode 100644 index 6a23ba6600..0000000000 --- a/source/module_hsolver/pexsi/simplePEXSI.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef SIMPLE_PEXSI_H -#define SIMPLE_PEXSI_H - -#include -// a simple interface for calling pexsi with 2D block cyclic distributed matrix -int simplePEXSI(MPI_Comm comm_PEXSI, 
MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt, // communicator parameters - const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // input matrix parameters - double* H, double* S, // input matrices - const double nElectronExact, const std::string PexsiOptionFile, // pexsi parameters file - double*& DM, double*& EDM, // output matrices - double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); - -#endif // SIMPLE_PEXSI_H \ No newline at end of file From 8075b14fd0e74e80ff98776416a0e922592f1441 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Wed, 24 Jan 2024 22:52:16 +0800 Subject: [PATCH 12/44] new inputs added --- CMakeLists.txt | 8 +- cmake/FindPEXSI.cmake | 8 +- source/module_base/global_variable.cpp | 29 ++ source/module_base/global_variable.h | 27 + source/module_hsolver/diago_pexsi.cpp | 44 +- source/module_hsolver/diago_pexsi.h | 2 +- .../module_pexsi/CMakeLists.txt | 2 +- .../module_pexsi/dist_bcd_matrix.h | 23 + .../module_pexsi/dist_ccs_matrix.h | 40 ++ .../module_pexsi/dist_matrix_transformer.cpp | 231 +++++---- .../module_pexsi/dist_matrix_transformer.h | 64 ++- .../module_pexsi/pexsi_solver.cpp | 24 +- .../module_pexsi/pexsi_solver.h | 6 +- .../module_pexsi/simple_pexsi.cpp | 486 +++++++++--------- source/module_io/input.cpp | 167 +++++- source/module_io/input.h | 28 + source/module_io/input_conv.cpp | 29 ++ source/module_io/write_input.cpp | 27 +- 18 files changed, 853 insertions(+), 392 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8440662355..619c7ac6fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ project(ABACUS LANGUAGES CXX ) -# private options, should not be pushed to master -# set(PEXSI_DIR "~/projects/pexsi-build/pexsi") -# set(SuperLU_DIR "~/projects/pexsi-build/superlu") -# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis") -# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001") - option(ENABLE_LCAO "Enable LCAO calculation." 
ON) option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF) option(ENABLE_LIBXC "Enable LibXC functionality" OFF) @@ -189,7 +183,7 @@ if(ENABLE_LCAO) if(USE_PEXSI) find_package(PEXSI REQUIRED) - target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) endif() diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index 22fe4dd01c..062764acce 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY PATH_SUFFIXES "lib" ) -find_library(SuperLU_LIBRARY +find_library(SuperLU_DIST_LIBRARY NAMES libsuperlu_dist.a - HINTS ${SuperLU_DIR} + HINTS ${SuperLU_DIST_DIR} PATH_SUFFIXES "lib" ) # Handle the QUIET and REQUIRED arguments and # set Cereal_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) # Copy the results to the output variables and target. 
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY) +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY) diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index 696bcd6088..2af22b8856 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -300,4 +300,33 @@ std::string qo_basis = "hydrogen"; std::vector qo_strategy = {}; double qo_thr = 1.0e-6; std::vector qo_screening_coeff = {}; + +//========================================================== +// PEXSI related +//========================================================== +int pexsi_npole = 54; +int pexsi_inertia = 1; +int pexsi_nmax = 80; +// int pexsi_symbolic = 1; +int pexsi_comm = 1; +int pexsi_storage = 1; +int pexsi_ordering = 0; +int pexsi_row_ordering = 1; +int pexsi_nproc = 1; +int pexsi_symm = 1; +int pexsi_trans = 0; +int pexsi_method = 1; +int pexsi_nproc_pole = 1; +// double pexsi_spin = 2; +double pexsi_temp = 0.0001; +double pexsi_gap = 0; +double pexsi_delta_e = 20.0; +double pexsi_mu_lower = -10; +double pexsi_mu_upper = 10; +double pexsi_mu = 0.0; +double pexsi_mu_thr = 0.05; +double pexsi_mu_expand = 0.3; +double pexsi_mu_guard = 0.2; +double pexsi_elec_thr = 0.001; +double pexsi_zero_thr = 1e-10; } // namespace GlobalV diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index b1fbb1748d..843c954451 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -328,5 +328,32 @@ extern std::string qo_basis; extern std::vector qo_strategy; extern double qo_thr; extern std::vector qo_screening_coeff; + +// PEXSI related +extern int pexsi_npole; +extern int pexsi_inertia; +extern int pexsi_nmax; +// extern int pexsi_symbolic; +extern int pexsi_comm; +extern int pexsi_storage; +extern int pexsi_ordering; +extern int pexsi_row_ordering; +extern int pexsi_nproc; +extern int 
pexsi_symm; +extern int pexsi_trans; +extern int pexsi_method; +extern int pexsi_nproc_pole; +// extern double pexsi_spin; +extern double pexsi_temp; +extern double pexsi_gap; +extern double pexsi_delta_e; +extern double pexsi_mu_lower; +extern double pexsi_mu_upper; +extern double pexsi_mu; +extern double pexsi_mu_thr; +extern double pexsi_mu_expand; +extern double pexsi_mu_guard; +extern double pexsi_elec_thr; +extern double pexsi_zero_thr; } // namespace GlobalV #endif diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index fbaf7b1806..8e4ee5b15b 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,14 +1,13 @@ #include #ifdef __PEXSI -#include "diago_pexsi.h" - #include "c_pexsi_interface.h" +#include "diago_pexsi.h" #include "module_base/global_variable.h" #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "pexsi/pexsi_solver.h" +#include "module_pexsi/pexsi_solver.h" typedef hamilt::MatrixBlock matd; typedef hamilt::MatrixBlock> matcd; @@ -16,7 +15,7 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { -template<> +template <> void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); @@ -25,30 +24,31 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->DM, - this->EDM, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); this->ps->solve(); - this->EDM = 
this->ps->EDM; - this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]? - this->totalFreeEnergy = this->ps->totalFreeEnergy; - this->totalEnergyH = this->ps->totalEnergyH; - this->totalEnergyS = this->ps->totalEnergyS; + this->EDM = this->ps->get_EDM(); + this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]? + this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); + this->totalEnergyH = this->ps->get_totalEnergyH(); + this->totalEnergyS = this->ps->get_totalEnergyS(); } -template<> -void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, psi::Psi>& psi, double* eigenvalue_in) +template <> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, + psi::Psi>& psi, + double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); - } } // namespace hsolver diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 018397a33d..c212d7795a 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -3,7 +3,7 @@ #include "diagh.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "pexsi/pexsi_solver.h" +#include "module_pexsi/pexsi_solver.h" namespace hsolver { diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt index 8faab8b4b4..87d16ff557 100644 --- a/source/module_hsolver/module_pexsi/CMakeLists.txt +++ b/source/module_hsolver/module_pexsi/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp) +add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp) if(ENABLE_COVERAGE) add_coverage(pexsi) diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h index 7dbddbad7c..98b8512893 100644 --- 
a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -2,6 +2,8 @@ #define DISTBCDMATRIX_H #include + +#include "module_hsolver/module_pexsi/dist_matrix_transformer.h" // a Block Cyclic Data Distribution matrix // http://www.netlib.org/utk/papers/factor/node3.html // local matrix elements is stored in column major @@ -27,6 +29,27 @@ class DistBCDMatrix int pnum(const int prow, const int pcol); //~DistBCDMatrix(); + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const int get_nrow() const + { + return nrow; + }; + const int get_ncol() const + { + return ncol; + }; + const char get_LAYOUT() const + { + return LAYOUT; + }; + private: // MPI communicator MPI_Comm comm; diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h index aa5e67b6ab..a63a0dc16c 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -19,6 +19,44 @@ class DistCCSMatrix int globalCol(int localCol); int localCol(int globalCol, int& mypcol); void setnnz(int nnzLocal); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const MPI_Group get_group_data() const + { + return group_data; + }; + const int get_size() const + { + return size; + }; + const int get_nnz() const + { + return nnz; + }; + const int get_nnzlocal() const + { + return nnzLocal; + }; + const int get_numcol_local() const + { + return numColLocal; + }; + int* get_colptr_local() const + { + return colptrLocal; + }; + int* get_rowind_local() const + { + return rowindLocal; + }; + ~DistCCSMatrix(); private: @@ -50,6 +88,8 @@ class DistCCSMatrix // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal int* colptrLocal; int* rowindLocal; + + // friend class 
DistMatrixTransformer; }; } // namespace pexsi #endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index 01b96f42cc..18fe445043 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -1,3 +1,5 @@ +#include "dist_matrix_transformer.h" + #include #include @@ -28,11 +30,11 @@ namespace pexsi // wether this function is called for the first time for a index array; nprocs: total number of processes size_process: // the number of indices in each process displacement_process: the start position in each process index: the array // contains the indices -inline int MinimumIndexPosition(const bool isFirst, - const int nprocs, - int* size_process, - int* displacement_process, - const int* index) +inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) { // usually the minimum index is continuous, so it will be a good idea to // check the one next to the previous index first. 
@@ -104,16 +106,16 @@ inline int MinimumIndexPosition(const bool isFirst, } } -inline void buildCCSParameter(const int size, - const int nprocs, - std::vector size_process, - std::vector displacement_process, - const int* position_index, - DistCCSMatrix& DST_Matrix, - int* buffer2ccsIndex) +inline void DistMatrixTransformer::buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) { // find the minimum one from left buffer index - if (DST_Matrix.nnzLocal <= 0) + if (DST_Matrix.get_nnzlocal() <= 0) return; int pre_col = -1; @@ -123,31 +125,34 @@ inline void buildCCSParameter(const int size, while (p_mini >= 0) { int index_mini = position_index[p_mini]; - int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol; - int row_mini = index_mini % DST_Matrix.size; + int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.get_size(); if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array { pre_col = col_mini; - DST_Matrix.colptrLocal[col_mini] = nnz_now + 1; + DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1; } - DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based + DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based // copy data from buffer to M, be careful M is a 0-based array buffer2ccsIndex[nnz_now] = p_mini; ++nnz_now; p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); } // The last element of colptrLocal is nnzLocal+1 - DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1; + DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1; } -inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal) +inline void 
DistMatrixTransformer::buffer2CCSvalue(int nnzLocal, + int* buffer2ccsIndex, + double* buffer, + double* nzvalLocal) { for (int i = 0; i < nnzLocal; ++i) { nzvalLocal[i] = buffer[buffer2ccsIndex[i]]; } } -inline void countMatrixDistribution(int N, double* A, std::map& P) +inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map& P) { for (int i = 0; i < N; ++i) { @@ -161,15 +166,15 @@ inline void countMatrixDistribution(int N, double* A, std::map& P) } // find out the index of non-zero elements -inline int getNonZeroIndex(char LAYOUT, - const int nrow, - const int ncol, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - int& nnz, - std::vector& rowidx, - std::vector& colidx) +inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) { #ifdef _DEBUG char f_log[80]; @@ -275,21 +280,21 @@ inline int getNonZeroIndex(char LAYOUT, return 0; } -int buildTransformParameter(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - const int NPROC_TRANS, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS, - const int nnz, - std::vector& rowidx, - std::vector& colidx, - int& sender_size, - std::vector& sender_size_process, - std::vector& sender_displacement_process, - int& receiver_size, - std::vector& receiver_size_process, - std::vector& receiver_displacement_process, - std::vector& buffer2ccsIndex) +int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) { // 
debug int myproc; @@ -322,12 +327,12 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, std::vector proc_map_data_trans; if (myproc == 0) { - MPI_Group_size(DST_Matrix.group_data, &nproc_data); + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); proc_map_data_trans.resize(nproc_data, 0); for (int i = 0; i < nproc_data; ++i) { - MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); } MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); } @@ -429,7 +434,7 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, int dst_col = DST_Matrix.localCol(g_col, dst_process); int l_row = rowidx[i]; int dst_row = SRC_Matrix.globalRow(l_row); - sender_index[i] = dst_col * DST_Matrix.size + dst_row; + sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row; } // debug #ifdef _DEBUG @@ -478,10 +483,10 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, return 0; } -int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS) +int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS) { // debug #ifdef _DEBUG @@ -499,7 +504,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, #endif // build transfortram communicator which contains both processes of BCD processors and // CCS processors with nonzero elements - MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS); + MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS); MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS); // debug #ifdef _DEBUG @@ -557,7 +562,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, return 0; } -int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& 
COMM_TRANS) +int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) { MPI_Group_free(&GROUP_TRANS); if (COMM_TRANS != MPI_COMM_NULL) @@ -571,13 +576,13 @@ int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) // two destination matrices share the same non-zero elements positions // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, // even if one of them is acturely zero All matrices must have same MPI communicator -int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - DistCCSMatrix& DST_Matrix, - double*& H_ccs, - double*& S_ccs) +int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs) { // debug #ifdef _DEBUG @@ -614,9 +619,9 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, { log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow << " ; mypcol: " << SRC_Matrix.mypcol << std::endl; - log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol + log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol() << std::endl; - log << "layout:" << SRC_Matrix.LAYOUT << std::endl; + log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl; log << "ZERO = " << ZERO_Limit << std::endl; log << "DST_Matrix parameters:" << std::endl; log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl; @@ -633,11 +638,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if (myproc < 100) log << "start counting nnz..." 
<< std::endl; #endif - if (SRC_Matrix.comm != MPI_COMM_NULL) + if (SRC_Matrix.get_comm() != MPI_COMM_NULL) { - getNonZeroIndex(SRC_Matrix.LAYOUT, - SRC_Matrix.nrow, - SRC_Matrix.ncol, + getNonZeroIndex(SRC_Matrix.get_LAYOUT(), + SRC_Matrix.get_nrow(), + SRC_Matrix.get_ncol(), H_2d, S_2d, ZERO_Limit, @@ -654,11 +659,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if(SRC_Matrix.comm != MPI_COMM_NULL) { log<<"NonZeroIndex :"< sender_buffer(sender_size); std::vector receiver_buffer(receiver_size); // put H to sender buffer - if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; + sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; + sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } #ifdef _DEBUG @@ -749,18 +754,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, #endif // put S to sender buffer - if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; + sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; + sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } #ifdef _DEBUG @@ -804,12 +809,12 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution // two source matrices share the same non-zero elements positions -int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, - 
double* DMnzvalLocal, - double* EDMnzvalLocal, - DistBCDMatrix& DST_Matrix, - double* DM, - double* EDM) +int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM, + double* EDM) { // debug #ifdef _DEBUG @@ -840,7 +845,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, if (COMM_TRANS != MPI_COMM_NULL) { // init DM and EDM with 0 - for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i) + for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i) { DM[i] = 0; EDM[i] = 0; @@ -877,12 +882,12 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, MPI_Comm_rank(COMM_TRANS, &myproc_trans); if (myproc_trans == 0) { - MPI_Group_size(DST_Matrix.group, &nproc_bcd); + MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); proc_map_bcd_trans.resize(nproc_bcd, 0); for (int i = 0; i < nproc_bcd; ++i) { - MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); } MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); } @@ -933,7 +938,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, log << "display all columns and rows of nonzeros values:\n"; int log_nnz = 0; #endif - for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol) + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) { int g_col = SRC_Matrix.globalCol(icol); int recv_pcol_bcd; @@ -942,9 +947,9 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, // log<DST_Matrix.nrow) + else if(receiver_index[i*2]>DST_Matrix.get_nrow()) { - log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "< "<DST_Matrix.ncol) + else if(receiver_index[i*2+1]>DST_Matrix.get_ncol()) { - log<<"ERROR! 
receiver_index(BCD)["<<2*i+1<<"] = "< "< "< DST_Matrix.nrow) + else if (receiver_index[i * 2] > DST_Matrix.get_nrow()) { log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > " - << DST_Matrix.nrow << std::endl; + << DST_Matrix.get_nrow() << std::endl; log.flush(); } if (receiver_index[i * 2 + 1] < 0) @@ -1388,10 +1393,10 @@ MPI_Barrier(COMM_TRANS); << std::endl; log.flush(); } - else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol) + else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol()) { log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > " - << DST_Matrix.ncol << std::endl; + << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1428,14 +1433,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM"); #endif // transform receiver_buffer to DM - if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') + if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.ncol + iy; + int idx = ix * DST_Matrix.get_ncol() + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1444,7 +1449,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.ncol << std::endl; + << " ncol = " << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1454,12 +1459,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = 
receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.nrow + ix; + int idx = iy * DST_Matrix.get_nrow() + ix; #ifdef _DEBUG if (myproc < 100) { @@ -1468,7 +1473,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.nrow << std::endl; + << " nrow = " << DST_Matrix.get_nrow() << std::endl; log.flush(); } } @@ -1512,14 +1517,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM"); #endif // transform receiver_buffer to EDM - if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') + if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.ncol + iy; + int idx = ix * DST_Matrix.get_ncol() + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1528,7 +1533,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.ncol << std::endl; + << " ncol = " << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1538,12 +1543,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.nrow + ix; + int idx = iy * DST_Matrix.get_nrow() + ix; #ifdef _DEBUG if (myproc < 100) { @@ -1552,7 +1557,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM 
total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.nrow << std::endl; + << " nrow = " << DST_Matrix.get_nrow() << std::endl; log.flush(); } } diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index 1d28866c96..e3e27e995a 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -1,8 +1,9 @@ #ifndef DISTMATRIXTRANSFORMER_H #define DISTMATRIXTRANSFORMER_H -#include "dist_bcd_matrix.h" -#include "dist_ccs_matrix.h" +#include + +#include // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution // they should have same MPI communicator // The local matrix of BCD is column-major order @@ -15,6 +16,62 @@ // even if one of them is acturely zero All matrices must have same MPI communicator namespace pexsi { +class DistBCDMatrix; +class DistCCSMatrix; + +namespace DistMatrixTransformer +{ +int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index); + +void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex); + +void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal); + +void countMatrixDistribution(int N, double* A, std::map& P); + +int getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx); + +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + 
std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex); + +int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS); + +int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS); + int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double* H_2d, double* S_2d, @@ -31,6 +88,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* ENDnzvalLocal, DistBCDMatrix& DST_Matrix, double* DM_2d, - double* END_2d); + double* ED_2d); +}; // namespace DistMatrixTransformer } // namespace pexsi #endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 90d16ae993..2d6f2674d9 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -5,6 +5,11 @@ #include #include "module_base/global_variable.h" +#include "simple_pexsi.h" + +extern MPI_Comm DIAG_WORLD; +extern MPI_Comm GRID_WORLD; +extern MPI_Group GRID_GROUP; namespace pexsi { @@ -37,9 +42,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, int PEXSI_Solver::solve() { - extern MPI_Comm DIAG_WORLD; - extern MPI_Comm GRID_WORLD; - extern MPI_Group GRID_GROUP; + simplePEXSI(DIAG_WORLD, GRID_WORLD, GRID_GROUP, @@ -61,12 +64,12 @@ int PEXSI_Solver::solve() return 0; } -const double* PEXSI_Solver::get_DM() const +double* PEXSI_Solver::get_DM() const { return DM; } -const double* PEXSI_Solver::get_EDM() const +double* PEXSI_Solver::get_EDM() const { return EDM; } @@ -75,4 +78,15 @@ const double PEXSI_Solver::get_totalFreeEnergy() const { return totalFreeEnergy; } + +const double PEXSI_Solver::get_totalEnergyH() const +{ + return 
totalEnergyH; +} + +const double PEXSI_Solver::get_totalEnergyS() const +{ + return totalEnergyS; +} + } // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index 0c3164e5f0..b3d7aed152 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -18,9 +18,11 @@ class PEXSI_Solver double& totalEnergyS, double& totalFreeEnergy); int solve(); - const double* get_DM() const; - const double* get_EDM() const; + double* get_DM() const; + double* get_EDM() const; const double get_totalFreeEnergy() const; + const double get_totalEnergyH() const; + const double get_totalEnergyS() const; private: int blacs_text; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index 845beef18c..2d1705557c 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -18,6 +18,7 @@ #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" +#include "module_base/global_variable.h" namespace pexsi { @@ -102,220 +103,252 @@ int loadPEXSIOption(MPI_Comm comm, // 10: numElectronPEXSITolerance // 11: ZERO_Limit double double_para[12]; - int myid; - MPI_Comm_rank(comm, &myid); - if (myid == 0) - { - std::ifstream ifs(PexsiOptionFile.c_str()); - if (!ifs) - { - return 1; - } - setDefaultOption(int_para, double_para); - ifs.clear(); - ifs.seekg(0); + // read in PEXSI options from GlobalV + int_para[0] = GlobalV::pexsi_npole; + int_para[1] = GlobalV::pexsi_inertia; + int_para[2] = GlobalV::pexsi_nmax; + int_para[3] = 0; + int_para[4] = 1; // GlobalV::pexsi_symbolic; + int_para[5] = GlobalV::pexsi_comm; + int_para[6] = 0; + int_para[7] = GlobalV::pexsi_storage; + int_para[8] = GlobalV::pexsi_ordering; + int_para[9] = GlobalV::pexsi_row_ordering; + 
int_para[10] = GlobalV::pexsi_nproc; + int_para[11] = GlobalV::pexsi_symm; + int_para[12] = GlobalV::pexsi_trans; + int_para[13] = GlobalV::pexsi_method; + int_para[14] = 2; + int_para[15] = 0; + int_para[16] = GlobalV::pexsi_nproc_pole; - char key[128]; - char lowercase_key[128]; - const int LINE_LINGTH = 1024; - char unused_string[LINE_LINGTH]; + double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin; + double_para[1] = GlobalV::pexsi_temp; + double_para[2] = GlobalV::pexsi_gap; + double_para[3] = GlobalV::pexsi_delta_e; + double_para[4] = GlobalV::pexsi_mu_lower; + double_para[5] = GlobalV::pexsi_mu_upper; + double_para[6] = GlobalV::pexsi_mu; + double_para[7] = GlobalV::pexsi_mu_thr; + double_para[8] = GlobalV::pexsi_mu_expand; + double_para[9] = GlobalV::pexsi_mu_guard; + double_para[10] = GlobalV::pexsi_elec_thr; + double_para[11] = GlobalV::pexsi_zero_thr; + // int myid; + // MPI_Comm_rank(comm, &myid); + // if (myid == 0) + // { + // std::ifstream ifs(PexsiOptionFile.c_str()); + // if (!ifs) + // { + // return 1; + // } + // setDefaultOption(int_para, double_para); - while (ifs.good()) - { - ifs >> key; - //~ cout<<"readin word is: "<>options.spin; - ifs >> double_para[0]; - //~ cout<<"double_para[0]: "<>options.temperature; - ifs >> double_para[1]; - //~ cout<<"double_para[1]: "<>options.gap; - ifs >> double_para[2]; - //~ cout<<"double_para[2]: "<>options.deltaE; - ifs >> double_para[3]; - //~ cout<<"double_para[3]: "<>options.numPole; - ifs >> int_para[0]; - //~ cout<<"int_para[0]: "<>options.isInertiaCount; - ifs >> int_para[1]; - //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; - ifs >> int_para[2]; - //~ cout<<"int_para[2]: "<>options.muMin0; - ifs >> double_para[4]; - //~ cout<<"double_para[4]: "<>options.muMax0; - ifs >> double_para[5]; - //~ cout<<"double_para[5]: "<>options.mu0; - ifs >> double_para[6]; - //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; - ifs >> double_para[7]; - //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; 
- ifs >> double_para[8]; - //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; - ifs >> double_para[9]; - //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; - ifs >> double_para[10]; - //~ cout<<"double_para[10]: "<> double_para[11]; - } - else if (strcmp("matrixtype", lowercase_key) == 0) - { - //~ ifs>>options.matrixType; - ifs >> int_para[3]; - //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; - ifs >> int_para[4]; - //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; - ifs >> int_para[5]; - //~ cout<<"int_para[5]: "<>options.solver; - ifs >> int_para[6]; - //~ cout<<"int_para[6]: "<>options.symmetricStorage; - ifs >> int_para[7]; - //~ cout<<"int_para[7]: "<>options.ordering; - ifs >> int_para[8]; - //~ cout<<"int_para[8]: "<>options.rowOrdering; - ifs >> int_para[9]; - //~ cout<<"int_para[9]: "<>options.npSymbFact; - ifs >> int_para[10]; - //~ cout<<"int_para[10]: "<>options.symmetric; - ifs >> int_para[11]; - //~ cout<<"int_para[11]: "<>options.transpose; - ifs >> int_para[12]; - //~ cout<<"int_para[12]: "<>options.method; - ifs >> int_para[13]; - //~ cout<<"int_para[13]: "<>options.nPoints; - ifs >> int_para[14]; - //~ cout<<"int_para[14]: "<>options.verbosity; - ifs >> int_para[15]; - //~ cout<<"int_para[15]: "<>options.verbosity; - ifs >> int_para[16]; - //~ cout<<"int_para[16]: "<> key; + // //~ cout<<"readin word is: "<>options.spin; + // ifs >> double_para[0]; + // //~ cout<<"double_para[0]: "<>options.temperature; + // ifs >> double_para[1]; + // //~ cout<<"double_para[1]: "<>options.gap; + // ifs >> double_para[2]; + // //~ cout<<"double_para[2]: "<>options.deltaE; + // ifs >> double_para[3]; + // //~ cout<<"double_para[3]: "<>options.numPole; + // ifs >> int_para[0]; + // //~ cout<<"int_para[0]: "<>options.isInertiaCount; + // ifs >> int_para[1]; + // //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; + // ifs >> int_para[2]; + // //~ cout<<"int_para[2]: "<>options.muMin0; + // ifs >> double_para[4]; + // //~ 
cout<<"double_para[4]: "<>options.muMax0; + // ifs >> double_para[5]; + // //~ cout<<"double_para[5]: "<>options.mu0; + // ifs >> double_para[6]; + // //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; + // ifs >> double_para[7]; + // //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; + // ifs >> double_para[8]; + // //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; + // ifs >> double_para[9]; + // //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; + // ifs >> double_para[10]; + // //~ cout<<"double_para[10]: "<> double_para[11]; + // } + // else if (strcmp("matrixtype", lowercase_key) == 0) + // { + // //~ ifs>>options.matrixType; + // ifs >> int_para[3]; + // //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; + // ifs >> int_para[4]; + // //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; + // ifs >> int_para[5]; + // //~ cout<<"int_para[5]: "<>options.solver; + // ifs >> int_para[6]; + // //~ cout<<"int_para[6]: "<>options.symmetricStorage; + // ifs >> int_para[7]; + // //~ cout<<"int_para[7]: "<>options.ordering; + // ifs >> int_para[8]; + // //~ cout<<"int_para[8]: "<>options.rowOrdering; + // ifs >> int_para[9]; + // //~ cout<<"int_para[9]: "<>options.npSymbFact; + // ifs >> int_para[10]; + // //~ cout<<"int_para[10]: "<>options.symmetric; + // ifs >> int_para[11]; + // //~ cout<<"int_para[11]: "<>options.transpose; + // ifs >> int_para[12]; + // //~ cout<<"int_para[12]: "<>options.method; + // ifs >> int_para[13]; + // //~ cout<<"int_para[13]: "<>options.nPoints; + // ifs >> int_para[14]; + // //~ cout<<"int_para[14]: "<>options.verbosity; + // ifs >> int_para[15]; + // //~ cout<<"int_para[15]: "<>options.verbosity; + // ifs >> int_para[16]; + // //~ cout<<"int_para[16]: "< qo_strategy = {}; std::vector qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + int pexsi_npole = 54; + int 
pexsi_inertia = 1; + int pexsi_nmax = 80; + // int pexsi_symbolic = 1; + int pexsi_comm = 1; + int pexsi_storage = 1; + int pexsi_ordering = 0; + int pexsi_row_ordering = 1; + int pexsi_nproc = 1; + int pexsi_symm = 1; + int pexsi_trans = 0; + int pexsi_method = 1; + int pexsi_nproc_pole = 1; + // double pexsi_spin = 2; + double pexsi_temp = 0.0001; + double pexsi_gap = 0; + double pexsi_delta_e = 20.0; + double pexsi_mu_lower = -10; + double pexsi_mu_upper = 10; + double pexsi_mu = 0.0; + double pexsi_mu_thr = 0.05; + double pexsi_mu_expand = 0.3; + double pexsi_mu_guard = 0.2; + double pexsi_elec_thr = 0.001; + double pexsi_zero_thr = 1e-10; private: //========================================================== diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index a52245d05c..89aff00eed 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -765,6 +765,35 @@ void Input_Conv::Convert(void) GlobalV::qo_strategy = INPUT.qo_strategy; GlobalV::qo_thr = INPUT.qo_thr; GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff; + + //----------------------------------------------- + // PEXSI related parameters + //----------------------------------------------- + GlobalV::pexsi_npole = INPUT.pexsi_npole; + GlobalV::pexsi_inertia = INPUT.pexsi_inertia; + GlobalV::pexsi_nmax = INPUT.pexsi_nmax; + // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic; + GlobalV::pexsi_comm = INPUT.pexsi_comm; + GlobalV::pexsi_storage = INPUT.pexsi_storage; + GlobalV::pexsi_ordering = INPUT.pexsi_ordering; + GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering; + GlobalV::pexsi_nproc = INPUT.pexsi_nproc; + GlobalV::pexsi_symm = INPUT.pexsi_symm; + GlobalV::pexsi_trans = INPUT.pexsi_trans; + GlobalV::pexsi_method = INPUT.pexsi_method; + GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // GlobalV::pexsi_spin = INPUT.pexsi_spin; + GlobalV::pexsi_temp = INPUT.pexsi_temp; + GlobalV::pexsi_gap = INPUT.pexsi_gap; + GlobalV::pexsi_delta_e = 
INPUT.pexsi_delta_e; + GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower; + GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper; + GlobalV::pexsi_mu = INPUT.pexsi_mu; + GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr; + GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand; + GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard; + GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr; + GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr; ModuleBase::timer::tick("Input_Conv", "Convert"); return; } diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index 6003a4fdb0..2c30d8ff47 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -493,7 +493,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function"); - + + ofs << "\n#Parameters (24.PEXSI)" << std::endl; + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); 
+ ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of 
the PEXSI iteration in terms of the number of electrons compared to numElectronExact"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"); + ofs.close(); return; } \ No newline at end of file From 1e428d27ba2b63d8531f6dd51bc1cf8fcb2abe4a Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Fri, 26 Jan 2024 15:38:55 +0800 Subject: [PATCH 13/44] Configure Makefile Compiling, fix typos --- source/Makefile | 17 +++++++++-------- source/Makefile.Objects | 18 ++++++++++-------- source/Makefile.vars | 20 +++++--------------- source/module_io/input.cpp | 2 +- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/source/Makefile b/source/Makefile index 0dcd329161..7bd81f26d9 100644 --- a/source/Makefile +++ b/source/Makefile @@ -7,7 +7,7 @@ include Makefile.vars INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container LIBS = -lm -lpthread -OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic +OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES} HONG = -D__LCAO HONG += -D__ELPA ifeq ($(OPENMP), ON) @@ -75,7 +75,7 @@ else FFTW_INCLUDE_DIR = ${FFTW_DIR}/include FFTW_LIB_DIR = ${FFTW_DIR}/lib HONG += -D__FFTW3 - LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl + LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} INCLUDES += -I${FFTW_INCLUDE_DIR} #========================== @@ -140,12 +140,6 @@ ifdef LIBTORCH_DIR endif endif -ifdef PEXSI_DIR - INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE} - LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB} - HONG += -D__PEXSI -endif - ifdef DeePMD_DIR HONG += -D__DPMD -DHIGH_PREC OPTS += -Wl,--no-as-needed @@ -175,6 +169,13 @@ ifdef DeePMD_DIR INCLUDES += -I${TensorFlow_INCLUDE_DIR} endif +ifdef PEXSI_DIR + OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI} 
+ INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include + LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis + HONG += -D__PEXSI +endif + include Makefile.Objects #========================== diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 2a69761da3..71e637a80b 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -30,7 +30,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ -./module_hsolver/pexsi:\ +./module_hsolver/module_pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -102,6 +102,7 @@ ${OBJS_VDW}\ ${OBJS_DFTU}\ ${OBJS_DELTASPIN}\ ${OBJS_TENSOR}\ +${OBJS_HSOLVER_PEXSI}\ OBJS_MAIN=main.o\ driver.o\ @@ -290,13 +291,7 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ - diago_pexsi.o\ - DistBCDMatrix.o\ - DistCCSMatrix.o\ - DistMatrixTransformer.o\ - pexsi_solver.o\ - simplePEXSI.o\ - + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diago_elpa.o\ @@ -305,6 +300,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ elpa_new_complex.o\ utils.o\ +OBJS_HSOLVER_PEXSI=diago_pexsi.o\ + pexsi_solver.o\ + simple_pexsi.o\ + dist_bcd_matrix.o\ + dist_ccs_matrix.o\ + dist_matrix_transformer.o\ + OBJS_MD=fire.o\ langevin.o\ md_base.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 860bbdd806..477b0a251d 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -33,15 +33,6 @@ ELPA_DIR = /root/lib/ELPA ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/ CEREAL_DIR = /root/lib/cereal -DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0 -DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include -DSUPERLU_LIB = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a - -SCOTCH_INCLUDE = -I/usr/local/include -PTSCOTCH_DIR = /root/workspace/scotch_6.0.0 -PTSCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a 
${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a -SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a - ##------------------- FOR GNU COMPILER ------------------------------ @@ -68,14 +59,9 @@ SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR -## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a +## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR ##--------------------------------------------------------------------- -PEXSI_DIR = /root/workspace/pexsi_v2.0.0 -PEXSI_LIB_DIR = ${PEXSI_DIR}/src -PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include - - # LIBTORCH_DIR = /usr/local # LIBNPY_DIR = /usr/local @@ -87,6 +73,10 @@ PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm +# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi +# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu +# PARMETIS_DIR = /home/rhx/projects/pexsi-build/parmetis + ##--------------------------------------------------------------------- # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile # DEBUG = OFF diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index 1edc3a092a..9d4b984728 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -4086,7 +4086,7 @@ void Input::Check(void) GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; #else ModuleBase::WARNING_QUIT("Input", - "Can not use genelpa if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx."); + "Can not use PEXSI if abacus is not compiled with PEXSI. 
Please change ks_solver to scalapack_gvx."); #endif From 1264b6475e73485d9c3f4aaca8157c44899a430f Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Fri, 26 Jan 2024 15:59:34 +0800 Subject: [PATCH 14/44] Fix Makefile Intel toolchains compile errors --- source/module_hsolver/module_pexsi/simple_pexsi.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index 2d1705557c..df72a061c5 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -2,6 +2,7 @@ // the H and S matrices are given by 2D block cyclic distribution // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution // #include "mpi.h" +#ifdef __PEXSI #include #include @@ -728,4 +729,5 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // MPI_Barrier(MPI_COMM_WORLD); return 0; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file From a6941467a5295cb5c584a193e5bffd3e13e1eff4 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Fri, 26 Jan 2024 16:14:15 +0800 Subject: [PATCH 15/44] Fix even more PEXSI related Makefile compiling issues --- source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp | 4 +++- source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp | 4 +++- .../module_hsolver/module_pexsi/dist_matrix_transformer.cpp | 2 ++ source/module_hsolver/module_pexsi/pexsi_solver.cpp | 4 +++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp index cf815bd4ae..e498b83a2e 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "dist_bcd_matrix.h" #include @@ -110,4 +111,5 @@ int 
DistBCDMatrix::pnum(const int prow, const int pcol) { return this->prowpcol2pnum[prow * this->npcols + pcol]; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp index 365622d249..ddd02aaa9a 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "dist_ccs_matrix.h" #include @@ -114,4 +115,5 @@ DistCCSMatrix::~DistCCSMatrix() delete[] colptrLocal; delete[] rowindLocal; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index 18fe445043..ef6c6fec72 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "dist_matrix_transformer.h" #include @@ -1601,3 +1602,4 @@ MPI_Barrier(COMM_TRANS); } } // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 2d6f2674d9..1be66abf59 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "pexsi_solver.h" #include @@ -89,4 +90,5 @@ const double PEXSI_Solver::get_totalEnergyS() const return totalEnergyS; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file From cd3a02880fa2a045cd3f06237fffbb99ae3e1440 Mon Sep 17 00:00:00 2001 From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com> Date: Sun, 28 Jan 2024 14:00:20 +0800 
Subject: [PATCH 16/44] Modify inputs and update to latest version (#2) * run INPUT.Default() in every process in InputParaTest (#3490) Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> * add blas support for FindLAPACK.cmake (#3497) * more unittest of QO: towards orbital selection (#3499) * Fix: fix bug in mulliken charge calculation (#3503) * fix phase * fix case test * Refactor: namespace Conv_Coulomb_Pot_K (#3446) * Refactor: namespace Conv_Coulomb_Pot_K * Refactor: namespace Conv_Coulomb_Pot_K --------- Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> * enable the computation of all zeros in one function call (#3449) Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> * replace ios.eof() by ios.good() to avoid meeting badbit and failbit in reading STRU (#3506) * Build: add ccache to accelerate the testing process (#3509) * Build: add ccache to accelerate the testing process * Update test.yml * Update test.yml * Update test.yml * Docs: to avoid the misunderstanding in docs (#3518) * to avoid the misunderstanding in docs * Update docs/quick_start/hands_on.md Co-authored-by: Chun Cai --------- Co-authored-by: Chun Cai * Docs: fix a missing depencency in conda build env (#3508) * Feature: Add ENABLE_RAPIDJSON option to control the output of abacus.json (#3519) Add ENABLE_RAPIDJSON option to control the output of abacus.json * Feature: add python wrapper for math sphbes (#3475) * recommit for review * add python wrapper * remove timer since performace tests add * Feature: support segment split in kline mode in KPT file and `out_band` band output precision control, `8` as default (#3493) * add precision control * correct serial version of nscf_band function * fix issue 3482 * update unit and integrated test * update document * correct unittest and make compatible with false and true * fix: bug in Autotest.sh when result.ref has no totaltimeref (#3523) * Fix : unit test of module_xc (#3524) * Fix: omit 
small magnetic moments to avoid numerical instability (#3530) * update deltalambda * avoid numerical error in orbMulP * add constrain on Mi * change case reference value * Fix: fix multiple compiler warnings (#3515) * Fix: add noreturn attribute to warning_quit * Add type conversion * fix string literal * fix small number trunctuation * Fix system call returned value not checked * fix missing braket * Refactor parameter_pool.cpp and parameter_pool.h * remove duplicated return statements * Change WARNING_QUIT occurances in tests * Add warning message to help debug UT * output the default precision flag (#3496) Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> * Build: Improving CMake performance for finding LibXC and ELPA (#3478) * Fix for finding LibXC and ELPA * For compatibility to previous routines * syntax fix for FindELPA.cmake * Update cmake/FindELPA.cmake Co-authored-by: Chun Cai * Using CMake interface as default for finding LibXC * update docs * fix for FindLibxc: changing imcompatible if statement * fix for FindLibxc: changing imcompatible if statement * fix for FindLibxc: changing imcompatible if statement * update docs for installing pkg-config * Update FindLibxc.cmake * Update FindLibxc.cmake * remove previous LibXC routine in CMakeLists.txt Co-authored-by: Chun Cai * Update easy_install.md with Makefile-built LibXC supported * Update easy_install.md to include different behavior in different version on finding ELPA --------- Co-authored-by: Chun Cai * Docs: correct some docs about mp2 smearing method (#3533) * correct some docs about mp2 smearing method * add docs about mv method * Feature : printing band density (#3501) Co-authored-by: wenfei-li Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> * add some docs for PR#3501 (#3537) * Feature: enable restart charge density mixing during SCF (#3542) * add a new parameter mixing_restart * do not update rho if iter==mixing_restart * do not update rho if 
iter==mixing_restart-1 * reset mix and rho_mdata if iter==mixing_restart * fix SCF exit directly since drho=0 if iter=GlobalV::MIXING_RESTART * re-set_mixing in eachiterinit for PW and LCAO * enable SCF restarts in esolver_ks::RUN * add some UnitTests * add some Docs * new inputs added * Update input-main.md (#3551) Solve the format problem mentioned in issue 3543 * Build: fix compatibility issue against toolchain install (#3540) * Fix for finding LibXC and ELPA * For compatibility to previous routines * syntax fix for FindELPA.cmake * Update cmake/FindELPA.cmake Co-authored-by: Chun Cai * Using CMake interface as default for finding LibXC * update docs * fix for FindLibxc: changing imcompatible if statement * fix for FindLibxc: changing imcompatible if statement * fix for FindLibxc: changing imcompatible if statement * update docs for installing pkg-config * Update FindLibxc.cmake * Update FindLibxc.cmake * remove previous LibXC routine in CMakeLists.txt Co-authored-by: Chun Cai * Update easy_install.md with Makefile-built LibXC supported * Update easy_install.md to include different behavior in different version on finding ELPA * fix compatibility issue against toolchain * Change default ELPA install routine to old one --------- Co-authored-by: Chun Cai * Test: Configure performance tests for math libraries (#3511) * add performace test of sphbes functions. * fix benchmark cmake errors * add dependencies for docker * update docs * add performance tests for sphbes * add google benchmark * rewrite benchmark tests in fixtures * disable internal testing in benchmark * merge benchmark into integration test --------- Co-authored-by: StarGrys <771582678@qq.com> * Configure Makefile Compiling, fix typos * Fix Makefile Intel toolchains compile errors * Fix even more PEXSI related Makefile compiling issues * Update hsolver_pw.cpp (#3556) when use_uspp==false, overlap matrix should be E. 
* Fix: cuda build target (#3276) * Fix: cuda buid target * Update CMakeLists.txt --------- Co-authored-by: Denghui Lu --------- Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> Co-authored-by: Haozhi Han Co-authored-by: Zhao Tianqi Co-authored-by: PeizeLin <78645006+PeizeLin@users.noreply.github.com> Co-authored-by: jinzx10 Co-authored-by: Chun Cai Co-authored-by: Peng Xingliang <91927439+pxlxingliang@users.noreply.github.com> Co-authored-by: Jie Li <76780849+jieli-matrix@users.noreply.github.com> Co-authored-by: Wenfei Li <38569667+wenfei-li@users.noreply.github.com> Co-authored-by: Denghui Lu Co-authored-by: YI Zeping <18586016708@163.com> Co-authored-by: wenfei-li Co-authored-by: jingan-181 <78459531+jingan-181@users.noreply.github.com> Co-authored-by: StarGrys <771582678@qq.com> Co-authored-by: Haozhi Han --- .github/workflows/test.yml | 18 +- CMakeLists.txt | 77 +- Dockerfile.cuda | 2 +- Dockerfile.gnu | 2 +- Dockerfile.intel | 2 +- cmake/FindELPA.cmake | 44 +- cmake/FindLAPACK.cmake | 2 +- cmake/FindLibxc.cmake | 36 + cmake/FindPEXSI.cmake | 8 +- deps/libpaw_interface | 2 +- docs/advanced/input_files/input-main.md | 37 +- docs/advanced/install.md | 10 + docs/quick_start/easy_install.md | 9 +- docs/quick_start/hands_on.md | 2 +- python/pyabacus/CMakeLists.txt | 11 +- python/pyabacus/src/py_abacus.cpp | 13 + python/pyabacus/src/py_math_base.cpp | 63 ++ python/pyabacus/src/py_numerical_radial.cpp | 4 +- python/pyabacus/src/pyabacus/__init__.py | 5 +- python/pyabacus/tests/test_base_math.py | 15 + python/pyabacus/tests/test_nr.py | 25 - source/Makefile | 17 +- source/Makefile.Objects | 18 +- source/Makefile.vars | 20 +- source/module_base/global_variable.cpp | 30 + source/module_base/global_variable.h | 28 + source/module_base/math_sphbes.cpp | 56 +- source/module_base/math_sphbes.h | 13 +- source/module_base/para_json.cpp | 977 ++++++++++++++++++ 
source/module_base/para_json.h | 560 ++++++++++ source/module_base/test/CMakeLists.txt | 14 + .../module_base/test/complexmatrix_test.cpp | 22 +- .../module_base/test/inverse_matrix_test.cpp | 2 +- source/module_base/test/math_sphbes_test.cpp | 16 +- source/module_base/test/math_ylmreal_test.cpp | 364 +++---- source/module_base/test/para_json_test.cpp | 68 ++ source/module_base/test/perf_sphbes_test.cpp | 72 ++ source/module_base/tool_quit.h | 8 +- source/module_cell/klist.cpp | 44 +- source/module_cell/klist.h | 1 + .../test/sltk_atom_input_test.cpp | 2 +- source/module_cell/read_atoms.cpp | 189 ++-- source/module_elecstate/occupy.cpp | 414 +------- source/module_esolver/esolver_ks.cpp | 19 +- source/module_esolver/esolver_ks_lcao.cpp | 27 +- source/module_esolver/esolver_ks_pw.cpp | 71 +- .../module_xc/test/test_xc.cpp | 4 +- .../module_xc/test/test_xc1.cpp | 2 +- .../module_xc/test/test_xc2.cpp | 10 +- .../module_xc/test/test_xc4.cpp | 4 +- .../module_xc/test/test_xc5.cpp | 65 -- .../module_xc/test/xc3_mock.h | 16 +- .../module_deltaspin/cal_mw.cpp | 2 +- .../module_deltaspin/cal_mw_helper.cpp | 14 +- .../module_deltaspin/lambda_loop.cpp | 4 +- .../module_tddft/test/tddft_test.cpp | 3 +- source/module_hsolver/diago_pexsi.cpp | 44 +- source/module_hsolver/diago_pexsi.h | 2 +- source/module_hsolver/hsolver_pw.cpp | 22 +- .../module_pexsi/CMakeLists.txt | 2 +- .../module_pexsi/dist_bcd_matrix.cpp | 4 +- .../module_pexsi/dist_bcd_matrix.h | 23 + .../module_pexsi/dist_ccs_matrix.cpp | 4 +- .../module_pexsi/dist_ccs_matrix.h | 40 + .../module_pexsi/dist_matrix_transformer.cpp | 233 +++-- .../module_pexsi/dist_matrix_transformer.h | 64 +- .../module_pexsi/pexsi_solver.cpp | 28 +- .../module_pexsi/pexsi_solver.h | 6 +- .../module_pexsi/simple_pexsi.cpp | 490 ++++----- source/module_io/input.cpp | 230 ++++- source/module_io/input.h | 43 +- source/module_io/input_conv.cpp | 30 + source/module_io/mulliken_charge.cpp | 4 +- source/module_io/nscf_band.cpp | 41 +- 
source/module_io/nscf_band.h | 1 + source/module_io/parameter_pool.cpp | 39 +- source/module_io/parameter_pool.h | 8 +- source/module_io/test/input_conv_test.cpp | 1 + source/module_io/test/input_test.cpp | 14 +- source/module_io/test/input_test_para.cpp | 9 +- source/module_io/test/support/INPUT | 2 +- source/module_io/test/support/witestfile | 2 +- source/module_io/test/to_qo_test.cpp | 34 +- source/module_io/test/write_input_test.cpp | 11 +- .../module_io/test_serial/nscf_band_test.cpp | 11 +- source/module_io/write_input.cpp | 33 +- source/module_ri/Exx_LRI.hpp | 10 +- source/module_ri/LRI_CV_Tools.hpp | 11 +- .../module_ri/conv_coulomb_pot_k-template.h | 51 - source/module_ri/conv_coulomb_pot_k.cpp | 181 ++-- source/module_ri/conv_coulomb_pot_k.h | 47 +- source/module_ri/conv_coulomb_pot_k.hpp | 37 + source/module_ri/exx_lip.cpp | 14 +- .../107_PW_OBOD_MemSaver/refBANDS_1.dat | 12 +- .../107_PW_OB_outputbands/refBANDS_1.dat | 12 +- .../204_NO_KP_NC_deltaspin/mulliken.txt.ref | 168 +-- .../204_NO_KP_NC_deltaspin/result.ref | 6 +- tests/integrate/207_NO_KP_OB/refBANDS_1.dat | 12 +- tests/integrate/Autotest.sh | 10 +- toolchain/README.md | 15 +- toolchain/install_abacus_toolchain.sh | 14 +- toolchain/scripts/common_vars.sh | 3 +- toolchain/scripts/stage4/install_rapidjson.sh | 93 ++ toolchain/scripts/stage4/install_stage4.sh | 1 + toolchain/scripts/tool_kit.sh | 9 +- toolchain/toolchain_gnu.sh | 1 + toolchain/toolchain_intel-mpich.sh | 1 + toolchain/toolchain_intel.sh | 1 + 108 files changed, 4025 insertions(+), 1722 deletions(-) create mode 100644 cmake/FindLibxc.cmake create mode 100644 python/pyabacus/src/py_abacus.cpp create mode 100644 python/pyabacus/src/py_math_base.cpp create mode 100644 python/pyabacus/tests/test_base_math.py delete mode 100644 python/pyabacus/tests/test_nr.py create mode 100644 source/module_base/para_json.cpp create mode 100644 source/module_base/para_json.h create mode 100644 source/module_base/test/para_json_test.cpp create mode 
100644 source/module_base/test/perf_sphbes_test.cpp delete mode 100644 source/module_ri/conv_coulomb_pot_k-template.h create mode 100644 source/module_ri/conv_coulomb_pot_k.hpp create mode 100755 toolchain/scripts/stage4/install_rapidjson.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 856e56d97a..76f48347a8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,27 +2,37 @@ name: Integration Test and Unit Test on: pull_request: - + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true - + jobs: test: name: Test runs-on: self-hosted if: github.repository_owner == 'deepmodeling' - container: ghcr.io/deepmodeling/abacus-gnu + container: + image: ghcr.io/deepmodeling/abacus-gnu + volumes: + - /tmp/ccache:/github/home/.ccache steps: - name: Checkout uses: actions/checkout@v4 with: submodules: recursive + + - name: Install Ccache + run: | + sudo apt-get update + sudo apt-get install -y ccache + - name: Build run: | - cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON + cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON cmake --build build -j8 cmake --install build + - name: Test env: GTEST_COLOR: 'yes' diff --git a/CMakeLists.txt b/CMakeLists.txt index 8440662355..73a846304b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ project(ABACUS LANGUAGES CXX ) -# private options, should not be pushed to master -# set(PEXSI_DIR "~/projects/pexsi-build/pexsi") -# set(SuperLU_DIR "~/projects/pexsi-build/superlu") -# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis") -# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001") - option(ENABLE_LCAO "Enable LCAO calculation." 
ON) option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF) option(ENABLE_LIBXC "Enable LibXC functionality" OFF) @@ -40,7 +34,36 @@ option(DEBUG_INFO "Print message for developers to debug." OFF) option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF) option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) +option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) +option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) option(USE_PEXSI "Enable support for PEXSI." OFF) + + + +# enable json support +if(ENABLE_RAPIDJSON) + find_package(RapidJSON) + if(NOT RapidJSON_FOUND) + message(WARNING "Rapidjson is not found, trying downloading from github, or you can install Rapidjson first and reinstall abacus.") + include(FetchContent) + FetchContent_Declare( + rapidjson + GIT_REPOSITORY https://github.com/Tencent/rapidjson.git + GIT_TAG "origin/master" + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + ) + set(RAPIDJSON_BUILD_TESTS OFF CACHE INTERNAL "") + set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE INTERNAL "") + FetchContent_MakeAvailable(rapidjson) + set(RapidJSON_INCLUDE_PATH "${rapidjson_SOURCE_DIR}/include") + endif() + add_compile_definitions(__RAPIDJSON) + add_definitions(-DRAPIDJSON_HAS_CXX11_NOEXCEPT=0) + include_directories(${RapidJSON_INCLUDE_PATH}) +endif() + + if (USE_CUDA) set(USE_CUSOLVER_LCAO ON) else() @@ -189,7 +212,7 @@ if(ENABLE_LCAO) if(USE_PEXSI) find_package(PEXSI REQUIRED) - target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) endif() @@ -414,8 +437,7 @@ endif() if(ENABLE_DEEPKS) # Torch uses outdated components to detech CUDA arch, 
causing failure on latest CUDA kits. - # See above for setting CMAKE_CUDA_ARCHITECTURES - set(TORCH_CUDA_ARCH_LIST CMAKE_CUDA_ARCHITECTURES) + # Set CMake variable TORCH_CUDA_ARCH_LIST in the form of "major.minor" if required. find_package(Torch REQUIRED) if(NOT Torch_VERSION VERSION_LESS "2.1.0") set_if_higher(CMAKE_CXX_STANDARD 17) @@ -522,11 +544,8 @@ if(DEFINED Libxc_DIR) set(ENABLE_LIBXC ON) endif() if(ENABLE_LIBXC) - find_package(Libxc REQUIRED HINTS - ${Libxc_DIR}/share/cmake/Libxc - ${Libxc_DIR}/lib/cmake/Libxc - ${Libxc_DIR}/lib64/cmake/Libxc - ) + # use `cmake/FindLibxc.cmake` to detect Libxc installation with `pkg-config` + find_package(Libxc REQUIRED) message(STATUS "Found Libxc: version " ${Libxc_VERSION}) if(${Libxc_VERSION} VERSION_LESS 5.1.7) message(FATAL_ERROR "LibXC >= 5.1.7 is required.") @@ -580,6 +599,25 @@ if(INFO) # modifications on blas_connector and lapack_connector endif() +# Add performance test in abacus +IF (ENABLE_GOOGLEBENCH) + set(BUILD_TESTING ON) + find_package(benchmark HINTS ${BENCHMARK_DIR}) + if(NOT ${benchmark_FOUND}) + set(BENCHMARK_USE_BUNDLED_GTEST OFF) + include(FetchContent) + FetchContent_Declare( + benchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG "origin/main" + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + ) + set(BENCHMARK_ENABLE_TESTING OFF) + FetchContent_MakeAvailable(benchmark) + endif() +endif() + IF (BUILD_TESTING) set_if_higher(CMAKE_CXX_STANDARD 14) # Required in orbital include(CTest) @@ -609,8 +647,14 @@ IF (BUILD_TESTING) endif() #dependencies & link library - target_link_libraries(${UT_TARGET} ${UT_LIBS} - Threads::Threads GTest::gtest_main GTest::gmock_main) + if(ENABLE_GOOGLEBENCH) + target_link_libraries(${UT_TARGET} ${UT_LIBS} + Threads::Threads GTest::gtest_main GTest::gmock_main benchmark::benchmark) + else() + target_link_libraries(${UT_TARGET} ${UT_LIBS} + Threads::Threads GTest::gtest_main GTest::gmock_main) + endif() + if(USE_OPENMP) target_link_libraries(${UT_TARGET} 
OpenMP::OpenMP_CXX) endif() @@ -620,6 +664,7 @@ IF (BUILD_TESTING) WORKING_DIRECTORY $ ) endfunction(AddTest) + endif() add_subdirectory(source) diff --git a/Dockerfile.cuda b/Dockerfile.cuda index 719f7c4278..e950f097f9 100644 --- a/Dockerfile.cuda +++ b/Dockerfile.cuda @@ -2,7 +2,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 RUN apt update && apt install -y --no-install-recommends \ libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \ - libxc-dev libgtest-dev libgmock-dev python3-numpy \ + libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \ bc cmake git g++ make bc time sudo unzip vim wget ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ diff --git a/Dockerfile.gnu b/Dockerfile.gnu index 0b6b45d248..060d930563 100644 --- a/Dockerfile.gnu +++ b/Dockerfile.gnu @@ -1,7 +1,7 @@ FROM ubuntu:22.04 RUN apt update && apt install -y --no-install-recommends \ libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \ - libxc-dev libgtest-dev libgmock-dev python3-numpy \ + libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \ bc cmake git g++ make bc time sudo unzip vim wget gfortran ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ diff --git a/Dockerfile.intel b/Dockerfile.intel index 6cac8c9f5f..3947f05b9e 100644 --- a/Dockerfile.intel +++ b/Dockerfile.intel @@ -2,7 +2,7 @@ FROM ubuntu:22.04 RUN apt-get update && apt-get install -y \ bc cmake git gnupg gcc g++ python3-numpy sudo wget vim unzip \ - libcereal-dev libxc-dev libgtest-dev libgmock-dev + libcereal-dev libxc-dev libgtest-dev libgmock-dev libbenchmark-dev # Following steps by https://software.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html . 
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ diff --git a/cmake/FindELPA.cmake b/cmake/FindELPA.cmake index 5769f7248c..4105e47592 100644 --- a/cmake/FindELPA.cmake +++ b/cmake/FindELPA.cmake @@ -7,34 +7,50 @@ # ELPA_INCLUDE_DIR - Where to find ELPA headers. # -find_path(ELPA_INCLUDE_DIR +find_package(PkgConfig) + +find_path(ELPA_INCLUDE_DIRS elpa/elpa.h HINTS ${ELPA_DIR} PATH_SUFFIXES "include" "include/elpa" ) if(USE_OPENMP) - find_library(ELPA_LIBRARY - NAMES elpa_openmp elpa - HINTS ${ELPA_DIR} - PATH_SUFFIXES "lib" - ) + find_library(ELPA_LINK_LIBRARIES + NAMES elpa_openmp elpa + HINTS ${ELPA_DIR} + PATH_SUFFIXES "lib" + ) else() - find_library(ELPA_LIBRARY - NAMES elpa - HINTS ${ELPA_DIR} - PATH_SUFFIXES "lib" - ) + find_library(ELPA_LINK_LIBRARIES + NAMES elpa + HINTS ${ELPA_DIR} + PATH_SUFFIXES "lib" + ) +endif() + +if(NOT ELPA_INCLUDE_DIRS AND PKG_CONFIG_FOUND) + if(DEFINED ELPA_DIR) + string(APPEND CMAKE_PREFIX_PATH ";${ELPA_DIR}") + endif() + if(USE_OPENMP) + pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa_openmp) + else() + pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa) + endif() +elseif(NOT PKG_CONFIG_FOUND) + message( + "ELPA : We need pkg-config to get all information about the elpa library") endif() # Handle the QUIET and REQUIRED arguments and # set ELPA_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LIBRARY ELPA_INCLUDE_DIR) +find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LINK_LIBRARIES ELPA_INCLUDE_DIRS) # Copy the results to the output variables and target. 
if(ELPA_FOUND) - set(ELPA_LIBRARIES ${ELPA_LIBRARY}) - set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIR}) + list(GET ELPA_LINK_LIBRARIES 0 ELPA_LIBRARY) + set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIRS}) if(NOT TARGET ELPA::ELPA) add_library(ELPA::ELPA UNKNOWN IMPORTED) diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake index 4f4bfbc425..c240d5facf 100644 --- a/cmake/FindLAPACK.cmake +++ b/cmake/FindLAPACK.cmake @@ -6,7 +6,7 @@ # find_library(LAPACK_LIBRARY - NAMES openblas + NAMES openblas blas HINTS ${LAPACK_DIR} PATH_SUFFIXES "lib" ) diff --git a/cmake/FindLibxc.cmake b/cmake/FindLibxc.cmake new file mode 100644 index 0000000000..4a3c04cba7 --- /dev/null +++ b/cmake/FindLibxc.cmake @@ -0,0 +1,36 @@ +include(FindPackageHandleStandardArgs) + +if(DEFINED Libxc_DIR) + string(APPEND CMAKE_PREFIX_PATH ";${Libxc_DIR}") +endif() +# Using CMake interface as default. +# NO REQUIRED here, otherwhile it would throw error +# with no LibXC found. +find_package(Libxc HINTS + ${Libxc_DIR}/share/cmake/Libxc + ${Libxc_DIR}/lib/cmake/Libxc + ${Libxc_DIR}/lib64/cmake/Libxc + ) +if(NOT TARGET Libxc::xc) + find_package(PkgConfig REQUIRED) + pkg_search_module(Libxc REQUIRED IMPORTED_TARGET GLOBAL libxc) + find_package_handle_standard_args(Libxc DEFAULT_MSG Libxc_LINK_LIBRARIES Libxc_INCLUDE_DIRS) +endif() + + +# Copy the results to the output variables and target. +# if find_package() above works, Libxc::xc would be present and +# below would be skipped. 
+if(Libxc_FOUND AND NOT TARGET Libxc::xc) + set(Libxc_LIBRARY ${Libxc_LINK_LIBRARIES}) + set(Libxc_LIBRARIES ${Libxc_LIBRARY}) + set(Libxc_INCLUDE_DIR ${Libxc_INCLUDE_DIRS}) + add_library(Libxc::xc UNKNOWN IMPORTED) + set_target_properties(Libxc::xc PROPERTIES + IMPORTED_LOCATION "${Libxc_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${Libxc_INCLUDE_DIR}") +endif() + +set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${Libxc_INCLUDE_DIR}) + +mark_as_advanced(Libxc_INCLUDE_DIR Libxc_LIBRARY) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index 22fe4dd01c..062764acce 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY PATH_SUFFIXES "lib" ) -find_library(SuperLU_LIBRARY +find_library(SuperLU_DIST_LIBRARY NAMES libsuperlu_dist.a - HINTS ${SuperLU_DIR} + HINTS ${SuperLU_DIST_DIR} PATH_SUFFIXES "lib" ) # Handle the QUIET and REQUIRED arguments and # set Cereal_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) # Copy the results to the output variables and target. 
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY) +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY) diff --git a/deps/libpaw_interface b/deps/libpaw_interface index 893cfe5b88..c211c0ab33 160000 --- a/deps/libpaw_interface +++ b/deps/libpaw_interface @@ -1 +1 @@ -Subproject commit 893cfe5b88c4b640b88a82335474d9f67d4c4cf6 +Subproject commit c211c0ab330adf3cc374f50ab3edee46b174e64c diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index b9ee122af6..8c675cab92 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -72,6 +72,7 @@ - [mixing\_beta](#mixing_beta) - [mixing\_beta\_mag](#mixing_beta_mag) - [mixing\_ndim](#mixing_ndim) + - [mixing\_restart](#mixing_restart) - [mixing\_gg0](#mixing_gg0) - [mixing\_gg0\_mag](#mixing_gg0_mag) - [mixing\_gg0\_min](#mixing_gg0_min) @@ -145,6 +146,8 @@ - [out\_app\_flag](#out_app_flag) - [out\_ndigits](#out_ndigits) - [out\_interval](#out_interval) + - [band\_print\_num](#band_print_num) + - [bands\_to\_print](#bands_to_print) - [out\_element\_info](#out_element_info) - [restart\_save](#restart_save) - [restart\_load](#restart_load) @@ -945,6 +948,8 @@ calculations. - **fixed**: fixed occupations (available for non-coductors only) - **gauss** or **gaussian**: Gaussian smearing method. - **mp**: methfessel-paxton smearing method; recommended for metals. + - **mp2**: 2-nd methfessel-paxton smearing method; recommended for metals. + - **mv** or **cold**: marzari-vanderbilt smearing method. - **fd**: Fermi-Dirac smearing method: $f=1/\{1+\exp[(E-\mu)/kT]\}$ and smearing_sigma below is the temperature $T$ (in Ry). - **Default**: gauss @@ -1001,6 +1006,13 @@ We recommend the following options: For systems that are difficult to converge, one could try increasing the value of 'mixing_ndim' to enhance the stability of the self-consistent field (SCF) calculation. 
- **Default**: 8 +### mixing_restart + +- **Type**: Integer +- **Description**: At `mixing_restart`-th iteration, SCF will restart by using output charge density from perivos iteration as input charge density directly, and start a new mixing. `mixing_restart=0|1` means SCF starts from scratch. + +- **Default**: 0 + ### mixing_gg0 - **Type**: Real @@ -1494,8 +1506,8 @@ These variables are used to control the output of properties. ### out_band -- **Type**: Boolean -- **Description**: Whether to output the band structure (in eV). For more information, refer to the [band.md](../elec_properties/band.md) +- **Type**: Boolean Integer(optional) +- **Description**: Whether to output the band structure (in eV), optionally output precision can be set by a second parameter, default is 8. For more information, refer to the [band.md](../elec_properties/band.md) - **Default**: False ### out_proj_band @@ -1599,6 +1611,20 @@ These variables are used to control the output of properties. - **Description**: Control the interval for printing Mulliken population analysis, $r(R)$, $H(R)$, $S(R)$, $T(R)$, $dH(R)$, $H(k)$, $S(k)$ and $wfc(k)$ matrices during molecular dynamics calculations. Check input parameters [out_mul](#out_mul), [out_mat_r](#out_mat_r), [out_mat_hs2](#out_mat_hs2), [out_mat_t](#out_mat_t), [out_mat_dh](#out_mat_dh), [out_mat_hs](#out_mat_hs) and [out_wfc_lcao](#out_wfc_lcao) for more information, respectively. - **Default**: 1 +### band_print_num + +- **Type**: Integer +- **Availability**: PW basis +- **Description**: If you want to plot a partial charge density contributed from some chosen bands. `band_print_num` define the number of band list. The result can be found in "band*.cube". +- **Default**: 0 + +### bands_to_print + +- **Type**: vector +- **Availability**: band_print_num > 0 +- **Description**: define which band you want to choose for partial charge density. 
+- **Default**: [] + ### out_element_info - **Type**: Boolean @@ -2776,9 +2802,9 @@ These variables are used to control berry phase and wannier90 interface paramete - **Type**: String - **Description**: the spin direction for the Wannier function calculation when nspin is set to 2 - - "up": Calculate spin up for the Wannier function. - - "down": Calculate spin down for the Wannier function. -- **Default**: "up" + - `up`: Calculate spin up for the Wannier function. + - `down`: Calculate spin down for the Wannier function. +- **Default**: `up` ### out_wannier_mmn @@ -2818,6 +2844,7 @@ These variables are used to control berry phase and wannier90 interface paramete - **Description**: write the "UNK.*" file in ASCII format or binary format. - 0: write the "UNK.*" file in binary format. - 1: write the "UNK.*" file in ASCII format (text file format). +- **Default**: 1 [back to top](#full-list-of-input-keywords) diff --git a/docs/advanced/install.md b/docs/advanced/install.md index e929fac34c..d6201a060f 100644 --- a/docs/advanced/install.md +++ b/docs/advanced/install.md @@ -69,6 +69,16 @@ After building and installing, unit tests can be performed with `ctest`. To run a subset of unit test, use `ctest -R ` to perform tests with name matched by given pattern. +## Build Performance Tests + +To build performance tests for ABACUS, define `ENABLE_GOOGLEBENCH` flag. You can also specify the path to a local installation of [Google Benchmark](https://github.com/google/benchmark.git) by setting `BENCHMARK_DIR` flags. If not found locally, the configuration process will try to download it automatically. + +```bash +cmake -B build -DENABLE_GOOGLEBENCH=1 +``` + +Google Benchmark requires Google Test to build and run the tests. When setting `ENABLE_GOOGLEBENCH` to ON, `BUILD_TESTING` is automatically enabled. After building and installing, performance tests can be executed with `ctest`. 
+ ## Build with CUDA support ### Extra prerequisites diff --git a/docs/quick_start/easy_install.md b/docs/quick_start/easy_install.md index 957b9d3262..4089e303a3 100644 --- a/docs/quick_start/easy_install.md +++ b/docs/quick_start/easy_install.md @@ -28,7 +28,7 @@ These requirements support the calculation of plane-wave basis in ABACUS. For LC Some of these packages can be installed with popular package management system, such as `apt` and `yum`: ```bash -sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git +sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git pkgconf ``` > Installing ELPA by apt only matches requirements on Ubuntu 22.04. For earlier linux distributions, you should build ELPA from source. @@ -111,12 +111,12 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s - `LAPACK_DIR`: Path to OpenBLAS library `libopenblas.so`(including BLAS and LAPACK) - `SCALAPACK_DIR`: Path to ScaLAPACK library `libscalapack.so` - `ELPA_DIR`: Path to ELPA install directory; should be the folder containing 'include' and 'lib'. - > Note: If you install ELPA from source, please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa`. This is a known behavior of ELPA. + > Note: In ABACUS v3.5.1 or earlier, if you install ELPA from source , please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa` to help the build system find ELPA headers. - `FFTW3_DIR`: Path to FFTW3. - `CEREAL_INCLUDE_DIR`: Path to the parent folder of `cereal/cereal.hpp`. Will download from GitHub if absent. - `Libxc_DIR`: (Optional) Path to Libxc. 
- > Note: Building Libxc from source with Makefile does NOT support using it in CMake here. Please compile Libxc with CMake instead. + > Note: In ABACUS v3.5.1 or earlier, Libxc built from source with Makefile is NOT supported; please compile Libxc with CMake instead. - `LIBRI_DIR`: (Optional) Path to LibRI. - `LIBCOMM_DIR`: (Optional) Path to LibComm. @@ -126,6 +126,7 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s - `ENABLE_LIBRI=OFF`: [Enable LibRI](../advanced/install.md#add-libri-support) to suppport variety of functionals. If `LIBRI_DIR` and `LIBCOMM_DIR` is defined, `ENABLE_LIBRI` will set to 'ON'. - `USE_OPENMP=ON`: Enable OpenMP support. Building ABACUS without OpenMP is not fully tested yet. - `BUILD_TESTING=OFF`: [Build unit tests](../advanced/install.md#build-unit-tests). + - `ENABLE_GOOGLEBENCH=OFF`: [Build performance tests](../advanced/install.md#build-performance-tests) - `ENABLE_MPI=ON`: Enable MPI parallel compilation. If set to `OFF`, a serial version of ABACUS with PW basis only will be compiled. Currently serial version of ABACUS with LCAO basis is not supported yet, so `ENABLE_LCAO` will be automatically set to `OFF`. - `ENABLE_COVERAGE=OFF`: Build ABACUS executable supporting [coverage analysis](../CONTRIBUTING.md#generating-code-coverage-report). This feature has a drastic impact on performance. - `ENABLE_ASAN=OFF`: Build with Address Sanitizer. This feature would help detecting memory problems. @@ -229,7 +230,7 @@ conda create -n abacus_env abacus -c conda-forge conda activate abacus_env export CMAKE_PREFIX_PATH=$CONDA_PREFIX:$CMAKE_PREFIX_PATH -# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft -c conda-forge` to switch implementation. +# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft mkl-devel -c conda-forge` to switch implementation. export MKLROOT=$CONDA_PREFIX # If Intel MKL is required. 
export CMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`:$CMAKE_PREFIX_PATH # If DEEPKS support is required; diff --git a/docs/quick_start/hands_on.md b/docs/quick_start/hands_on.md index d63c6b0232..2e0e768169 100644 --- a/docs/quick_start/hands_on.md +++ b/docs/quick_start/hands_on.md @@ -57,7 +57,7 @@ basis_type lcao calculation scf # this is the key parameter telling abacus to do a scf calculation ``` -The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir`, and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir`. The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm). +The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir` defined in `INPUT` (the default directory is "./"), and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir` also defined in `INPUT` (the default directory is "./"). The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm). The final mandatory input file is called `KPT`, which sets the reciprocal space k-mesh. 
Below is an example: diff --git a/python/pyabacus/CMakeLists.txt b/python/pyabacus/CMakeLists.txt index 399bd4fe57..0effbe83f2 100644 --- a/python/pyabacus/CMakeLists.txt +++ b/python/pyabacus/CMakeLists.txt @@ -12,9 +12,14 @@ set(BASE_PATH "${PROJECT_SOURCE_DIR}/../../source/module_base") set(ABACUS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../source") include_directories(${BASE_PATH} ${ABACUS_SOURCE_DIR}) list(APPEND _sources - ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h - ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp - ${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp) + #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h + #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp + ${ABACUS_SOURCE_DIR}/module_base/constants.h + ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.h + ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.cpp + ${PROJECT_SOURCE_DIR}/src/py_abacus.cpp + #${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp + ${PROJECT_SOURCE_DIR}/src/py_math_base.cpp) python_add_library(_core MODULE ${_sources} WITH_SOABI) target_link_libraries(_core PRIVATE pybind11::headers) target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) diff --git a/python/pyabacus/src/py_abacus.cpp b/python/pyabacus/src/py_abacus.cpp new file mode 100644 index 0000000000..34b354dc6b --- /dev/null +++ b/python/pyabacus/src/py_abacus.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace py = pybind11; + +void bind_numerical_radial(py::module& m); +void bind_math_base(py::module& m); + +PYBIND11_MODULE(_core, m) +{ + // bind_numerical_radial(m); + bind_math_base(m); +} \ No newline at end of file diff --git a/python/pyabacus/src/py_math_base.cpp b/python/pyabacus/src/py_math_base.cpp new file mode 100644 index 0000000000..4378690897 --- /dev/null +++ b/python/pyabacus/src/py_math_base.cpp @@ -0,0 +1,63 @@ +#include +#include + +#include "module_base/math_sphbes.h" + +namespace py = pybind11; +using namespace 
pybind11::literals; +template +using overload_cast_ = pybind11::detail::overload_cast_impl; + +void bind_math_base(py::module& m) +{ + py::module module_base = m.def_submodule("ModuleBase"); + + py::class_(module_base, "Sphbes") + .def(py::init<>()) + .def_static("sphbesj", overload_cast_()(&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a) + .def_static("dsphbesj", overload_cast_()(&ModuleBase::Sphbes::dsphbesj), "l"_a, "x"_a) + .def_static("sphbesj", + [](const int n, py::array_t r, const double q, const int l, py::array_t jl) { + py::buffer_info r_info = r.request(); + if (r_info.ndim != 1) + { + throw std::runtime_error("r array must be 1-dimensional"); + } + py::buffer_info jl_info = jl.request(); + if (jl_info.ndim != 1) + { + throw std::runtime_error("jl array must be 1-dimensional"); + } + ModuleBase::Sphbes::sphbesj(n, + static_cast(r_info.ptr), + q, + l, + static_cast(jl_info.ptr)); + }) + .def_static("dsphbesj", + [](const int n, py::array_t r, const double q, const int l, py::array_t djl) { + py::buffer_info r_info = r.request(); + if (r_info.ndim != 1) + { + throw std::runtime_error("r array must be 1-dimensional"); + } + py::buffer_info djl_info = djl.request(); + if (djl_info.ndim != 1) + { + throw std::runtime_error("djl array must be 1-dimensional"); + } + ModuleBase::Sphbes::dsphbesj(n, + static_cast(r_info.ptr), + q, + l, + static_cast(djl_info.ptr)); + }) + .def_static("sphbes_zeros", [](const int l, const int n, py::array_t zeros) { + py::buffer_info zeros_info = zeros.request(); + if (zeros_info.ndim != 1) + { + throw std::runtime_error("zeros array must be 1-dimensional"); + } + ModuleBase::Sphbes::sphbes_zeros(l, n, static_cast(zeros_info.ptr)); + }); +} \ No newline at end of file diff --git a/python/pyabacus/src/py_numerical_radial.cpp b/python/pyabacus/src/py_numerical_radial.cpp index 296229b3d1..ebda8f080b 100644 --- a/python/pyabacus/src/py_numerical_radial.cpp +++ b/python/pyabacus/src/py_numerical_radial.cpp @@ -8,7 +8,7 @@ using namespace 
pybind11::literals; template using overload_cast_ = pybind11::detail::overload_cast_impl; -PYBIND11_MODULE(_core, m) +void bind_numerical_radial(py::module& m) { // Create the submodule for NumericalRadial py::module m_numerical_radial = m.def_submodule("NumericalRadial"); @@ -165,4 +165,4 @@ PYBIND11_MODULE(_core, m) .def_property_readonly("kgrid", overload_cast_()(&NumericalRadial::kgrid, py::const_)) .def_property_readonly("rvalue", overload_cast_()(&NumericalRadial::rvalue, py::const_)) .def_property_readonly("kvalue", overload_cast_()(&NumericalRadial::kvalue, py::const_)); -} +} \ No newline at end of file diff --git a/python/pyabacus/src/pyabacus/__init__.py b/python/pyabacus/src/pyabacus/__init__.py index cda9318053..94d8c0d5b8 100644 --- a/python/pyabacus/src/pyabacus/__init__.py +++ b/python/pyabacus/src/pyabacus/__init__.py @@ -1,3 +1,4 @@ from __future__ import annotations -from ._core import __doc__, __version__, NumericalRadial -__all__ = ["__doc__", "__version__", "NumericalRadial"] \ No newline at end of file +# from ._core import __doc__, __version__, NumericalRadial, ModuleBase +from ._core import ModuleBase +__all__ = ["ModuleBase"] \ No newline at end of file diff --git a/python/pyabacus/tests/test_base_math.py b/python/pyabacus/tests/test_base_math.py new file mode 100644 index 0000000000..97d5118bac --- /dev/null +++ b/python/pyabacus/tests/test_base_math.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +import pyabacus as m +import numpy as np + + +def test_version(): + assert m.__version__ == "0.0.1" + +def test_sphbes(): + s = m.ModuleBase.Sphbes() + # test for sphbesj + assert s.sphbesj(1, 0.0) == 0.0 + assert s.sphbesj(0, 0.0) == 1.0 + diff --git a/python/pyabacus/tests/test_nr.py b/python/pyabacus/tests/test_nr.py deleted file mode 100644 index 4986331b25..0000000000 --- a/python/pyabacus/tests/test_nr.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -import pyabacus as m - - -def test_version(): - assert 
m.__version__ == "0.0.1" - -def test_attributes(): - chi = m.NumericalRadial() - # string - assert chi.symbol == '' - # integer - assert chi.itype == 0 - assert chi.izeta == 0 - assert chi.l == -1 - assert chi.nr == 0 - assert chi.nk == 0 - # float - assert chi.rcut == 0.0 - assert chi.kcut == 0.0 - assert chi.pr == 0.0 - assert chi.pk == 0.0 - # bool - assert chi.is_fft_compliant == False diff --git a/source/Makefile b/source/Makefile index 0dcd329161..7bd81f26d9 100644 --- a/source/Makefile +++ b/source/Makefile @@ -7,7 +7,7 @@ include Makefile.vars INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container LIBS = -lm -lpthread -OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic +OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES} HONG = -D__LCAO HONG += -D__ELPA ifeq ($(OPENMP), ON) @@ -75,7 +75,7 @@ else FFTW_INCLUDE_DIR = ${FFTW_DIR}/include FFTW_LIB_DIR = ${FFTW_DIR}/lib HONG += -D__FFTW3 - LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl + LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} INCLUDES += -I${FFTW_INCLUDE_DIR} #========================== @@ -140,12 +140,6 @@ ifdef LIBTORCH_DIR endif endif -ifdef PEXSI_DIR - INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE} - LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB} - HONG += -D__PEXSI -endif - ifdef DeePMD_DIR HONG += -D__DPMD -DHIGH_PREC OPTS += -Wl,--no-as-needed @@ -175,6 +169,13 @@ ifdef DeePMD_DIR INCLUDES += -I${TensorFlow_INCLUDE_DIR} endif +ifdef PEXSI_DIR + OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI} + INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include + LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis + HONG += -D__PEXSI +endif + include Makefile.Objects #========================== diff --git a/source/Makefile.Objects 
b/source/Makefile.Objects index 2a69761da3..71e637a80b 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -30,7 +30,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ -./module_hsolver/pexsi:\ +./module_hsolver/module_pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -102,6 +102,7 @@ ${OBJS_VDW}\ ${OBJS_DFTU}\ ${OBJS_DELTASPIN}\ ${OBJS_TENSOR}\ +${OBJS_HSOLVER_PEXSI}\ OBJS_MAIN=main.o\ driver.o\ @@ -290,13 +291,7 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ - diago_pexsi.o\ - DistBCDMatrix.o\ - DistCCSMatrix.o\ - DistMatrixTransformer.o\ - pexsi_solver.o\ - simplePEXSI.o\ - + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diago_elpa.o\ @@ -305,6 +300,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ elpa_new_complex.o\ utils.o\ +OBJS_HSOLVER_PEXSI=diago_pexsi.o\ + pexsi_solver.o\ + simple_pexsi.o\ + dist_bcd_matrix.o\ + dist_ccs_matrix.o\ + dist_matrix_transformer.o\ + OBJS_MD=fire.o\ langevin.o\ md_base.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 860bbdd806..477b0a251d 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -33,15 +33,6 @@ ELPA_DIR = /root/lib/ELPA ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/ CEREAL_DIR = /root/lib/cereal -DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0 -DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include -DSUPERLU_LIB = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a - -SCOTCH_INCLUDE = -I/usr/local/include -PTSCOTCH_DIR = /root/workspace/scotch_6.0.0 -PTSCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a -SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a - ##------------------- FOR GNU COMPILER ------------------------------ @@ -68,14 +59,9 @@ SCOTCH_LIB = 
${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR -## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a +## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR ##--------------------------------------------------------------------- -PEXSI_DIR = /root/workspace/pexsi_v2.0.0 -PEXSI_LIB_DIR = ${PEXSI_DIR}/src -PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include - - # LIBTORCH_DIR = /usr/local # LIBNPY_DIR = /usr/local @@ -87,6 +73,10 @@ PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm +# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi +# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu +# PARMETIS_DIR = /home/rhx/projects/pexsi-build/parmetis + ##--------------------------------------------------------------------- # NP = 14 # It is not supported. 
use make -j14 or make -j to parallelly compile # DEBUG = OFF diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index 696bcd6088..6b7015dc25 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -248,6 +248,7 @@ std::string of_kernel_file = "WTkernel.txt"; std::string MIXING_MODE = "broyden"; double MIXING_BETA = 0.7; int MIXING_NDIM = 8; +int MIXING_RESTART = 0; double MIXING_GG0 = 1.00; double MIXING_BETA_MAG = 1.6; double MIXING_GG0_MAG = 1.00; @@ -300,4 +301,33 @@ std::string qo_basis = "hydrogen"; std::vector qo_strategy = {}; double qo_thr = 1.0e-6; std::vector qo_screening_coeff = {}; + +//========================================================== +// PEXSI related +//========================================================== +int pexsi_npole = 54; +int pexsi_inertia = 1; +int pexsi_nmax = 80; +// int pexsi_symbolic = 1; +int pexsi_comm = 1; +int pexsi_storage = 1; +int pexsi_ordering = 0; +int pexsi_row_ordering = 1; +int pexsi_nproc = 1; +int pexsi_symm = 1; +int pexsi_trans = 0; +int pexsi_method = 1; +int pexsi_nproc_pole = 1; +// double pexsi_spin = 2; +double pexsi_temp = 0.0001; +double pexsi_gap = 0; +double pexsi_delta_e = 20.0; +double pexsi_mu_lower = -10; +double pexsi_mu_upper = 10; +double pexsi_mu = 0.0; +double pexsi_mu_thr = 0.05; +double pexsi_mu_expand = 0.3; +double pexsi_mu_guard = 0.2; +double pexsi_elec_thr = 0.001; +double pexsi_zero_thr = 1e-10; } // namespace GlobalV diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index b1fbb1748d..9808ca080b 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -277,6 +277,7 @@ extern std::string of_kernel_file; // The name of WT kernel file. 
extern std::string MIXING_MODE; extern double MIXING_BETA; extern int MIXING_NDIM; +extern int MIXING_RESTART; extern double MIXING_GG0; extern bool MIXING_TAU; extern double MIXING_BETA_MAG; @@ -328,5 +329,32 @@ extern std::string qo_basis; extern std::vector qo_strategy; extern double qo_thr; extern std::vector qo_screening_coeff; + +// PEXSI related +extern int pexsi_npole; +extern int pexsi_inertia; +extern int pexsi_nmax; +// extern int pexsi_symbolic; +extern int pexsi_comm; +extern int pexsi_storage; +extern int pexsi_ordering; +extern int pexsi_row_ordering; +extern int pexsi_nproc; +extern int pexsi_symm; +extern int pexsi_trans; +extern int pexsi_method; +extern int pexsi_nproc_pole; +// extern double pexsi_spin; +extern double pexsi_temp; +extern double pexsi_gap; +extern double pexsi_delta_e; +extern double pexsi_mu_lower; +extern double pexsi_mu_upper; +extern double pexsi_mu; +extern double pexsi_mu_thr; +extern double pexsi_mu_expand; +extern double pexsi_mu_guard; +extern double pexsi_elec_thr; +extern double pexsi_zero_thr; } // namespace GlobalV #endif diff --git a/source/module_base/math_sphbes.cpp b/source/module_base/math_sphbes.cpp index 5e7f41de54..73e0127e6b 100644 --- a/source/module_base/math_sphbes.cpp +++ b/source/module_base/math_sphbes.cpp @@ -1,7 +1,7 @@ #include "math_sphbes.h" -#include "timer.h" #include "constants.h" #include +#include #include @@ -425,7 +425,6 @@ void Sphbes::Spherical_Bessel double *jl // jl(1:msh) = j_l(q*r(i)),spherical bessel function ) { - ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); double x1=0.0; int i=0; @@ -598,7 +597,6 @@ void Sphbes::Spherical_Bessel } } - ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); return; } @@ -613,7 +611,6 @@ void Sphbes::Spherical_Bessel double *sjp ) { - ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); //calculate jlx first Spherical_Bessel (msh, r, q, l, sj); @@ -634,7 +631,6 @@ void Sphbes::dSpherical_Bessel_dx double *djl // jl(1:msh) = 
j_l(q*r(i)),spherical bessel function ) { - ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq"); if (l < 0 ) { std::cout << "We temporarily only calculate derivative of l >= 0." << std::endl; @@ -682,7 +678,6 @@ void Sphbes::dSpherical_Bessel_dx } delete[] jl; } - ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq"); return; } @@ -808,7 +803,7 @@ void Sphbes::dsphbesj(const int n, } } -void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros) +void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const bool return_all) { assert( n > 0 ); assert( l >= 0 ); @@ -818,10 +813,22 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros) // This property enables us to use bracketing method recursively // to find all zeros of j_l from the zeros of j_0. - // if l is odd , j_0 --> j_1 --> j_3 --> j_5 --> ... - // if l is even, j_0 --> j_2 --> j_4 --> j_6 --> ... - - int nz = n + (l+1)/2; // number of effective zeros in buffer + // If return_all is true, zeros of j_0, j_1, ..., j_l will all be returned + // such that zeros[l*n+i] is the i-th zero of j_l. As such, it is required + // that the array "zeros" has a size of (l+1)*n. + // + // If return_all is false, only the zeros of j_l will be returned + // and "zeros" is merely required to have a size of n. + // Note that in this case the bracketing method can be applied with a stride + // of 2 instead of 1: + // j_0 --> j_1 --> j_3 --> j_5 --> ... --> j_l (odd l) + // j_0 --> j_2 --> j_4 --> j_6 --> ... --> j_l (even l) + + // Every recursion step reduces the number of zeros by 1. + // If return_all is true, one needs to start with n+l zeros of j_0 + // to ensure n zeros of j_l; otherwise with a stride of 2 one only + // needs to start with n+(l+1)/2 zeros of j_0 + int nz = n + ( return_all ? 
l : (l+1)/2 ); double* buffer = new double[nz]; // zeros of j_0 = sin(x)/x is just n*pi @@ -831,27 +838,34 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros) buffer[i] = (i+1) * PI; } - int ll = 1; + int ll; // active l auto jl = [&ll] (double x) { return sphbesj(ll, x); }; - - if (l % 2 == 1) + int stride; + std::function copy_if_needed; + int offset = 0; // keeps track of the position in zeros for next copy (used when return_all == true) + if (return_all) { - for (int i = 0; i < nz-1; i++) - { - buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50); - } - --nz; + copy_if_needed = [&](){ std::copy(buffer, buffer + n, zeros + offset); offset += n; }; + stride = 1; + ll = 1; + } + else + { + copy_if_needed = [](){}; + stride = 2; + ll = 2 - l % 2; } - for (ll = 2 + l%2; ll <= l; ll += 2, --nz) + for (; ll <= l; ll += stride, --nz) { + copy_if_needed(); for (int i = 0; i < nz-1; i++) { buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50); } } - std::copy(buffer, buffer + n, zeros); + std::copy(buffer, buffer + n, zeros + offset); delete[] buffer; } diff --git a/source/module_base/math_sphbes.h b/source/module_base/math_sphbes.h index c654847a5d..7aa9c78a48 100644 --- a/source/module_base/math_sphbes.h +++ b/source/module_base/math_sphbes.h @@ -126,13 +126,18 @@ class Sphbes * This function computes the first n positive zeros of the l-th order * spherical Bessel function of the first kind. * - * @param[in] l order of the spherical Bessel function - * @param[in] n number of zeros to be computed - * @param[out] zeros on exit, contains the first n positive zeros in ascending order + * @param[in] l (maximum) order of the spherical Bessel function + * @param[in] n number of zeros to be computed (for each j_l if return_all is true) + * @param[out] zeros on exit, contains the positive zeros. + * @param[in] return_all if true, return all zeros from j_0 to j_l such that zeros[l*n+i] + * is the i-th zero of j_l. 
If false, return only the first n zeros of j_l. + * + * @note The size of array "zeros" must be at least (l+1)*n if return_all is true, and n otherwise. */ static void sphbes_zeros(const int l, const int n, - double* const zeros + double* const zeros, + bool return_all = false ); private: diff --git a/source/module_base/para_json.cpp b/source/module_base/para_json.cpp new file mode 100644 index 0000000000..1f042271f8 --- /dev/null +++ b/source/module_base/para_json.cpp @@ -0,0 +1,977 @@ +#include "para_json.h" +#include "module_base/global_variable.h" + +#ifdef __RAPIDJSON + +namespace Para_Json +{ + int test=4; + // @param doc: the output json file + rapidjson::Document doc; + rapidjson::Value abacus(rapidjson::kObjectType); + + // @param general_info : + rapidjson::Value general_info(rapidjson::kObjectType); + rapidjson::Value version; + + rapidjson::Value commit; + rapidjson::Value begin_time; + rapidjson::Value begin_date; + rapidjson::Value device_g; + // @param general_info -- parallel: + rapidjson::Value parallel(rapidjson::kObjectType); + rapidjson::Value drank; + rapidjson::Value dsize; + rapidjson::Value dcolor ; + // @param general_info -- path + rapidjson::Value path(rapidjson::kObjectType); + rapidjson::Value global_out_dir; + rapidjson::Value global_in_card; + rapidjson::Value pseudo_dir_path ; + rapidjson::Value orbital_dir_path; + + + // @param reading_information: + rapidjson::Value readin_info(rapidjson::kObjectType); + // @param reading_information -- input_file: + rapidjson::Value input_file(rapidjson::kObjectType); + + + // @param reading_information -- input_file -- system_variables: + rapidjson::Value input_suffix; + rapidjson::Value ntype; + rapidjson::Value calculation; + rapidjson::Value esolver_type; + rapidjson::Value symmetry; + rapidjson::Value symmetry_precfield; + rapidjson::Value symmetry_autoclose; + rapidjson::Value kpar; + rapidjson::Value bndpar; + rapidjson::Value latname; + rapidjson::Value init_wfc; + rapidjson::Value 
init_chg; + rapidjson::Value init_vel; + rapidjson::Value nelec; + rapidjson::Value nupdown; + rapidjson::Value dft_functional; + rapidjson::Value xc_temperature; + rapidjson::Value pseudo_rcut(rapidjson::kNumberType ); + rapidjson::Value pseudo_mesh; + rapidjson::Value mem_saver; + rapidjson::Value diago_proc; + rapidjson::Value nbspline; + rapidjson::Value kspacing(rapidjson::kArrayType); + rapidjson::Value min_dist_coef(rapidjson::kNumberType); + rapidjson::Value device; + // @param reading_information -- input_file -- files_related + rapidjson::Value stru_file; + rapidjson::Value kpoint_file; + rapidjson::Value pseudo_dir; + rapidjson::Value orbital_dir; + rapidjson::Value read_file_dir; + rapidjson::Value wannier_card; + + // @param reading_information -- input_file -- planewave_related + rapidjson::Value ecutwfc; + rapidjson::Value nx; + rapidjson::Value ny; + rapidjson::Value nz; + rapidjson::Value pw_seed; + rapidjson::Value pw_diag_thr; + rapidjson::Value pw_diag_nmax; + rapidjson::Value pw_diag_ndim; + // @param reading_information -- input_file -- numerical_atomic_orbitals_related + rapidjson::Value nb2d; + rapidjson::Value lmaxmax; + rapidjson::Value lcao_ecut; + rapidjson::Value lcao_dk; + rapidjson::Value lcao_dr; + rapidjson::Value lcao_rmax; + rapidjson::Value search_radius; + rapidjson::Value search_pbc; + rapidjson::Value bx; + rapidjson::Value by; + rapidjson::Value bz; + // @param reading_information -- input_file -- electronic_structure + rapidjson::Value basis_type; + rapidjson::Value ks_solver; + rapidjson::Value nbands; + rapidjson::Value nbands_istate; + rapidjson::Value nspin; + rapidjson::Value smearing_method; + rapidjson::Value smearing_sigma; + rapidjson::Value smearing_sigma_temp; + rapidjson::Value mixing_type; + rapidjson::Value mixing_beta; + rapidjson::Value mixing_ndim; + rapidjson::Value mixing_gg0; + rapidjson::Value mixing_tau; + rapidjson::Value mixing_dftu; + rapidjson::Value gamma_only; + rapidjson::Value printe; + 
rapidjson::Value scf_nmax; + rapidjson::Value scf_thr; + rapidjson::Value scf_thr_type; + rapidjson::Value chg_extrap; + rapidjson::Value lspinorb; + rapidjson::Value noncolin; + rapidjson::Value soc_lambda; + // @param reading_information -- input_file -- electronic_structure_SDFT + rapidjson::Value method_sto; + rapidjson::Value nbands_sto; + rapidjson::Value nche_sto(rapidjson::kNumberType); + rapidjson::Value emin_sto; + rapidjson::Value emax_sto; + rapidjson::Value seed_sto; + rapidjson::Value initsto_freq; + rapidjson::Value npart_sto; + // @param reading_information -- input_file -- geometry_relaxation + rapidjson::Value relax_method; + rapidjson::Value relax_new; + rapidjson::Value relax_scale_force; + rapidjson::Value relax_nmax; + rapidjson::Value relax_cg_thr; + rapidjson::Value cal_force; + rapidjson::Value force_thr; + rapidjson::Value force_thr_ev; + rapidjson::Value force_thr_ev2; + rapidjson::Value relax_bfgs_w1; + rapidjson::Value relax_bfgs_w2; + rapidjson::Value relax_bfgs_rmax; + rapidjson::Value relax_bfgs_rmin; + rapidjson::Value relax_bfgs_init; + rapidjson::Value cal_stress; + rapidjson::Value stress_thr; + rapidjson::Value press1; + rapidjson::Value press2; + rapidjson::Value press3; + rapidjson::Value fixed_axes; + rapidjson::Value fixed_ibrav; + rapidjson::Value fixed_atoms; + rapidjson::Value cell_factor; + + // @param reading_information -- input_file -- output_information_related + rapidjson::Value out_mul; + rapidjson::Value out_freq_elec; + rapidjson::Value out_freq_ion; + rapidjson::Value out_chg; + rapidjson::Value out_pot; + rapidjson::Value out_dm; + rapidjson::Value out_dm1; + rapidjson::Value out_wfc_pw; + rapidjson::Value out_wfc_r; + rapidjson::Value out_wfc_lcao; + rapidjson::Value out_dos; + rapidjson::Value out_band; + rapidjson::Value out_proj_band; + rapidjson::Value out_stru; + rapidjson::Value out_bandgap; + rapidjson::Value out_level; + rapidjson::Value out_alllog; + rapidjson::Value out_mat_hs; + rapidjson::Value 
out_mat_r; + rapidjson::Value out_mat_hs2; + rapidjson::Value out_mat_t; + rapidjson::Value out_mat_dh; + rapidjson::Value out_app_flag; + rapidjson::Value out_interval; + rapidjson::Value out_element_info; + rapidjson::Value restart_save; + rapidjson::Value restart_load; + rapidjson::Value rpa; + + // @param reading_information -- input_file -- density_of_states + rapidjson::Value dos_edelta_ev; + rapidjson::Value dos_sigma; + rapidjson::Value dos_scale; + rapidjson::Value dos_emin_ev; + rapidjson::Value dos_emax_ev; + rapidjson::Value dos_nche; + // @param reading_information -- input_file -- naos + rapidjson::Value bessel_nao_ecut; + rapidjson::Value bessel_nao_tolerence; + rapidjson::Value bessel_nao_rcut; + rapidjson::Value bessel_nao_smooth; + rapidjson::Value bessel_nao_sigma; + // @param reading_information -- input_file -- deepks + rapidjson::Value input_file_out_labels; + rapidjson::Value input_file_scf; + rapidjson::Value input_file_model; + rapidjson::Value bessel_descriptor_lmax; + rapidjson::Value bessel_descriptor_ecut; + rapidjson::Value bessel_descriptor_tolerence; + rapidjson::Value bessel_descriptor_rcut; + rapidjson::Value bessel_descriptor_smooth; + rapidjson::Value bessel_descriptor_sigma; + rapidjson::Value input_file_bandgap; + rapidjson::Value input_file_out_unittest; + // @param reading_information -- input_file -- ofdft + rapidjson::Value of_kinetic; + rapidjson::Value of_method; + rapidjson::Value of_conv; + rapidjson::Value of_tole; + rapidjson::Value of_tolp; + rapidjson::Value of_tf_weight; + rapidjson::Value of_vw_weight; + rapidjson::Value of_wt_alpha; + rapidjson::Value of_wt_beta; + rapidjson::Value of_wt_rho0; + rapidjson::Value of_hold_rho0; + rapidjson::Value of_lkt_a; + rapidjson::Value of_read_kernel; + rapidjson::Value of_kernel_file; + rapidjson::Value of_full_pw; + rapidjson::Value of_full_pw_dim; + + // @param reading_information -- input_file -- electric_field_and_dipole_correction + rapidjson::Value efield_flag; + 
rapidjson::Value dip_cor_flag; + rapidjson::Value efield_dir; + rapidjson::Value efield_pos_max; + rapidjson::Value efield_pos_dec; + rapidjson::Value efield_amp; + // @param reading_information -- input_file -- gate_field + rapidjson::Value gate_flag; + rapidjson::Value zgate; + rapidjson::Value block; + rapidjson::Value block_down; + rapidjson::Value block_up; + rapidjson::Value block_height; + // @param reading_information -- input_file -- exact_exchange + rapidjson::Value exx_hybrid_alpha; + rapidjson::Value exx_hse_omega; + rapidjson::Value exx_separate_loop; + rapidjson::Value exx_hybrid_step; + rapidjson::Value exx_mixing_beta; + rapidjson::Value exx_lambda; + rapidjson::Value exx_pca_threshold; + rapidjson::Value exx_c_threshold; + rapidjson::Value exx_v_threshold; + rapidjson::Value exx_dm_threshold; + rapidjson::Value exx_c_grad_threshold; + rapidjson::Value exx_v_grad_threshold; + rapidjson::Value exx_schwarz_threshold; + rapidjson::Value exx_cauchy_threshold; + rapidjson::Value exx_cauchy_force_threshold; + rapidjson::Value exx_cauchy_stress_threshold; + rapidjson::Value exx_ccp_threshold; + rapidjson::Value exx_ccp_rmesh_times; + rapidjson::Value exx_distribute_type; + rapidjson::Value exx_opt_orb_lmax; + rapidjson::Value exx_opt_orb_ecut; + rapidjson::Value exx_opt_orb_tolerence; + rapidjson::Value exx_real_number; + + // @param reading_information -- input_file -- molecular_dynamics + rapidjson::Value md_type; + rapidjson::Value md_nstep; + rapidjson::Value md_dt; + rapidjson::Value md_thermostat; + rapidjson::Value md_tlast; + rapidjson::Value md_tfirst; + rapidjson::Value md_restart; + rapidjson::Value md_restartfreq; + rapidjson::Value md_dumpfreq; + rapidjson::Value dump_force; + rapidjson::Value dump_vel; + rapidjson::Value dump_virial; + rapidjson::Value md_seed; + rapidjson::Value md_tfreq; + rapidjson::Value md_tchain; + rapidjson::Value md_pmode; + rapidjson::Value md_prec_level; + rapidjson::Value ref_cell_factor; + rapidjson::Value 
md_pcouple; + rapidjson::Value md_pfirst; + rapidjson::Value md_plast; + rapidjson::Value md_pfreq; + rapidjson::Value md_pchain; + rapidjson::Value lj_rcut; + rapidjson::Value lj_epsilon; + rapidjson::Value lj_sigma; + rapidjson::Value pot_file; + rapidjson::Value msst_direction; + rapidjson::Value msst_vel; + rapidjson::Value msst_vis; + rapidjson::Value msst_tscale; + rapidjson::Value msst_qmass; + rapidjson::Value md_damp; + rapidjson::Value md_tolerance; + rapidjson::Value md_nraise; + rapidjson::Value cal_syns; + rapidjson::Value dmax; + + // @param reading_information -- input_file -- dft_plus_u + rapidjson::Value orbital_corr(rapidjson::kArrayType); + rapidjson::Value hubbard_u(rapidjson::kArrayType); + rapidjson::Value yukawa_potential; + rapidjson::Value yukawa_lambda; + rapidjson::Value omc; + + // @param reading_information -- input_file -- vdw_correction + rapidjson::Value vdw_method; + rapidjson::Value vdw_s6; + rapidjson::Value vdw_s8; + rapidjson::Value vdw_a1; + rapidjson::Value vdw_a2; + rapidjson::Value vdw_d; + rapidjson::Value vdw_abc; + rapidjson::Value vdw_C6_file; + rapidjson::Value vdw_C6_unit; + rapidjson::Value vdw_R0_file; + rapidjson::Value vdw_R0_unit; + rapidjson::Value vdw_cutoff_type; + rapidjson::Value vdw_cutoff_radius; + rapidjson::Value vdw_radius_unit; + rapidjson::Value vdw_cutoff_period(rapidjson::kArrayType); + rapidjson::Value vdw_cn_thr; + rapidjson::Value vdw_cn_thr_unit; + + // @param reading_information -- input_file -- berry_phase_and_wannier90_interface + rapidjson::Value berry_phase; + rapidjson::Value gdir; + rapidjson::Value towannier90; + rapidjson::Value nnkpfile; + rapidjson::Value wannier_spin; + + // @param reading_information -- input_file -- tddft + rapidjson::Value td_edm; + rapidjson::Value td_print_eij; + rapidjson::Value td_propagator; + rapidjson::Value td_vext; + rapidjson::Value td_vext_dire; + rapidjson::Value td_stype; + rapidjson::Value td_ttype; + rapidjson::Value td_tstart; + rapidjson::Value 
td_tend; + rapidjson::Value td_lcut1; + rapidjson::Value td_lcut2; + rapidjson::Value td_gauss_freq; + rapidjson::Value td_gauss_phase; + rapidjson::Value td_gauss_sigma; + rapidjson::Value td_gauss_t0; + rapidjson::Value td_gauss_amp; + rapidjson::Value td_trape_freq; + rapidjson::Value td_trape_phase; + rapidjson::Value td_trape_t1; + rapidjson::Value td_trape_t2; + rapidjson::Value td_trape_t3; + rapidjson::Value td_trape_amp; + rapidjson::Value td_trigo_freq1; + rapidjson::Value td_trigo_freq2; + rapidjson::Value td_trigo_phase1; + rapidjson::Value td_trigo_phase2; + rapidjson::Value td_trigo_amp; + rapidjson::Value td_heavi_t0; + rapidjson::Value td_heavi_amp; + rapidjson::Value td_out_dipole; + rapidjson::Value td_out_efield; + rapidjson::Value ocp; + rapidjson::Value ocp_set; + + // @param reading_information -- input_file -- debuging_related + rapidjson::Value t_in_h; + rapidjson::Value vl_in_h; + rapidjson::Value vnl_in_h; + rapidjson::Value vh_in_h; + rapidjson::Value vion_in_h; + rapidjson::Value test_force; + rapidjson::Value test_stress; + rapidjson::Value colour; + rapidjson::Value test_skip_ewald; + + // @param reading_information -- input_file -- electronic_conductivities + rapidjson::Value cal_cond; + rapidjson::Value cond_nche; + rapidjson::Value cond_dw; + rapidjson::Value cond_wcut; + rapidjson::Value cond_dt; + rapidjson::Value cond_dtbatch; + rapidjson::Value cond_fwhm; + rapidjson::Value cond_nonlocal; + // @param reading_information -- input_file -- implicit_solvation_model + rapidjson::Value imp_sol; + rapidjson::Value eb_k; + rapidjson::Value tau; + rapidjson::Value sigma_k; + rapidjson::Value nc_k; + + // @param reading_information -- stru_infos: + rapidjson::Value stru_infos(rapidjson::kObjectType); + // rapidjson::Value ATOMIC_SPECIES(rapidjson::kArrayType); + // rapidjson::Value NUMERICAL_ORBITAL; + // rapidjson::Value LATTICE_CONSTANT(rapidjson::kArrayType); + // rapidjson::Value ATOMIC_POSITIONS(rapidjson::kArrayType); + + // @param 
reading_information -- KPT_infos + rapidjson::Value KPT_infos(rapidjson::kObjectType); + // rapidjson::Value total_number; + // rapidjson::Value mode; + // rapidjson::Value vectors(rapidjson::kArrayType); + + // @param reading_information -- orb_infos + rapidjson::Value orb_infos(rapidjson::kObjectType); + + // @param reading_information -- pp + rapidjson::Value pp(rapidjson::kObjectType); + + // @param init + rapidjson::Value init(rapidjson::kObjectType); + // @param init -- general + // rapidjson::Value calculation; + // rapidjson::Value esolver_type; + // rapidjson::Value basis_type; + // rapidjson::Value gamma_only; + // rapidjson::Value ks_solver; + // rapidjson::Value ntype; + // rapidjson::Value nspin; + // rapidjson::Value ecutwfc; + // rapidjson::Value scf_thr; + // rapidjson::Value scf_nmax; + + // @param init -- symmetry + // rapidjson::Value symmetry(rapidjson::kObjectType); + // rapidjson::Value BRAVAIS_TYPE; + // rapidjson::Value BRAVAIS_LATTICE_NAME; + // rapidjson::Value IBRAV; + // rapidjson::Value LATTICE_CONSTANT_A; + // rapidjson::Value right_hand_lattice; + + // @param init -- Kpoints + rapidjson::Value kpoints(rapidjson::kObjectType); + rapidjson::Value nkstot; + rapidjson::Value nkstot_ibz; + rapidjson::Value coordinates(rapidjson::kArrayType); + rapidjson::Value weight(rapidjson::kArrayType); + + // @param init -- grid + rapidjson::Value grid(rapidjson::kObjectType); + rapidjson::Value energy_cutoff_for_wavefunc; + rapidjson::Value fft_grid_for_wave_functions(rapidjson::kArrayType); + rapidjson::Value number_of_plane_waves; + rapidjson::Value number_of_sticks; + + // @param init -- Smearing + // rapidjson::Value smearing_method; + // rapidjson::Value smearing_sigma; + + // @param init -- mixing + rapidjson::Value mixing; + + + // @param output + rapidjson::Value output(rapidjson::kArrayType); + + + + // @param final_stru + rapidjson::Value final_stru(rapidjson::kObjectType); + rapidjson::Value cell; + rapidjson::Value coordinate; + + + + /** 
+ * The functions below initialize the json output parameter + * tree to connect the nodes of the module + */ + + /** + * @brief add Top stage:parameter in Abacus: + */ + void Init_json_abacus() + { + + + // add First stage:parameter in abcus: + + abacus.AddMember("general_info", general_info, doc.GetAllocator()); + + abacus.AddMember("readin_info", readin_info, doc.GetAllocator()); + + abacus.AddMember("init", init, doc.GetAllocator()); + + abacus.AddMember("output", output, doc.GetAllocator()); + + abacus.AddMember("final_stru", final_stru, doc.GetAllocator()); + + doc.SetObject(); + // abacus.SetObject(); + doc.AddMember("ABACUS", abacus, doc.GetAllocator()); + /** + * . + * . + * . + * . + * . + * . + * . + * */ + } + /** + * @brief add Second stage:parameter in Abacus - general_info: + */ + void Init_json_abacus_generalInfo(){ + general_info.AddMember("version", version, doc.GetAllocator()); + + general_info.AddMember("commit", commit, doc.GetAllocator()); + + general_info.AddMember("begin_time", begin_time, doc.GetAllocator()); + + general_info.AddMember("begin_date", begin_date, doc.GetAllocator()); + + general_info.AddMember("device", device_g, doc.GetAllocator()); + + + + parallel.AddMember("drank", drank, doc.GetAllocator()); + + parallel.AddMember("dsize", dsize, doc.GetAllocator()); + + parallel.AddMember("dcolor", dcolor, doc.GetAllocator()); + + + // add Third stage:parameter in parallel: + general_info.AddMember("parallel", parallel, doc.GetAllocator()); + + } + /** + * @brief delete null node + */ + void RemoveNullValues(rapidjson::Value& parent) { + if (parent.IsObject()) { + for (rapidjson::Value::MemberIterator itr = parent.MemberBegin(); itr != parent.MemberEnd(); ) { + if (itr->value.IsNull()) { + itr = parent.EraseMember(itr); + } else { + // delet son null node + RemoveNullValues(itr->value); + ++itr; + } + } + } else if (parent.IsArray()) { + for (int i = 0; i < parent.Size(); ) { + if (parent[i].IsNull()) { + parent.Erase(parent.Begin() + 
i); + } else { + // delet son null node + RemoveNullValues(parent[i]); + ++i; + } + } + } + } + + /** + * @brief add Second stage:parameter in Abacus - readin_info: + */ + void Init_json_abacus_readinInfo(){ + //add Third stage:parameter in system_variables: + input_file.AddMember("suffix", input_suffix, doc.GetAllocator()); + input_file.AddMember("ntype", ntype, doc.GetAllocator()); + input_file.AddMember("calculation", calculation, doc.GetAllocator()); + input_file.AddMember("esolver_type", esolver_type, doc.GetAllocator()); + input_file.AddMember("symmetry", symmetry, doc.GetAllocator()); + input_file.AddMember("symmetry_precfield", symmetry_precfield, doc.GetAllocator()); + input_file.AddMember("symmetry_autoclose", symmetry_autoclose, doc.GetAllocator()); + input_file.AddMember("kpar", kpar, doc.GetAllocator()); + input_file.AddMember("bndpar", bndpar, doc.GetAllocator()); + input_file.AddMember("latname", latname, doc.GetAllocator()); + input_file.AddMember("init_wfc", init_wfc, doc.GetAllocator()); + input_file.AddMember("init_chg", init_chg, doc.GetAllocator()); + input_file.AddMember("init_vel", init_vel, doc.GetAllocator()); + input_file.AddMember("nelec", nelec, doc.GetAllocator()); + input_file.AddMember("nupdown", nupdown, doc.GetAllocator()); + input_file.AddMember("dft_functional", dft_functional, doc.GetAllocator()); + input_file.AddMember("xc_temperature", xc_temperature, doc.GetAllocator()); + input_file.AddMember("pseudo_rcut", pseudo_rcut, doc.GetAllocator()); + input_file.AddMember("pseudo_mesh", pseudo_mesh, doc.GetAllocator()); + input_file.AddMember("mem_saver", mem_saver, doc.GetAllocator()); + input_file.AddMember("diago_proc", diago_proc, doc.GetAllocator()); + input_file.AddMember("nbspline", nbspline, doc.GetAllocator()); + input_file.AddMember("kspacing", kspacing, doc.GetAllocator()); + input_file.AddMember("min_dist_coef", min_dist_coef, doc.GetAllocator()); + input_file.AddMember("device", device, doc.GetAllocator()); + + //add 
Third stage:parameter in files_related: + input_file.AddMember("stru_file", stru_file, doc.GetAllocator()); + input_file.AddMember("kpoint_file", kpoint_file, doc.GetAllocator()); + input_file.AddMember("pseudo_dir", pseudo_dir, doc.GetAllocator()); + input_file.AddMember("orbital_dir", orbital_dir, doc.GetAllocator()); + input_file.AddMember("read_file_dir", read_file_dir, doc.GetAllocator()); + input_file.AddMember("wannier_card", wannier_card, doc.GetAllocator()); + + //add Third stage:parameter in planewave_related: + input_file.AddMember("ecutwfc", ecutwfc, doc.GetAllocator()); + input_file.AddMember("nx", nx, doc.GetAllocator()); + input_file.AddMember("ny", ny, doc.GetAllocator()); + input_file.AddMember("nz", nz, doc.GetAllocator()); + input_file.AddMember("pw_seed", pw_seed, doc.GetAllocator()); + input_file.AddMember("pw_diag_thr", pw_diag_thr, doc.GetAllocator()); + input_file.AddMember("pw_diag_nmax", pw_diag_nmax, doc.GetAllocator()); + input_file.AddMember("pw_diag_ndim", pw_diag_ndim, doc.GetAllocator()); + + + //add Third stage:parameter in numerical_atomic_orbitals_related: + input_file.AddMember("nb2d", nb2d, doc.GetAllocator()); + input_file.AddMember("lmaxmax", lmaxmax, doc.GetAllocator()); + input_file.AddMember("lcao_ecut", lcao_ecut, doc.GetAllocator()); + input_file.AddMember("lcao_dk", lcao_dk, doc.GetAllocator()); + input_file.AddMember("lcao_dr", lcao_dr, doc.GetAllocator()); + input_file.AddMember("lcao_rmax", lcao_rmax, doc.GetAllocator()); + input_file.AddMember("search_radius", search_radius, doc.GetAllocator()); + input_file.AddMember("search_pbc", search_pbc, doc.GetAllocator()); + input_file.AddMember("bx", bx, doc.GetAllocator()); + input_file.AddMember("by", by, doc.GetAllocator()); + input_file.AddMember("bz", bz, doc.GetAllocator()); + + //add Third stage:parameter in electronic_structure: + input_file.AddMember("basis_type", basis_type, doc.GetAllocator()); + input_file.AddMember("ks_solver", ks_solver, doc.GetAllocator()); + 
input_file.AddMember("nbands", nbands, doc.GetAllocator()); + input_file.AddMember("nbands_istate", nbands_istate, doc.GetAllocator()); + input_file.AddMember("nspin", nspin, doc.GetAllocator()); + input_file.AddMember("smearing_method", smearing_method, doc.GetAllocator()); + input_file.AddMember("smearing_sigma", smearing_sigma, doc.GetAllocator()); + input_file.AddMember("smearing_sigma_temp", smearing_sigma_temp, doc.GetAllocator()); + input_file.AddMember("mixing_type", mixing_type, doc.GetAllocator()); + input_file.AddMember("mixing_beta", mixing_beta, doc.GetAllocator()); + input_file.AddMember("mixing_ndim", mixing_ndim, doc.GetAllocator()); + input_file.AddMember("mixing_gg0", mixing_gg0, doc.GetAllocator()); + input_file.AddMember("mixing_tau", mixing_tau, doc.GetAllocator()); + input_file.AddMember("mixing_dftu", mixing_dftu, doc.GetAllocator()); + input_file.AddMember("gamma_only", gamma_only, doc.GetAllocator()); + input_file.AddMember("printe", printe, doc.GetAllocator()); + input_file.AddMember("scf_nmax", scf_nmax, doc.GetAllocator()); + input_file.AddMember("scf_thr", scf_thr, doc.GetAllocator()); + input_file.AddMember("scf_thr_type", scf_thr_type, doc.GetAllocator()); + input_file.AddMember("chg_extrap", chg_extrap, doc.GetAllocator()); + input_file.AddMember("lspinorb", lspinorb, doc.GetAllocator()); + input_file.AddMember("noncolin", noncolin, doc.GetAllocator()); + input_file.AddMember("soc_lambda", soc_lambda, doc.GetAllocator()); + + + //add Third stage:parameter in electronic_structure_SDFT: + input_file.AddMember("method_sto", method_sto, doc.GetAllocator()); + input_file.AddMember("nbands_sto", nbands_sto, doc.GetAllocator()); + input_file.AddMember("nche_sto", nche_sto, doc.GetAllocator()); + input_file.AddMember("emin_sto", emin_sto, doc.GetAllocator()); + input_file.AddMember("emax_sto", emax_sto, doc.GetAllocator()); + input_file.AddMember("seed_sto", seed_sto, doc.GetAllocator()); + input_file.AddMember("initsto_freq", initsto_freq, 
doc.GetAllocator()); + input_file.AddMember("npart_sto", npart_sto, doc.GetAllocator()); + + + //add Third stage:parameter in geometry_relaxation: + input_file.AddMember("relax_method", relax_method, doc.GetAllocator()); + input_file.AddMember("relax_new", relax_new, doc.GetAllocator()); + input_file.AddMember("relax_scale_force", relax_scale_force, doc.GetAllocator()); + input_file.AddMember("relax_nmax", relax_nmax, doc.GetAllocator()); + input_file.AddMember("relax_cg_thr", relax_cg_thr, doc.GetAllocator()); + input_file.AddMember("cal_force", cal_force, doc.GetAllocator()); + input_file.AddMember("force_thr", force_thr, doc.GetAllocator()); + input_file.AddMember("force_thr_ev", force_thr_ev, doc.GetAllocator()); + input_file.AddMember("force_thr_ev2", force_thr_ev2, doc.GetAllocator()); + input_file.AddMember("relax_bfgs_w1", relax_bfgs_w1, doc.GetAllocator()); + input_file.AddMember("relax_bfgs_w2", relax_bfgs_w2, doc.GetAllocator()); + input_file.AddMember("relax_bfgs_rmax", relax_bfgs_rmax, doc.GetAllocator()); + input_file.AddMember("relax_bfgs_rmin", relax_bfgs_rmin, doc.GetAllocator()); + input_file.AddMember("relax_bfgs_init", relax_bfgs_init, doc.GetAllocator()); + input_file.AddMember("cal_stress", cal_stress, doc.GetAllocator()); + input_file.AddMember("stress_thr", stress_thr, doc.GetAllocator()); + input_file.AddMember("press1", press1, doc.GetAllocator()); + input_file.AddMember("press2", press2, doc.GetAllocator()); + input_file.AddMember("press3", press3, doc.GetAllocator()); + input_file.AddMember("fixed_axes", fixed_axes, doc.GetAllocator()); + input_file.AddMember("fixed_ibrav", fixed_ibrav, doc.GetAllocator()); + input_file.AddMember("fixed_atoms", fixed_atoms, doc.GetAllocator()); + input_file.AddMember("cell_factor", cell_factor, doc.GetAllocator()); + + + //add Third stage:parameter in output_information_related: + input_file.AddMember("out_mul", out_mul, doc.GetAllocator()); + input_file.AddMember("out_freq_elec", out_freq_elec, 
doc.GetAllocator()); + input_file.AddMember("out_freq_ion", out_freq_ion, doc.GetAllocator()); + input_file.AddMember("out_chg", out_chg, doc.GetAllocator()); + input_file.AddMember("out_pot", out_pot, doc.GetAllocator()); + input_file.AddMember("out_dm", out_dm, doc.GetAllocator()); + input_file.AddMember("out_dm1", out_dm1, doc.GetAllocator()); + input_file.AddMember("out_wfc_pw", out_wfc_pw, doc.GetAllocator()); + input_file.AddMember("out_wfc_r", out_wfc_r, doc.GetAllocator()); + input_file.AddMember("out_wfc_lcao", out_wfc_lcao, doc.GetAllocator()); + input_file.AddMember("out_dos", out_dos, doc.GetAllocator()); + input_file.AddMember("out_band", out_band, doc.GetAllocator()); + input_file.AddMember("out_proj_band", out_proj_band, doc.GetAllocator()); + input_file.AddMember("out_stru", out_stru, doc.GetAllocator()); + input_file.AddMember("out_bandgap", out_bandgap, doc.GetAllocator()); + input_file.AddMember("out_level", out_level, doc.GetAllocator()); + input_file.AddMember("out_alllog", out_alllog, doc.GetAllocator()); + input_file.AddMember("out_mat_hs", out_mat_hs, doc.GetAllocator()); + input_file.AddMember("out_mat_r", out_mat_r, doc.GetAllocator()); + input_file.AddMember("out_mat_hs2", out_mat_hs2, doc.GetAllocator()); + input_file.AddMember("out_mat_t", out_mat_t, doc.GetAllocator()); + input_file.AddMember("out_mat_dh", out_mat_dh, doc.GetAllocator()); + input_file.AddMember("out_app_flag", out_app_flag, doc.GetAllocator()); + input_file.AddMember("out_interval", out_interval, doc.GetAllocator()); + input_file.AddMember("out_element_info", out_element_info, doc.GetAllocator()); + input_file.AddMember("restart_save", restart_save, doc.GetAllocator()); + input_file.AddMember("restart_load", restart_load, doc.GetAllocator()); + input_file.AddMember("rpa", rpa, doc.GetAllocator()); + + //add Third stage:parameter in density_of_states: + input_file.AddMember("dos_edelta_ev", dos_edelta_ev, doc.GetAllocator()); + input_file.AddMember("dos_sigma", 
dos_sigma, doc.GetAllocator()); + input_file.AddMember("dos_scale", dos_scale, doc.GetAllocator()); + input_file.AddMember("dos_emin_ev", dos_emin_ev, doc.GetAllocator()); + input_file.AddMember("dos_emax_ev", dos_emax_ev, doc.GetAllocator()); + input_file.AddMember("dos_nche", dos_nche, doc.GetAllocator()); + + //add Third stage:parameter in naos: + input_file.AddMember("bessel_nao_ecut", bessel_nao_ecut, doc.GetAllocator()); + input_file.AddMember("bessel_nao_tolerence", bessel_nao_tolerence, doc.GetAllocator()); + input_file.AddMember("bessel_nao_rcut", bessel_nao_rcut, doc.GetAllocator()); + input_file.AddMember("bessel_nao_smooth", bessel_nao_smooth, doc.GetAllocator()); + input_file.AddMember("bessel_nao_sigma", bessel_nao_sigma, doc.GetAllocator()); + + //add Third stage:parameter in deepks: + input_file.AddMember("input_file_out_labels", input_file_out_labels, doc.GetAllocator()); + input_file.AddMember("input_file_scf", input_file_scf, doc.GetAllocator()); + input_file.AddMember("input_file_model", input_file_model, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_lmax", bessel_descriptor_lmax, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_ecut", bessel_descriptor_ecut, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_tolerence", bessel_descriptor_tolerence, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_rcut", bessel_descriptor_rcut, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_smooth", bessel_descriptor_smooth, doc.GetAllocator()); + input_file.AddMember("bessel_descriptor_sigma", bessel_descriptor_sigma, doc.GetAllocator()); + input_file.AddMember("input_file_bandgap", input_file_bandgap, doc.GetAllocator()); + input_file.AddMember("input_file_out_unittest", input_file_out_unittest, doc.GetAllocator()); + + //add Third stage:parameter in ofdft: + input_file.AddMember("of_kinetic", of_kinetic, doc.GetAllocator()); + input_file.AddMember("of_method", of_method, 
doc.GetAllocator()); + input_file.AddMember("of_conv", of_conv, doc.GetAllocator()); + input_file.AddMember("of_tole", of_tole, doc.GetAllocator()); + input_file.AddMember("of_tolp", of_tolp, doc.GetAllocator()); + input_file.AddMember("of_tf_weight", of_tf_weight, doc.GetAllocator()); + input_file.AddMember("of_vw_weight", of_vw_weight, doc.GetAllocator()); + input_file.AddMember("of_wt_alpha", of_wt_alpha, doc.GetAllocator()); + input_file.AddMember("of_wt_beta", of_wt_beta, doc.GetAllocator()); + input_file.AddMember("of_wt_rho0", of_wt_rho0, doc.GetAllocator()); + input_file.AddMember("of_hold_rho0", of_hold_rho0, doc.GetAllocator()); + input_file.AddMember("of_lkt_a", of_lkt_a, doc.GetAllocator()); + input_file.AddMember("of_read_kernel", of_read_kernel, doc.GetAllocator()); + input_file.AddMember("of_kernel_file", of_kernel_file, doc.GetAllocator()); + input_file.AddMember("of_full_pw", of_full_pw, doc.GetAllocator()); + input_file.AddMember("of_full_pw_dim", of_full_pw_dim, doc.GetAllocator()); + + + //add Third stage:parameter in electric_field_and_dipole_correction: + input_file.AddMember("efield_flag", efield_flag, doc.GetAllocator()); + input_file.AddMember("dip_cor_flag", dip_cor_flag, doc.GetAllocator()); + input_file.AddMember("efield_dir", efield_dir, doc.GetAllocator()); + input_file.AddMember("efield_pos_max", efield_pos_max, doc.GetAllocator()); + input_file.AddMember("efield_pos_dec", efield_pos_dec, doc.GetAllocator()); + input_file.AddMember("efield_amp", efield_amp, doc.GetAllocator()); + + //add Third stage:parameter in gate_field: + input_file.AddMember("gate_flag", gate_flag, doc.GetAllocator()); + input_file.AddMember("zgate", zgate, doc.GetAllocator()); + input_file.AddMember("block", block, doc.GetAllocator()); + input_file.AddMember("block_down", block_down, doc.GetAllocator()); + input_file.AddMember("block_up", block_up, doc.GetAllocator()); + input_file.AddMember("block_height", block_height, doc.GetAllocator()); + + //add Third 
stage:parameter in exact_exchange: + input_file.AddMember("exx_hybrid_alpha", exx_hybrid_alpha, doc.GetAllocator()); + input_file.AddMember("exx_hse_omega", exx_hse_omega, doc.GetAllocator()); + input_file.AddMember("exx_separate_loop", exx_separate_loop, doc.GetAllocator()); + input_file.AddMember("exx_hybrid_step", exx_hybrid_step, doc.GetAllocator()); + input_file.AddMember("exx_mixing_beta", exx_mixing_beta, doc.GetAllocator()); + input_file.AddMember("exx_lambda", exx_lambda, doc.GetAllocator()); + input_file.AddMember("exx_pca_threshold", exx_pca_threshold, doc.GetAllocator()); + input_file.AddMember("exx_c_threshold", exx_c_threshold, doc.GetAllocator()); + input_file.AddMember("exx_v_threshold", exx_v_threshold, doc.GetAllocator()); + input_file.AddMember("exx_dm_threshold", exx_dm_threshold, doc.GetAllocator()); + input_file.AddMember("exx_c_grad_threshold", exx_c_grad_threshold, doc.GetAllocator()); + input_file.AddMember("exx_v_grad_threshold", exx_v_grad_threshold, doc.GetAllocator()); + input_file.AddMember("exx_schwarz_threshold", exx_schwarz_threshold, doc.GetAllocator()); + input_file.AddMember("exx_cauchy_threshold", exx_cauchy_threshold, doc.GetAllocator()); + input_file.AddMember("exx_cauchy_force_threshold", exx_cauchy_force_threshold, doc.GetAllocator()); + input_file.AddMember("exx_cauchy_stress_threshold", exx_cauchy_stress_threshold, doc.GetAllocator()); + input_file.AddMember("exx_ccp_threshold", exx_ccp_threshold, doc.GetAllocator()); + input_file.AddMember("exx_ccp_rmesh_times", exx_ccp_rmesh_times, doc.GetAllocator()); + input_file.AddMember("exx_distribute_type", exx_distribute_type, doc.GetAllocator()); + input_file.AddMember("exx_opt_orb_lmax", exx_opt_orb_lmax, doc.GetAllocator()); + input_file.AddMember("exx_opt_orb_ecut", exx_opt_orb_ecut, doc.GetAllocator()); + input_file.AddMember("exx_opt_orb_tolerence", exx_opt_orb_tolerence, doc.GetAllocator()); + input_file.AddMember("exx_real_number", exx_real_number, doc.GetAllocator()); + 
+ + //add Third stage:parameter in molecular_dynamics: + input_file.AddMember("md_type", md_type, doc.GetAllocator()); + input_file.AddMember("md_nstep", md_nstep, doc.GetAllocator()); + input_file.AddMember("md_dt", md_dt, doc.GetAllocator()); + input_file.AddMember("md_thermostat", md_thermostat, doc.GetAllocator()); + input_file.AddMember("md_tlast", md_tlast, doc.GetAllocator()); + input_file.AddMember("md_tfirst", md_tfirst, doc.GetAllocator()); + input_file.AddMember("md_restart", md_restart, doc.GetAllocator()); + input_file.AddMember("md_restartfreq", md_restartfreq, doc.GetAllocator()); + input_file.AddMember("md_dumpfreq", md_dumpfreq, doc.GetAllocator()); + input_file.AddMember("dump_force", dump_force, doc.GetAllocator()); + input_file.AddMember("dump_vel", dump_vel, doc.GetAllocator()); + input_file.AddMember("dump_virial", dump_virial, doc.GetAllocator()); + input_file.AddMember("md_seed", md_seed, doc.GetAllocator()); + input_file.AddMember("md_tfreq", md_tfreq, doc.GetAllocator()); + input_file.AddMember("md_tchain", md_tchain, doc.GetAllocator()); + input_file.AddMember("md_pmode", md_pmode, doc.GetAllocator()); + input_file.AddMember("md_prec_level", md_prec_level, doc.GetAllocator()); + input_file.AddMember("ref_cell_factor", ref_cell_factor, doc.GetAllocator()); + input_file.AddMember("md_pcouple", md_pcouple, doc.GetAllocator()); + input_file.AddMember("md_pfirst", md_pfirst, doc.GetAllocator()); + input_file.AddMember("md_plast", md_plast, doc.GetAllocator()); + input_file.AddMember("md_pfreq", md_pfreq, doc.GetAllocator()); + input_file.AddMember("md_pchain", md_pchain, doc.GetAllocator()); + input_file.AddMember("lj_rcut", lj_rcut, doc.GetAllocator()); + input_file.AddMember("lj_epsilon", lj_epsilon, doc.GetAllocator()); + input_file.AddMember("lj_sigma", lj_sigma, doc.GetAllocator()); + input_file.AddMember("pot_file", pot_file, doc.GetAllocator()); + input_file.AddMember("msst_direction", msst_direction, doc.GetAllocator()); + 
input_file.AddMember("msst_vel", msst_vel, doc.GetAllocator()); + input_file.AddMember("msst_vis", msst_vis, doc.GetAllocator()); + input_file.AddMember("msst_tscale", msst_tscale, doc.GetAllocator()); + input_file.AddMember("msst_qmass", msst_qmass, doc.GetAllocator()); + input_file.AddMember("md_damp", md_damp, doc.GetAllocator()); + input_file.AddMember("md_tolerance", md_tolerance, doc.GetAllocator()); + input_file.AddMember("md_nraise", md_nraise, doc.GetAllocator()); + input_file.AddMember("cal_syns", cal_syns, doc.GetAllocator()); + input_file.AddMember("dmax", dmax, doc.GetAllocator()); + + //add Third stage:parameter in dft_plus_u: + input_file.AddMember("orbital_corr", orbital_corr, doc.GetAllocator()); + input_file.AddMember("hubbard_u", hubbard_u, doc.GetAllocator()); + input_file.AddMember("yukawa_potential", yukawa_potential, doc.GetAllocator()); + input_file.AddMember("yukawa_lambda", yukawa_lambda, doc.GetAllocator()); + input_file.AddMember("omc", omc, doc.GetAllocator()); + + //add Third stage:parameter in vdw_correction: + input_file.AddMember("vdw_method", vdw_method, doc.GetAllocator()); + input_file.AddMember("vdw_s6", vdw_s6, doc.GetAllocator()); + input_file.AddMember("vdw_s8", vdw_s8, doc.GetAllocator()); + input_file.AddMember("vdw_a1", vdw_a1, doc.GetAllocator()); + input_file.AddMember("vdw_a2", vdw_a2, doc.GetAllocator()); + input_file.AddMember("vdw_d", vdw_d, doc.GetAllocator()); + input_file.AddMember("vdw_abc", vdw_abc, doc.GetAllocator()); + input_file.AddMember("vdw_C6_file", vdw_C6_file, doc.GetAllocator()); + input_file.AddMember("vdw_C6_unit", vdw_C6_unit, doc.GetAllocator()); + input_file.AddMember("vdw_R0_file", vdw_R0_file, doc.GetAllocator()); + input_file.AddMember("vdw_R0_unit", vdw_R0_unit, doc.GetAllocator()); + input_file.AddMember("vdw_cutoff_type", vdw_cutoff_type, doc.GetAllocator()); + input_file.AddMember("vdw_cutoff_radius", vdw_cutoff_radius, doc.GetAllocator()); + input_file.AddMember("vdw_radius_unit", 
vdw_radius_unit, doc.GetAllocator()); + input_file.AddMember("vdw_cutoff_period", vdw_cutoff_period, doc.GetAllocator()); + input_file.AddMember("vdw_cn_thr", vdw_cn_thr, doc.GetAllocator()); + input_file.AddMember("vdw_cn_thr_unit", vdw_cn_thr_unit, doc.GetAllocator()); + + //add Third stage:parameter in berry_phase_and_wannier90_interface: + input_file.AddMember("berry_phase", berry_phase, doc.GetAllocator()); + input_file.AddMember("gdir", gdir, doc.GetAllocator()); + input_file.AddMember("towannier90", towannier90, doc.GetAllocator()); + input_file.AddMember("nnkpfile", nnkpfile, doc.GetAllocator()); + input_file.AddMember("wannier_spin", wannier_spin, doc.GetAllocator()); + + //add Third stage:parameter in tddft: + input_file.AddMember("td_edm", td_edm, doc.GetAllocator()); + input_file.AddMember("td_print_eij", td_print_eij, doc.GetAllocator()); + input_file.AddMember("td_propagator", td_propagator, doc.GetAllocator()); + input_file.AddMember("td_vext", td_vext, doc.GetAllocator()); + input_file.AddMember("td_vext_dire", td_vext_dire, doc.GetAllocator()); + input_file.AddMember("td_stype", td_stype, doc.GetAllocator()); + input_file.AddMember("td_ttype", td_ttype, doc.GetAllocator()); + input_file.AddMember("td_tstart", td_tstart, doc.GetAllocator()); + input_file.AddMember("td_tend", td_tend, doc.GetAllocator()); + input_file.AddMember("td_lcut1", td_lcut1, doc.GetAllocator()); + input_file.AddMember("td_lcut2", td_lcut2, doc.GetAllocator()); + input_file.AddMember("td_gauss_freq", td_gauss_freq, doc.GetAllocator()); + input_file.AddMember("td_gauss_phase", td_gauss_phase, doc.GetAllocator()); + input_file.AddMember("td_gauss_sigma", td_gauss_sigma, doc.GetAllocator()); + input_file.AddMember("td_gauss_t0", td_gauss_t0, doc.GetAllocator()); + input_file.AddMember("td_gauss_amp", td_gauss_amp, doc.GetAllocator()); + input_file.AddMember("td_trape_freq", td_trape_freq, doc.GetAllocator()); + input_file.AddMember("td_trape_phase", td_trape_phase, 
doc.GetAllocator()); + input_file.AddMember("td_trape_t1", td_trape_t1, doc.GetAllocator()); + input_file.AddMember("td_trape_t2", td_trape_t2, doc.GetAllocator()); + input_file.AddMember("td_trape_t3", td_trape_t3, doc.GetAllocator()); + input_file.AddMember("td_trape_amp", td_trape_amp, doc.GetAllocator()); + input_file.AddMember("td_trigo_freq1", td_trigo_freq1, doc.GetAllocator()); + input_file.AddMember("td_trigo_freq2", td_trigo_freq2, doc.GetAllocator()); + input_file.AddMember("td_trigo_phase1", td_trigo_phase1, doc.GetAllocator()); + input_file.AddMember("td_trigo_phase2", td_trigo_phase2, doc.GetAllocator()); + input_file.AddMember("td_trigo_amp", td_trigo_amp, doc.GetAllocator()); + input_file.AddMember("td_heavi_t0", td_heavi_t0, doc.GetAllocator()); + input_file.AddMember("td_heavi_amp", td_heavi_amp, doc.GetAllocator()); + input_file.AddMember("td_out_dipole", td_out_dipole, doc.GetAllocator()); + input_file.AddMember("td_out_efield", td_out_efield, doc.GetAllocator()); + input_file.AddMember("ocp", ocp, doc.GetAllocator()); + input_file.AddMember("ocp_set", ocp_set, doc.GetAllocator()); + + //add Third stage:parameter in debuging_related: + input_file.AddMember("t_in_h", t_in_h, doc.GetAllocator()); + input_file.AddMember("vl_in_h", vl_in_h, doc.GetAllocator()); + input_file.AddMember("vnl_in_h", vnl_in_h, doc.GetAllocator()); + input_file.AddMember("vh_in_h", vh_in_h, doc.GetAllocator()); + input_file.AddMember("vion_in_h", vion_in_h, doc.GetAllocator()); + input_file.AddMember("test_force", test_force, doc.GetAllocator()); + input_file.AddMember("test_stress", test_stress, doc.GetAllocator()); + input_file.AddMember("colour", colour, doc.GetAllocator()); + input_file.AddMember("test_skip_ewald", test_skip_ewald, doc.GetAllocator()); + + //add Third stage:parameter in electronic_conductivities: + input_file.AddMember("cal_cond", cal_cond, doc.GetAllocator()); + input_file.AddMember("cond_nche", cond_nche, doc.GetAllocator()); + 
input_file.AddMember("cond_dw", cond_dw, doc.GetAllocator()); + input_file.AddMember("cond_wcut", cond_wcut, doc.GetAllocator()); + input_file.AddMember("cond_dt", cond_dt, doc.GetAllocator()); + input_file.AddMember("cond_dtbatch", cond_dtbatch, doc.GetAllocator()); + input_file.AddMember("cond_fwhm", cond_fwhm, doc.GetAllocator()); + input_file.AddMember("cond_nonlocal", cond_nonlocal, doc.GetAllocator()); + + //add Third stage:parameter in implicit_solvation_model: + input_file.AddMember("imp_sol", imp_sol, doc.GetAllocator()); + input_file.AddMember("eb_k", eb_k, doc.GetAllocator()); + input_file.AddMember("tau", tau, doc.GetAllocator()); + input_file.AddMember("sigma_k", sigma_k, doc.GetAllocator()); + input_file.AddMember("nc_k", nc_k, doc.GetAllocator()); + + + RemoveNullValues(input_file); + + + // after add child_node's node in readin_info, add child node + // add parameters in readin_info: + readin_info.AddMember("input_file", input_file, doc.GetAllocator()); + + } + + + void Finish_json_tree(){ + // Converts a json object to a string + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc.Accept(writer); + + // Output the json string to a file + std::string json_path; + json_path.append("abacus.json"); + + std::ofstream ofs(json_path); + ofs << buffer.GetString() << std::endl; + ofs.close(); + } + + + + +} + + +#endif \ No newline at end of file diff --git a/source/module_base/para_json.h b/source/module_base/para_json.h new file mode 100644 index 0000000000..0e829dc80e --- /dev/null +++ b/source/module_base/para_json.h @@ -0,0 +1,560 @@ + +#include +#include +#include +#include +#include + +#ifdef __RAPIDJSON + +#include +#include +#include + + +/** + * @brief This namespace is used to store the json object of the + * abacus parameter and its handlers. 
Used to read the parameters + * at run time and finally organize them into json format files + * +*/ +namespace Para_Json +{ + + extern int test; + // @param doc: the output json file + extern rapidjson::Document doc; + extern rapidjson::Value abacus; + + // @param general_info : + extern rapidjson::Value general_info; + extern rapidjson::Value version; + extern rapidjson::Value commit; + extern rapidjson::Value begin_time; + extern rapidjson::Value begin_date; + extern rapidjson::Value device_g; + // @param general_info -- parallel: + extern rapidjson::Value parallel; + extern rapidjson::Value drank; + extern rapidjson::Value dsize; + extern rapidjson::Value dcolor ; + // @param general_info -- path + extern rapidjson::Value path; + extern rapidjson::Value global_out_dir; + extern rapidjson::Value global_in_card; + extern rapidjson::Value pseudo_dir_path ; + extern rapidjson::Value orbital_dir_path; + + + // @param reading_information: + extern rapidjson::Value readin_info; + // @param reading_information -- input_para: + + // @param reading_information -- input_para -- system_variables: + extern rapidjson::Value system_variables; + + extern rapidjson::Value input_file; + extern rapidjson::Value input_suffix; + extern rapidjson::Value ntype; + extern rapidjson::Value calculation; + extern rapidjson::Value esolver_type; + extern rapidjson::Value symmetry; + extern rapidjson::Value symmetry_precfield; + extern rapidjson::Value symmetry_autoclose; + extern rapidjson::Value kpar; + extern rapidjson::Value bndpar; + extern rapidjson::Value latname; + extern rapidjson::Value init_wfc; + extern rapidjson::Value init_chg; + extern rapidjson::Value init_vel; + extern rapidjson::Value nelec; + extern rapidjson::Value nupdown; + extern rapidjson::Value dft_functional; + extern rapidjson::Value xc_temperature; + extern rapidjson::Value pseudo_rcut; + extern rapidjson::Value pseudo_mesh; + extern rapidjson::Value mem_saver; + extern rapidjson::Value diago_proc; + extern 
rapidjson::Value nbspline; + extern rapidjson::Value kspacing; + extern rapidjson::Value min_dist_coef; + extern rapidjson::Value device; + // @param reading_information -- input_para -- files_related + + extern rapidjson::Value stru_file; + extern rapidjson::Value kpoint_file; + extern rapidjson::Value pseudo_dir; + extern rapidjson::Value orbital_dir; + extern rapidjson::Value read_file_dir; + extern rapidjson::Value wannier_card; + // @param reading_information -- input_para -- planewave_related + + extern rapidjson::Value ecutwfc; + extern rapidjson::Value nx; + extern rapidjson::Value ny; + extern rapidjson::Value nz; + extern rapidjson::Value pw_seed; + extern rapidjson::Value pw_diag_thr; + extern rapidjson::Value pw_diag_nmax; + extern rapidjson::Value pw_diag_ndim; + // @param reading_information -- input_para -- numerical_atomic_orbitals_related + + extern rapidjson::Value nb2d; + extern rapidjson::Value lmaxmax; + extern rapidjson::Value lcao_ecut; + extern rapidjson::Value lcao_dk; + extern rapidjson::Value lcao_dr; + extern rapidjson::Value lcao_rmax; + extern rapidjson::Value search_radius; + extern rapidjson::Value search_pbc; + extern rapidjson::Value bx; + extern rapidjson::Value by; + extern rapidjson::Value bz; + // @param reading_information -- input_para -- electronic_structure + + extern rapidjson::Value basis_type; + extern rapidjson::Value ks_solver; + extern rapidjson::Value nbands; + extern rapidjson::Value nbands_istate; + extern rapidjson::Value nspin; + extern rapidjson::Value smearing_method; + extern rapidjson::Value smearing_sigma; + extern rapidjson::Value smearing_sigma_temp; + extern rapidjson::Value mixing_type; + extern rapidjson::Value mixing_beta; + extern rapidjson::Value mixing_ndim; + extern rapidjson::Value mixing_gg0; + extern rapidjson::Value mixing_tau; + extern rapidjson::Value mixing_dftu; + extern rapidjson::Value gamma_only; + extern rapidjson::Value printe; + extern rapidjson::Value scf_nmax; + extern 
rapidjson::Value scf_thr; + extern rapidjson::Value scf_thr_type; + extern rapidjson::Value chg_extrap; + extern rapidjson::Value lspinorb; + extern rapidjson::Value noncolin; + extern rapidjson::Value soc_lambda; + // @param reading_information -- input_para -- electronic_structure_SDFT + + extern rapidjson::Value method_sto; + extern rapidjson::Value nbands_sto; + extern rapidjson::Value nche_sto; + extern rapidjson::Value emin_sto; + extern rapidjson::Value emax_sto; + extern rapidjson::Value seed_sto; + extern rapidjson::Value initsto_freq; + extern rapidjson::Value npart_sto; + // @param reading_information -- input_para -- geometry_relaxation + + extern rapidjson::Value relax_method; + extern rapidjson::Value relax_new; + extern rapidjson::Value relax_scale_force; + extern rapidjson::Value relax_nmax; + extern rapidjson::Value relax_cg_thr; + extern rapidjson::Value cal_force; + extern rapidjson::Value force_thr; + extern rapidjson::Value force_thr_ev; + extern rapidjson::Value force_thr_ev2; + extern rapidjson::Value relax_bfgs_w1; + extern rapidjson::Value relax_bfgs_w2; + extern rapidjson::Value relax_bfgs_rmax; + extern rapidjson::Value relax_bfgs_rmin; + extern rapidjson::Value relax_bfgs_init; + extern rapidjson::Value cal_stress; + extern rapidjson::Value stress_thr; + extern rapidjson::Value press1; + extern rapidjson::Value press2; + extern rapidjson::Value press3; + extern rapidjson::Value fixed_axes; + extern rapidjson::Value fixed_ibrav; + extern rapidjson::Value fixed_atoms; + extern rapidjson::Value cell_factor; + + // @param reading_information -- input_para -- output_information_related + + extern rapidjson::Value out_mul; + extern rapidjson::Value out_freq_elec; + extern rapidjson::Value out_freq_ion; + extern rapidjson::Value out_chg; + extern rapidjson::Value out_pot; + extern rapidjson::Value out_dm; + extern rapidjson::Value out_dm1; + extern rapidjson::Value out_wfc_pw; + extern rapidjson::Value out_wfc_r; + extern rapidjson::Value 
out_wfc_lcao; + extern rapidjson::Value out_dos; + extern rapidjson::Value out_band; + extern rapidjson::Value out_proj_band; + extern rapidjson::Value out_stru; + extern rapidjson::Value out_bandgap; + extern rapidjson::Value out_level; + extern rapidjson::Value out_alllog; + extern rapidjson::Value out_mat_hs; + extern rapidjson::Value out_mat_r; + extern rapidjson::Value out_mat_hs2; + extern rapidjson::Value out_mat_t; + extern rapidjson::Value out_mat_dh; + extern rapidjson::Value out_app_flag; + extern rapidjson::Value out_interval; + extern rapidjson::Value out_element_info; + extern rapidjson::Value restart_save; + extern rapidjson::Value restart_load; + extern rapidjson::Value rpa; + + // @param reading_information -- input_para -- density_of_states + + extern rapidjson::Value dos_edelta_ev; + extern rapidjson::Value dos_sigma; + extern rapidjson::Value dos_scale; + extern rapidjson::Value dos_emin_ev; + extern rapidjson::Value dos_emax_ev; + extern rapidjson::Value dos_nche; + // @param reading_information -- input_para -- naos + extern rapidjson::Value bessel_nao_ecut; + extern rapidjson::Value bessel_nao_tolerence; + extern rapidjson::Value bessel_nao_rcut; + extern rapidjson::Value bessel_nao_smooth; + extern rapidjson::Value bessel_nao_sigma; + // @param reading_information -- input_para -- deepks + + extern rapidjson::Value deepks_out_labels; + extern rapidjson::Value deepks_scf; + extern rapidjson::Value deepks_model; + extern rapidjson::Value bessel_descriptor_lmax; + extern rapidjson::Value bessel_descriptor_ecut; + extern rapidjson::Value bessel_descriptor_tolerence; + extern rapidjson::Value bessel_descriptor_rcut; + extern rapidjson::Value bessel_descriptor_smooth; + extern rapidjson::Value bessel_descriptor_sigma; + extern rapidjson::Value deepks_bandgap; + extern rapidjson::Value deepks_out_unittest; + // @param reading_information -- input_para -- ofdft + extern rapidjson::Value of_kinetic; + extern rapidjson::Value of_method; + extern 
rapidjson::Value of_conv; + extern rapidjson::Value of_tole; + extern rapidjson::Value of_tolp; + extern rapidjson::Value of_tf_weight; + extern rapidjson::Value of_vw_weight; + extern rapidjson::Value of_wt_alpha; + extern rapidjson::Value of_wt_beta; + extern rapidjson::Value of_wt_rho0; + extern rapidjson::Value of_hold_rho0; + extern rapidjson::Value of_lkt_a; + extern rapidjson::Value of_read_kernel; + extern rapidjson::Value of_kernel_file; + extern rapidjson::Value of_full_pw; + extern rapidjson::Value of_full_pw_dim; + + // @param reading_information -- input_para -- electric_field_and_dipole_correction + + extern rapidjson::Value efield_flag; + extern rapidjson::Value dip_cor_flag; + extern rapidjson::Value efield_dir; + extern rapidjson::Value efield_pos_max; + extern rapidjson::Value efield_pos_dec; + extern rapidjson::Value efield_amp; + // @param reading_information -- input_para -- gate_field + + extern rapidjson::Value gate_flag; + extern rapidjson::Value zgate; + extern rapidjson::Value block; + extern rapidjson::Value block_down; + extern rapidjson::Value block_up; + extern rapidjson::Value block_height; + // @param reading_information -- input_para -- exact_exchange + extern rapidjson::Value exx_hybrid_alpha; + extern rapidjson::Value exx_hse_omega; + extern rapidjson::Value exx_separate_loop; + extern rapidjson::Value exx_hybrid_step; + extern rapidjson::Value exx_mixing_beta; + extern rapidjson::Value exx_lambda; + extern rapidjson::Value exx_pca_threshold; + extern rapidjson::Value exx_c_threshold; + extern rapidjson::Value exx_v_threshold; + extern rapidjson::Value exx_dm_threshold; + extern rapidjson::Value exx_c_grad_threshold; + extern rapidjson::Value exx_v_grad_threshold; + extern rapidjson::Value exx_schwarz_threshold; + extern rapidjson::Value exx_cauchy_threshold; + extern rapidjson::Value exx_cauchy_force_threshold; + extern rapidjson::Value exx_cauchy_stress_threshold; + extern rapidjson::Value exx_ccp_threshold; + extern 
rapidjson::Value exx_ccp_rmesh_times; + extern rapidjson::Value exx_distribute_type; + extern rapidjson::Value exx_opt_orb_lmax; + extern rapidjson::Value exx_opt_orb_ecut; + extern rapidjson::Value exx_opt_orb_tolerence; + extern rapidjson::Value exx_real_number; + + // @param reading_information -- input_para -- molecular_dynamics + extern rapidjson::Value md_type; + extern rapidjson::Value md_nstep; + extern rapidjson::Value md_dt; + extern rapidjson::Value md_thermostat; + extern rapidjson::Value md_tlast; + extern rapidjson::Value md_tfirst; + extern rapidjson::Value md_restart; + extern rapidjson::Value md_restartfreq; + extern rapidjson::Value md_dumpfreq; + extern rapidjson::Value dump_force; + extern rapidjson::Value dump_vel; + extern rapidjson::Value dump_virial; + extern rapidjson::Value md_seed; + extern rapidjson::Value md_tfreq; + extern rapidjson::Value md_tchain; + extern rapidjson::Value md_pmode; + extern rapidjson::Value md_prec_level; + extern rapidjson::Value ref_cell_factor; + extern rapidjson::Value md_pcouple; + extern rapidjson::Value md_pfirst; + extern rapidjson::Value md_plast; + extern rapidjson::Value md_pfreq; + extern rapidjson::Value md_pchain; + extern rapidjson::Value lj_rcut; + extern rapidjson::Value lj_epsilon; + extern rapidjson::Value lj_sigma; + extern rapidjson::Value pot_file; + extern rapidjson::Value msst_direction; + extern rapidjson::Value msst_vel; + extern rapidjson::Value msst_vis; + extern rapidjson::Value msst_tscale; + extern rapidjson::Value msst_qmass; + extern rapidjson::Value md_damp; + extern rapidjson::Value md_tolerance; + extern rapidjson::Value md_nraise; + extern rapidjson::Value cal_syns; + extern rapidjson::Value dmax; + + // @param reading_information -- input_para -- dft_plus_u + extern rapidjson::Value orbital_corr; + extern rapidjson::Value hubbard_u; + extern rapidjson::Value yukawa_potential; + extern rapidjson::Value yukawa_lambda; + extern rapidjson::Value omc; + + // @param 
reading_information -- input_para -- vdw_correction + extern rapidjson::Value vdw_method; + extern rapidjson::Value vdw_s6; + extern rapidjson::Value vdw_s8; + extern rapidjson::Value vdw_a1; + extern rapidjson::Value vdw_a2; + extern rapidjson::Value vdw_d; + extern rapidjson::Value vdw_abc; + extern rapidjson::Value vdw_C6_file; + extern rapidjson::Value vdw_C6_unit; + extern rapidjson::Value vdw_R0_file; + extern rapidjson::Value vdw_R0_unit; + extern rapidjson::Value vdw_cutoff_type; + extern rapidjson::Value vdw_cutoff_radius; + extern rapidjson::Value vdw_radius_unit; + extern rapidjson::Value vdw_cutoff_period; + extern rapidjson::Value vdw_cn_thr; + extern rapidjson::Value vdw_cn_thr_unit; + + // @param reading_information -- input_para -- berry_phase_and_wannier90_interface + extern rapidjson::Value berry_phase; + extern rapidjson::Value gdir; + extern rapidjson::Value towannier90; + extern rapidjson::Value nnkpfile; + extern rapidjson::Value wannier_spin; + + // @param reading_information -- input_para -- tddft + extern rapidjson::Value td_edm; + extern rapidjson::Value td_print_eij; + extern rapidjson::Value td_propagator; + extern rapidjson::Value td_vext; + extern rapidjson::Value td_vext_dire; + extern rapidjson::Value td_stype; + extern rapidjson::Value td_ttype; + extern rapidjson::Value td_tstart; + extern rapidjson::Value td_tend; + extern rapidjson::Value td_lcut1; + extern rapidjson::Value td_lcut2; + extern rapidjson::Value td_gauss_freq; + extern rapidjson::Value td_gauss_phase; + extern rapidjson::Value td_gauss_sigma; + extern rapidjson::Value td_gauss_t0; + extern rapidjson::Value td_gauss_amp; + extern rapidjson::Value td_trape_freq; + extern rapidjson::Value td_trape_phase; + extern rapidjson::Value td_trape_t1; + extern rapidjson::Value td_trape_t2; + extern rapidjson::Value td_trape_t3; + extern rapidjson::Value td_trape_amp; + extern rapidjson::Value td_trigo_freq1; + extern rapidjson::Value td_trigo_freq2; + extern rapidjson::Value 
td_trigo_phase1; + extern rapidjson::Value td_trigo_phase2; + extern rapidjson::Value td_trigo_amp; + extern rapidjson::Value td_heavi_t0; + extern rapidjson::Value td_heavi_amp; + extern rapidjson::Value td_out_dipole; + extern rapidjson::Value td_out_efield; + extern rapidjson::Value ocp; + extern rapidjson::Value ocp_set; + + // @param reading_information -- input_para -- debuging_related + extern rapidjson::Value t_in_h; + extern rapidjson::Value vl_in_h; + extern rapidjson::Value vnl_in_h; + extern rapidjson::Value vh_in_h; + extern rapidjson::Value vion_in_h; + extern rapidjson::Value test_force; + extern rapidjson::Value test_stress; + extern rapidjson::Value colour; + extern rapidjson::Value test_skip_ewald; + + // @param reading_information -- input_para -- electronic_conductivities + extern rapidjson::Value cal_cond; + extern rapidjson::Value cond_nche; + extern rapidjson::Value cond_dw; + extern rapidjson::Value cond_wcut; + extern rapidjson::Value cond_dt; + extern rapidjson::Value cond_dtbatch; + extern rapidjson::Value cond_fwhm; + extern rapidjson::Value cond_nonlocal; + + // @param reading_information -- input_para -- implicit_solvation_model + extern rapidjson::Value imp_sol; + extern rapidjson::Value eb_k; + extern rapidjson::Value tau; + extern rapidjson::Value sigma_k; + extern rapidjson::Value nc_k; + + // @param reading_information -- stru_infos: + extern rapidjson::Value stru_infos; + // extern rapidjson::Value ATOMIC_SPECIES; + // extern rapidjson::Value NUMERICAL_ORBITAL; + // extern rapidjson::Value LATTICE_CONSTANT; + // extern rapidjson::Value ATOMIC_POSITIONS; + + // @param reading_information -- KPT_infos + extern rapidjson::Value KPT_infos; + // extern rapidjson::Value total_number; + // extern rapidjson::Value mode; + // extern rapidjson::Value vectors; + + // @param reading_information -- orb_infos + extern rapidjson::Value orb_infos; + + // @param reading_information -- pp + extern rapidjson::Value pp; + + // @param init + extern 
rapidjson::Value init; + // @param init -- general + // extern rapidjson::Value calculation; + // extern rapidjson::Value esolver_type; + // extern rapidjson::Value basis_type; + // extern rapidjson::Value gamma_only; + // extern rapidjson::Value ks_solver; + // extern rapidjson::Value ntype; + // extern rapidjson::Value nspin; + // extern rapidjson::Value ecutwfc; + // extern rapidjson::Value scf_thr; + // extern rapidjson::Value scf_nmax; + + // @param init -- symmetry + // extern rapidjson::Value symmetry; + // extern rapidjson::Value BRAVAIS_TYPE; + // extern rapidjson::Value BRAVAIS_LATTICE_NAME; + // extern rapidjson::Value IBRAV; + // extern rapidjson::Value LATTICE_CONSTANT_A; + // extern rapidjson::Value right_hand_lattice; + + // @param init -- Kpoints + extern rapidjson::Value kpoints; + extern rapidjson::Value nkstot; + extern rapidjson::Value nkstot_ibz; + extern rapidjson::Value coordinates; + extern rapidjson::Value weight; + + // @param init -- grid + extern rapidjson::Value grid; + extern rapidjson::Value energy_cutoff_for_wavefunc; + extern rapidjson::Value fft_grid_for_wave_functions; + extern rapidjson::Value number_of_plane_waves; + extern rapidjson::Value number_of_sticks; + + // @param init -- Smearing + // extern rapidjson::Value smearing_method; + // extern rapidjson::Value smearing_sigma; + + // @param init -- mixing + extern rapidjson::Value mixing; + + + // @param output + extern rapidjson::Value output; + + + + // @param final_stru + extern rapidjson::Value final_stru; + extern rapidjson::Value cell; + extern rapidjson::Value coordinate; + + + + + /** + * The functions below initialize the json output parameter + * tree to connect the nodes of the module + */ + + /** + * @brief add Top stage:parameter in Abacus: + */ + void Init_json_abacus(); + + + /** + * @brief add Second stage:parameter in Abacus - general_info: + */ + void Init_json_abacus_generalInfo(); + + + /** + * @brief add Second stage:parameter in Abacus - readin_info: + */ 
+ void Init_json_abacus_readinInfo(); + + + /** + * @brief finish json tree build + */ + void Finish_json_tree(); + + + + /** + * @brief This function is used to populate the template type parameter + * values into rapidjson's Value object + */ + template + void set_json_value(rapidjson::Value &json_v,T *para){ + if(std::is_same::value) + { + json_v.SetInt(*reinterpret_cast(para)); + } + else if(std::is_same::value) + { + json_v.SetDouble(*reinterpret_cast(para)); + } + else if(std::is_same::value) + { + json_v.SetBool(*reinterpret_cast(para)); + } + else if(std::is_same::value) + { + // json_v.SetString(rapidjson::StringRef((*reinterpret_cast(para)).c_str())); + + json_v.SetString((*reinterpret_cast(para)).c_str(), std::strlen((*reinterpret_cast(para)).c_str()), doc.GetAllocator()); + //printf("exx_real_number = %s\n",(*reinterpret_cast(para)).c_str()); + } + } +} + +#endif \ No newline at end of file diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 666152b476..008df422e5 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -217,3 +217,17 @@ AddTest( SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp LIBS ${math_libs} formatter ) +if(ENABLE_GOOGLEBENCH) + AddTest( + TARGET perf_sphbes + LIBS formatter + SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp + ) +endif() + +if(ENABLE_RAPIDJSON) + AddTest( + TARGET base_para_json_test + SOURCES para_json_test.cpp ../para_json.cpp + ) +endif() diff --git a/source/module_base/test/complexmatrix_test.cpp b/source/module_base/test/complexmatrix_test.cpp index 026aeb40de..0adc52363a 100644 --- a/source/module_base/test/complexmatrix_test.cpp +++ b/source/module_base/test/complexmatrix_test.cpp @@ -23,8 +23,8 @@ * - set_as_identity_matrix() * - print():Output the elements of this complex matrix greater than 
threshold. * - checkreal() - * - * Tested relative functions + * + * Tested relative functions * - operator "+" "-" "*" between two ComplexMatrix * - operator "*" between a ComplexMatrix and double or complex, and reverse. * - trace() @@ -35,13 +35,13 @@ * - conj() * - scale_accumulate(): * - scaled_sum(): - * + * */ //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } inline void EXPECT_COMPLEX_EQ(const std::complex& a,const std::complex& b) @@ -104,8 +104,8 @@ TEST_F(ComplexMatrixTest,ConstructorCM) TEST_F(ComplexMatrixTest,ConstructorCMrvalue) { - ModuleBase::ComplexMatrix cm2(cm22); - ModuleBase::ComplexMatrix cm1(std::move(cm22)); + ModuleBase::ComplexMatrix cm2(cm22); + ModuleBase::ComplexMatrix cm1(std::move(cm22)); EXPECT_EQ(cm1.nr,cm2.nr); EXPECT_EQ(cm1.nc,cm2.nc); EXPECT_EQ(cm1.size,cm2.size); @@ -338,15 +338,15 @@ TEST_F(ComplexMatrixTest,OperatorMultMatrix) EXPECT_EQ(cm33.nr,3); EXPECT_EQ(cm33.nc,3); EXPECT_EQ(cm33.size,9); - EXPECT_COMPLEX_EQ(cm33(0,0),std::complex{-46.0,72.0 }); + EXPECT_COMPLEX_EQ(cm33(0,0),std::complex{-46.0,72.0 }); EXPECT_COMPLEX_EQ(cm33(0,1),std::complex{-46.0,118.0 }); EXPECT_COMPLEX_EQ(cm33(0,2),std::complex{-46.0,164.0 }); EXPECT_COMPLEX_EQ(cm33(1,0),std::complex{-54.0,84.0 }); EXPECT_COMPLEX_EQ(cm33(1,1),std::complex{-54.0,138.0 }); - EXPECT_COMPLEX_EQ(cm33(1,2),std::complex{-54.0,192.0 }); + EXPECT_COMPLEX_EQ(cm33(1,2),std::complex{-54.0,192.0 }); EXPECT_COMPLEX_EQ(cm33(2,0),std::complex{-62.0,96.0 }); EXPECT_COMPLEX_EQ(cm33(2,1),std::complex{-62.0,158.0 }); - EXPECT_COMPLEX_EQ(cm33(2,2),std::complex{-62.0,220.0 }); + EXPECT_COMPLEX_EQ(cm33(2,2),std::complex{-62.0,220.0 }); EXPECT_DEATH(cm22 * cm32,""); } @@ -525,7 +525,7 @@ TEST_F(ComplexMatrixTest,ScaleSumArray) cmout = new 
ModuleBase::ComplexMatrix*[2]; cmin1 = new ModuleBase::ComplexMatrix*[2]; cmin2 = new ModuleBase::ComplexMatrix*[2]; - + cmin1[0] = &cm1; cmin1[1] = &cm2; cmin2[0] = &cm3; @@ -563,7 +563,7 @@ TEST_F(ComplexMatrixTest,print) EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t")); ifs.close(); remove("printtest1.log"); -// The condition of std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag +// The condition of std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag ofs.open("printtest2.log"); cm22.print(ofs,1e-10,2); ofs.close(); diff --git a/source/module_base/test/inverse_matrix_test.cpp b/source/module_base/test/inverse_matrix_test.cpp index df68f58a56..a871f906cd 100644 --- a/source/module_base/test/inverse_matrix_test.cpp +++ b/source/module_base/test/inverse_matrix_test.cpp @@ -19,7 +19,7 @@ //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } TEST(InverseMatrixComplexTest, InverseMatrixComplex) diff --git a/source/module_base/test/math_sphbes_test.cpp b/source/module_base/test/math_sphbes_test.cpp index 521d4dc2f4..e72c6e289c 100644 --- a/source/module_base/test/math_sphbes_test.cpp +++ b/source/module_base/test/math_sphbes_test.cpp @@ -352,15 +352,27 @@ TEST_F(Sphbes, Zeros) int lmax = 20; int nzeros = 500; - double* zeros = new double[nzeros]; + double* zeros = new double[nzeros*(lmax+1)]; for (int l = 0; l <= lmax; ++l) { - ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros); + ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros, false); for (int i = 0; i < nzeros; ++i) { EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-14); } } + + + ModuleBase::Sphbes::sphbes_zeros(lmax, nzeros, zeros, true); + for (int l = 0; l <= lmax; ++l) + { + for (int i = 0; i < nzeros; ++i) + { + 
EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[l*nzeros+i])), 1e-14); + } + } + + delete[] zeros; } TEST_F(Sphbes, ZerosOld) diff --git a/source/module_base/test/math_ylmreal_test.cpp b/source/module_base/test/math_ylmreal_test.cpp index d5e7a504ed..13d0bd2b69 100644 --- a/source/module_base/test/math_ylmreal_test.cpp +++ b/source/module_base/test/math_ylmreal_test.cpp @@ -13,16 +13,16 @@ ***********************************************/ /** - * For lmax <5 cases, the reference values are calculated by the formula from + * For lmax <5 cases, the reference values are calculated by the formula from * https://formulasearchengine.com/wiki/Table_of_spherical_harmonics. Note, these - * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item + * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item * (-1)^m is multiplied. * For lmax >=5, the reference values are calculated by YlmReal::Ylm_Real. * * - Tested functions of class YlmReal * - Ylm_Real * - Ylm_Real2 - * - rlylm + * - rlylm * - YlmRealTemplate (double and float) * * - Tested functions of class Ylm @@ -30,9 +30,9 @@ * - sph_harm * - rl_sph_harm * - grad_rl_sph_harm - * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real (Cartesian input) + * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real (Cartesian input) * - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) and rlylm (Cartesian input) - * + * */ @@ -40,7 +40,7 @@ //mock functions of WARNING_QUIT and WARNING namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} void WARNING(const std::string &file,const std::string &description) {return ;} } @@ -58,7 +58,7 @@ class YlmRealTest : public 
testing::Test ModuleBase::Vector3 *g; //vectors of the 4 points double *ref; //reference of Ylm double *rly; //Ylm - double (*rlgy)[3]; //the gradient of Ylm + double (*rlgy)[3]; //the gradient of Ylm std::vector rlyvector; //Ylm std::vector> rlgyvector; //the gradient of Ylm @@ -91,101 +91,101 @@ class YlmRealTest : public testing::Test double y4m4(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(35./M_PI) * x*y*(x*x - y*y) / (r*r*r*r);} //the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm - //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz + //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz double rlgyref[4][64][3] = { - { { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, - { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 1.09255e+00, -0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.11953e+00}, { 1.37114e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 4.57046e-01, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.77013e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -1.77013e+00, 0.00000e+00}, { 1.26943e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.00714e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -9.46175e-01, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 2.50334e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 2.50334e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-2.26473e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.52947e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.39677e+00}, 
{-0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.44619e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.46771e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - {-1.90708e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.76362e+00, 0.00000e+00, -0.00000e+00}, {-0.00000e+00, 9.21205e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.76362e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -2.01826e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 4.09910e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, { 3.16161e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 4.51658e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-3.28564e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.40813e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -3.11349e+00}, - {-0.00000e+00, -0.00000e+00, 0.00000e+00}, { 3.63241e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 2.59458e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-4.95014e+00, 0.00000e+00, -0.00000e+00}, + { { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, + { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 1.09255e+00, -0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -1.11953e+00}, { 1.37114e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 4.57046e-01, 0.00000e+00}, + { 
0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.77013e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, -1.77013e+00, 0.00000e+00}, { 1.26943e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.00714e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -9.46175e-01, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 2.50334e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 2.50334e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-2.26473e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.52947e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00, 0.00000e+00}, + { 2.44619e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.46771e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, + {-1.90708e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, + { 2.76362e+00, 0.00000e+00, -0.00000e+00}, {-0.00000e+00, 9.21205e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.76362e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -2.01826e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 4.09910e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, { 3.16161e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, 4.51658e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.28564e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.40813e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -3.11349e+00}, + {-0.00000e+00, -0.00000e+00, 0.00000e+00}, { 3.63241e+00, 
0.00000e+00, -0.00000e+00}, { 0.00000e+00, 2.59458e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-4.95014e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, -4.95014e+00, 0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.37114e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.77013e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 1.77013e+00, 0.00000e+00}, { 0.00000e+00, 1.26943e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.00714e+00}, { 0.00000e+00, 1.89235e+00, -0.00000e+00}, {-9.46175e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.77013e+00}, { 0.00000e+00, 2.50334e+00, -0.00000e+00}, - {-2.50334e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-4.52947e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -2.26473e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.46771e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.44619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - { 0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, - { 0.00000e+00, -2.76362e+00, 0.00000e+00}, { 9.21205e-01, 
0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00, 0.00000e+00}, { 2.01826e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00, 0.00000e+00}, - { 4.09910e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.40813e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 2.59458e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.63241e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, -2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.95014e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 4.95014e+00, -0.00000e+00} + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.37114e+00, -0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.77013e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 1.77013e+00, 0.00000e+00}, { 0.00000e+00, 1.26943e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.00714e+00}, { 0.00000e+00, 1.89235e+00, -0.00000e+00}, {-9.46175e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 
0.00000e+00, 1.77013e+00}, { 0.00000e+00, 2.50334e+00, -0.00000e+00}, + {-2.50334e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-4.52947e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -2.26473e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.46771e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.44619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, + { 0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, + { 0.00000e+00, -2.76362e+00, 0.00000e+00}, { 9.21205e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00, 0.00000e+00}, { 2.01826e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00, 0.00000e+00}, + { 4.09910e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 1.40813e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 2.59458e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.63241e+00, -0.00000e+00}, + { 0.00000e+00, 0.00000e+00, -2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.95014e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 4.95014e+00, -0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, 
0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.22045e-16}, {-0.00000e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.23906e+00}, {-1.82818e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.82818e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.84324e-16, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 5.55112e-17, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -2.67619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.30756e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-5.52973e-16, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.66533e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.67801e+00}, {-3.62357e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -3.62357e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.22267e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.68219e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.93038e-32, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -6.16298e-33, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -4.66097e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.98664e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 6.93334e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.77767e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.22209e-32, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 7.64784e+00}, {-5.78122e+00, 0.00000e+00, 0.00000e+00}, - 
{ 0.00000e+00, -5.78122e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-3.91011e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.17757e-15, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.67737e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -5.84671e-32, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.13319e-47, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.22045e-16}, {-0.00000e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.23906e+00}, {-1.82818e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.82818e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.84324e-16, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 5.55112e-17, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -2.67619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.30756e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-5.52973e-16, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.66533e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.67801e+00}, {-3.62357e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -3.62357e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.22267e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.68219e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.93038e-32, 0.00000e+00, 
0.00000e+00}, { 0.00000e+00, -6.16298e-33, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -4.66097e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 8.98664e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 6.93334e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 1.77767e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.22209e-32, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 7.64784e+00}, {-5.78122e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -5.78122e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.91011e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.17757e-15, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.67737e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -5.84671e-32, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.13319e-47, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.41649e-48, 0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 3.64183e-01, 3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00, 6.30783e-01}, - {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01, 0.00000e+00}, - {-7.46353e-01, -7.46353e-01, 0.00000e+00}, { 0.00000e+00, 3.04697e-01, -1.21879e+00}, { 3.04697e-01, 0.00000e+00, -1.21879e+00}, - { 9.63537e-01, -9.63537e-01, 4.01253e-16}, { 9.63537e-01, 9.63537e-01, 9.63537e-01}, {-4.44089e-16, 1.18009e+00, -2.22045e-16}, - {-1.18009e+00, -1.11022e-16, 0.00000e+00}, { 4.88603e-01, 4.88603e-01, 1.30294e+00}, 
{-1.03006e+00, -7.72548e-01, 7.72548e-01}, - {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, - { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00, 1.92296e-16, 6.81324e-01}, { 9.63537e-01, 9.63537e-01, -1.44756e-16}, - {-9.63537e-01, 9.63537e-01, -5.55112e-17}, { 5.19779e-01, 5.19779e-01, -1.81923e+00}, { 1.40917e+00, 8.05238e-01, 8.05238e-01}, - { 8.05238e-01, 1.40917e+00, 8.05238e-01}, { 0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, - {-2.17439e-01, 1.73951e+00, 1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, - { 1.84503e+00, -1.84503e+00, 6.58625e-16}, { 1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, - {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, - { 7.09144e-01, -7.09144e-01, 1.87222e-16}, { 2.12743e+00, 2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, - { 1.02201e-15, -7.09144e-01, 2.12743e+00}, { 1.81260e+00, 1.81260e+00, 2.58943e+00}, {-2.07154e+00, 2.07154e+00, -1.66969e-15}, - {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00, 5.77778e-17}, - { 1.05183e+00, 1.05183e+00, 4.03986e-17}, { 1.27464e+00, 1.27464e+00, 1.69952e+00}, {-1.28472e+00, -1.20442e+00, 1.92707e+00}, - {-1.20442e+00, -1.28472e+00, 1.92707e+00}, {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, - {-1.11260e+00, -2.08612e+00, 9.27164e-01}, { 2.08612e+00, 1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, - { 1.23002e+00, -1.23002e+00, 2.28018e-15}, { 3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, - {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00, 
9.77875e-17}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 3.64183e-01, 3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00, 6.30783e-01}, + {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01, 0.00000e+00}, + {-7.46353e-01, -7.46353e-01, 0.00000e+00}, { 0.00000e+00, 3.04697e-01, -1.21879e+00}, { 3.04697e-01, 0.00000e+00, -1.21879e+00}, + { 9.63537e-01, -9.63537e-01, 4.01253e-16}, { 9.63537e-01, 9.63537e-01, 9.63537e-01}, {-4.44089e-16, 1.18009e+00, -2.22045e-16}, + {-1.18009e+00, -1.11022e-16, 0.00000e+00}, { 4.88603e-01, 4.88603e-01, 1.30294e+00}, {-1.03006e+00, -7.72548e-01, 7.72548e-01}, + {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, + { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00, 1.92296e-16, 6.81324e-01}, { 9.63537e-01, 9.63537e-01, -1.44756e-16}, + {-9.63537e-01, 9.63537e-01, -5.55112e-17}, { 5.19779e-01, 5.19779e-01, -1.81923e+00}, { 1.40917e+00, 8.05238e-01, 8.05238e-01}, + { 8.05238e-01, 1.40917e+00, 8.05238e-01}, { 0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, + {-2.17439e-01, 1.73951e+00, 1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, + { 1.84503e+00, -1.84503e+00, 6.58625e-16}, { 1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, + {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, + { 7.09144e-01, -7.09144e-01, 1.87222e-16}, { 2.12743e+00, 2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, + { 1.02201e-15, -7.09144e-01, 2.12743e+00}, { 1.81260e+00, 1.81260e+00, 2.58943e+00}, {-2.07154e+00, 2.07154e+00, -1.66969e-15}, + {-3.03637e+00, 
-2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00, 5.77778e-17}, + { 1.05183e+00, 1.05183e+00, 4.03986e-17}, { 1.27464e+00, 1.27464e+00, 1.69952e+00}, {-1.28472e+00, -1.20442e+00, 1.92707e+00}, + {-1.20442e+00, -1.28472e+00, 1.92707e+00}, {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, + {-1.11260e+00, -2.08612e+00, 9.27164e-01}, { 2.08612e+00, 1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, + { 1.23002e+00, -1.23002e+00, 2.28018e-15}, { 3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, + {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00, 9.77875e-17}, { 1.46671e+00, 1.14919e-15, 1.34475e-16} } }; @@ -206,71 +206,71 @@ class YlmRealTest : public testing::Test rlgy = new double[nylm][3]; rlgyvector.resize(nylm,std::vector(3)); ref = new double[64*4]{ - y00(g[0].x, g[0].y, g[0].z), y00(g[1].x, g[1].y, g[1].z), y00(g[2].x, g[2].y, g[2].z), y00(g[3].x, g[3].y, g[3].z), - y10(g[0].x, g[0].y, g[0].z), y10(g[1].x, g[1].y, g[1].z), y10(g[2].x, g[2].y, g[2].z), y10(g[3].x, g[3].y, g[3].z), - y11(g[0].x, g[0].y, g[0].z), y11(g[1].x, g[1].y, g[1].z), y11(g[2].x, g[2].y, g[2].z), y11(g[3].x, g[3].y, g[3].z), - y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), - y20(g[0].x, g[0].y, g[0].z), y20(g[1].x, g[1].y, g[1].z), y20(g[2].x, g[2].y, g[2].z), y20(g[3].x, g[3].y, g[3].z), - y21(g[0].x, g[0].y, g[0].z), y21(g[1].x, g[1].y, g[1].z), y21(g[2].x, g[2].y, g[2].z), y21(g[3].x, g[3].y, g[3].z), - y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z), - y22(g[0].x, g[0].y, g[0].z), y22(g[1].x, g[1].y, g[1].z), y22(g[2].x, g[2].y, g[2].z), y22(g[3].x, g[3].y, g[3].z), - y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, 
g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), - y30(g[0].x, g[0].y, g[0].z), y30(g[1].x, g[1].y, g[1].z), y30(g[2].x, g[2].y, g[2].z), y30(g[3].x, g[3].y, g[3].z), - y31(g[0].x, g[0].y, g[0].z), y31(g[1].x, g[1].y, g[1].z), y31(g[2].x, g[2].y, g[2].z), y31(g[3].x, g[3].y, g[3].z), - y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), - y32(g[0].x, g[0].y, g[0].z), y32(g[1].x, g[1].y, g[1].z), y32(g[2].x, g[2].y, g[2].z), y32(g[3].x, g[3].y, g[3].z), - y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), - y33(g[0].x, g[0].y, g[0].z), y33(g[1].x, g[1].y, g[1].z), y33(g[2].x, g[2].y, g[2].z), y33(g[3].x, g[3].y, g[3].z), - y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), - y40(g[0].x, g[0].y, g[0].z), y40(g[1].x, g[1].y, g[1].z), y40(g[2].x, g[2].y, g[2].z), y40(g[3].x, g[3].y, g[3].z), - y41(g[0].x, g[0].y, g[0].z), y41(g[1].x, g[1].y, g[1].z), y41(g[2].x, g[2].y, g[2].z), y41(g[3].x, g[3].y, g[3].z), - y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), - y42(g[0].x, g[0].y, g[0].z), y42(g[1].x, g[1].y, g[1].z), y42(g[2].x, g[2].y, g[2].z), y42(g[3].x, g[3].y, g[3].z), - y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), - y43(g[0].x, g[0].y, g[0].z), y43(g[1].x, g[1].y, g[1].z), y43(g[2].x, g[2].y, g[2].z), y43(g[3].x, g[3].y, g[3].z), - y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), - y44(g[0].x, g[0].y, g[0].z), y44(g[1].x, g[1].y, g[1].z), y44(g[2].x, g[2].y, g[2].z), y44(g[3].x, g[3].y, g[3].z), - y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), - 0.000000000000000, 
0.000000000000000, 0.935602579627389, 0.090028400200397, - -0.452946651195697, -0.000000000000000, -0.000000000000000, -0.348678494661834, - -0.000000000000000, -0.452946651195697, -0.000000000000000, -0.348678494661834, - -0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.000000000000000, -0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.489238299435250, 0.000000000000000, -0.000000000000000, -0.376615818502422, - 0.000000000000000, -0.489238299435250, -0.000000000000000, 0.376615818502422, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.532615198330370, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.656382056840170, -0.000000000000000, -0.000000000000000, -0.168427714314628, - -0.000000000000000, -0.656382056840170, -0.000000000000000, -0.168427714314628, - -0.317846011338142, -0.317846011338142, 1.017107236282055, 0.226023830284901, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - 0.460602629757462, -0.460602629757462, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.409424559784410, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.136474853261470, - -0.000000000000000, 0.000000000000000, -0.000000000000000, -0.136474853261470, - -0.504564900728724, -0.504564900728724, 0.000000000000000, -0.598002845308118, - -0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - 0.683184105191914, -0.683184105191914, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.202424920056864, - 0.000000000000000, 0.000000000000000, 1.092548430592079, -0.350435072502801, - 
0.451658037912587, 0.000000000000000, -0.000000000000000, 0.046358202625865, - 0.000000000000000, 0.451658037912587, -0.000000000000000, 0.046358202625865, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.492067081245654, - -0.469376801586882, -0.000000000000000, -0.000000000000000, 0.187354445356332, - -0.000000000000000, 0.469376801586882, -0.000000000000000, -0.187354445356332, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.355076798886913, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.518915578720260, 0.000000000000000, -0.000000000000000, -0.443845998608641, - 0.000000000000000, 0.518915578720260, -0.000000000000000, -0.443845998608641, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.452635881587108, - -0.707162732524596, 0.000000000000000, -0.000000000000000, 0.120972027847095, - -0.000000000000000, 0.707162732524596, -0.000000000000000, -0.120972027847095 - } ; + y00(g[0].x, g[0].y, g[0].z), y00(g[1].x, g[1].y, g[1].z), y00(g[2].x, g[2].y, g[2].z), y00(g[3].x, g[3].y, g[3].z), + y10(g[0].x, g[0].y, g[0].z), y10(g[1].x, g[1].y, g[1].z), y10(g[2].x, g[2].y, g[2].z), y10(g[3].x, g[3].y, g[3].z), + y11(g[0].x, g[0].y, g[0].z), y11(g[1].x, g[1].y, g[1].z), y11(g[2].x, g[2].y, g[2].z), y11(g[3].x, g[3].y, g[3].z), + y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), + y20(g[0].x, g[0].y, g[0].z), y20(g[1].x, g[1].y, g[1].z), y20(g[2].x, g[2].y, g[2].z), y20(g[3].x, g[3].y, g[3].z), + y21(g[0].x, g[0].y, g[0].z), y21(g[1].x, g[1].y, g[1].z), y21(g[2].x, g[2].y, g[2].z), y21(g[3].x, g[3].y, g[3].z), + y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z), + y22(g[0].x, g[0].y, g[0].z), y22(g[1].x, g[1].y, 
g[1].z), y22(g[2].x, g[2].y, g[2].z), y22(g[3].x, g[3].y, g[3].z), + y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), + y30(g[0].x, g[0].y, g[0].z), y30(g[1].x, g[1].y, g[1].z), y30(g[2].x, g[2].y, g[2].z), y30(g[3].x, g[3].y, g[3].z), + y31(g[0].x, g[0].y, g[0].z), y31(g[1].x, g[1].y, g[1].z), y31(g[2].x, g[2].y, g[2].z), y31(g[3].x, g[3].y, g[3].z), + y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), + y32(g[0].x, g[0].y, g[0].z), y32(g[1].x, g[1].y, g[1].z), y32(g[2].x, g[2].y, g[2].z), y32(g[3].x, g[3].y, g[3].z), + y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), + y33(g[0].x, g[0].y, g[0].z), y33(g[1].x, g[1].y, g[1].z), y33(g[2].x, g[2].y, g[2].z), y33(g[3].x, g[3].y, g[3].z), + y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), + y40(g[0].x, g[0].y, g[0].z), y40(g[1].x, g[1].y, g[1].z), y40(g[2].x, g[2].y, g[2].z), y40(g[3].x, g[3].y, g[3].z), + y41(g[0].x, g[0].y, g[0].z), y41(g[1].x, g[1].y, g[1].z), y41(g[2].x, g[2].y, g[2].z), y41(g[3].x, g[3].y, g[3].z), + y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), + y42(g[0].x, g[0].y, g[0].z), y42(g[1].x, g[1].y, g[1].z), y42(g[2].x, g[2].y, g[2].z), y42(g[3].x, g[3].y, g[3].z), + y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), + y43(g[0].x, g[0].y, g[0].z), y43(g[1].x, g[1].y, g[1].z), y43(g[2].x, g[2].y, g[2].z), y43(g[3].x, g[3].y, g[3].z), + y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), + y44(g[0].x, g[0].y, g[0].z), y44(g[1].x, g[1].y, g[1].z), y44(g[2].x, g[2].y, g[2].z), y44(g[3].x, g[3].y, g[3].z), + y4m4(g[0].x, 
g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), + 0.000000000000000, 0.000000000000000, 0.935602579627389, 0.090028400200397, + -0.452946651195697, -0.000000000000000, -0.000000000000000, -0.348678494661834, + -0.000000000000000, -0.452946651195697, -0.000000000000000, -0.348678494661834, + -0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + -0.000000000000000, -0.000000000000000, 0.000000000000000, -0.000000000000000, + 0.489238299435250, 0.000000000000000, -0.000000000000000, -0.376615818502422, + 0.000000000000000, -0.489238299435250, -0.000000000000000, 0.376615818502422, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.532615198330370, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + -0.656382056840170, -0.000000000000000, -0.000000000000000, -0.168427714314628, + -0.000000000000000, -0.656382056840170, -0.000000000000000, -0.168427714314628, + -0.317846011338142, -0.317846011338142, 1.017107236282055, 0.226023830284901, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, + 0.460602629757462, -0.460602629757462, 0.000000000000000, -0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.409424559784410, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.136474853261470, + -0.000000000000000, 0.000000000000000, -0.000000000000000, -0.136474853261470, + -0.504564900728724, -0.504564900728724, 0.000000000000000, -0.598002845308118, + -0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, + 0.683184105191914, -0.683184105191914, 0.000000000000000, -0.000000000000000, + 0.000000000000000, 0.000000000000000, 
0.000000000000000, -0.202424920056864, + 0.000000000000000, 0.000000000000000, 1.092548430592079, -0.350435072502801, + 0.451658037912587, 0.000000000000000, -0.000000000000000, 0.046358202625865, + 0.000000000000000, 0.451658037912587, -0.000000000000000, 0.046358202625865, + 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.492067081245654, + -0.469376801586882, -0.000000000000000, -0.000000000000000, 0.187354445356332, + -0.000000000000000, 0.469376801586882, -0.000000000000000, -0.187354445356332, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.355076798886913, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + 0.518915578720260, 0.000000000000000, -0.000000000000000, -0.443845998608641, + 0.000000000000000, 0.518915578720260, -0.000000000000000, -0.443845998608641, + 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.452635881587108, + -0.707162732524596, 0.000000000000000, -0.000000000000000, 0.120972027847095, + -0.000000000000000, 0.707162732524596, -0.000000000000000, -0.120972027847095 + } ; } void TearDown() @@ -293,11 +293,11 @@ TEST_F(YlmRealTest,YlmReal) ModuleBase::YlmReal::Ylm_Real(nylm,ng,g,ylm); for(int i=0;i gplus = g[j]; ModuleBase::Vector3 gminus = g[j]; @@ -352,16 +352,16 @@ TEST_F(YlmRealTest,YlmReal2) ModuleBase::YlmReal::Ylm_Real2(nylm,ng,g,ylm); for(int i=0;i R (20.0, 0.0, 0.0); const double xdr = R.x/R.norm(); const double ydr = R.y/R.norm(); @@ -444,17 +444,17 @@ TEST_F(YlmRealTest, equality_value_test) const double rl = std::pow( R.norm(), L); //std::cout << " rl=" << rl << std::endl; ModuleBase::Ylm::set_coefficients(); - + int nu = 100; - + // Peize Lin change rlya 2016-08-26 std::vector rlya; double rlyb[400]; ModuleBase::Ylm::ZEROS( rlyb, 400); - + ModuleBase::Ylm::rl_sph_harm(L, xdr, ydr, zdr, rlya); 
ModuleBase::Ylm::get_ylm_real(L+1, R, rlyb); - + for (int i=0; i < nu; i++) { double diff = fabs(rlya[i]-rlyb[i]); @@ -467,21 +467,21 @@ TEST_F(YlmRealTest, equality_value_test) TEST_F(YlmRealTest, equality_gradient_test) { - + ModuleBase::Vector3 R (0.1,-0.2,0.5); ModuleBase::Ylm::set_coefficients(); - + //int nu = 100; std::vector rlya; double rlyb[400]; - + std::vector> grlya; double grlyb[400][3]; - + ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya); ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); - + for (int i = 0; i < 100; i++) { double diffx = fabs(grlya[i][2]-grlyb[i][2]); diff --git a/source/module_base/test/para_json_test.cpp b/source/module_base/test/para_json_test.cpp new file mode 100644 index 0000000000..3ce6ecce49 --- /dev/null +++ b/source/module_base/test/para_json_test.cpp @@ -0,0 +1,68 @@ +#include "module_base/para_json.h" +#include "gtest/gtest.h" +#ifdef __MPI +#include "mpi.h" +#endif + +#include +#include "rapidjson/document.h" +/************************************************ + * unit test of Input::ParaJson + ***********************************************/ + +/** + * - Tested Functions: + * - Init() + * - init json tree from input::Init and check if the json string is valid + */ + +class ParaJsonTest : public ::testing::Test +{ + protected: + std::string testString; +}; + +// check if a string is a valid JSON string +bool isValidJSON(const std::string& jsonString) +{ + rapidjson::Document document; + document.Parse(jsonString.c_str()); + + return !document.HasParseError(); +} + +TEST_F(ParaJsonTest, Init) +{ + //std::string input_file = "./support/INPUT"; + //Input input_tmp; + //EXPECT_NO_THROW(input_tmp.Init(input_file)); + + // int status = system("rm -r ./OUT.autotest/"); + // EXPECT_EQ(status,0); + // Para_Json::Init_json_abacus_readinInfo(); + Para_Json::Init_json_abacus_generalInfo(); + Para_Json::Init_json_abacus(); + Para_Json::Finish_json_tree(); + rapidjson::StringBuffer buffer; + rapidjson::Writer 
writer(buffer); + Para_Json::doc.Accept(writer); + std::string json = buffer.GetString(); + EXPECT_EQ(isValidJSON(json), true); +} + +int main(int argc, char** argv) +{ +#ifdef __MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); +#endif + testing::InitGoogleTest(&argc, argv); + int result; + result = RUN_ALL_TESTS(); +#ifdef __MPI + MPI_Finalize(); +#endif + return result; +} + diff --git a/source/module_base/test/perf_sphbes_test.cpp b/source/module_base/test/perf_sphbes_test.cpp new file mode 100644 index 0000000000..4c574baa8e --- /dev/null +++ b/source/module_base/test/perf_sphbes_test.cpp @@ -0,0 +1,72 @@ +#include"../math_sphbes.h" +#include +#include +#include +#include +#include + +/************************************************ +* performace test of class Sphbes +***********************************************/ + +/** + * Tested function: + * - sphbesj + * - Spherical_Bessel + */ + +class PerfSphbes : public benchmark::Fixture { +public: + const double q = 1; + const int n = 1000; + double stop = 1000.0; + double dr = 0.0; + double* rc, *rinf, *jc, *jinf; + void SetUp(const benchmark::State& state){ + const double rcut = state.range(0) + 0.5; + rc = new double[n + 10]; + rinf = new double[n + 10]; + jc = new double[n + 10]; + jinf = new double[n + 10]; + + // generate data points in (0, rcut] in log scale + double rmin = 0.0001; + double log_rmin = std::log(rmin); + double log_rcut = std::log(rcut); + dr = (log_rcut - log_rmin) / (n-1); + memset(rc, 0, (n+10) * sizeof(double)); + for (int i = 0; i < n; i++) + rc[i] = std::exp(log_rmin + i * dr); + + // generate data points in [rcut, stop] in linear scale + memset(rinf, 0, (n+10) * sizeof(double)); + rinf[0] = rcut; + dr = (stop - rcut) / (n-1); + for (int i = 1; i < n; i++) + rinf[i] += rinf[i-1] + dr; + } + void TearDown(const benchmark::State& state){ + delete[] rc; + delete[] rinf; + delete[] jc; + delete[] jinf; + } +}; 
+ +BENCHMARK_DEFINE_F(PerfSphbes, BM_Spherical_Bessel)(benchmark::State& state) { + for (auto _ : state) { + ModuleBase::Sphbes::Spherical_Bessel(n, rc, q, state.range(0), jc); + ModuleBase::Sphbes::Spherical_Bessel(n, rinf, q, state.range(0), jinf); + } +} + +BENCHMARK_DEFINE_F(PerfSphbes, BM_sphbesj)(benchmark::State& state) { + for (auto _ : state) { + ModuleBase::Sphbes::sphbesj(n, rc, q, state.range(0), jc); + ModuleBase::Sphbes::sphbesj(n, rinf, q, state.range(0), jinf); + } +} + +BENCHMARK_REGISTER_F(PerfSphbes, BM_sphbesj)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); +BENCHMARK_REGISTER_F(PerfSphbes, BM_Spherical_Bessel)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/source/module_base/tool_quit.h b/source/module_base/tool_quit.h index eafaf673cc..f944696d5a 100644 --- a/source/module_base/tool_quit.h +++ b/source/module_base/tool_quit.h @@ -33,13 +33,13 @@ void WARNING(const std::string &file, const std::string &description); * @brief Close .log files and exit * */ -void QUIT(void); +[[noreturn]] void QUIT(void); /** * @brief Close .log files and exit * */ -void QUIT(int ret); +[[noreturn]] void QUIT(int ret); /** * @brief Combine the functions of WARNING and QUIT @@ -47,7 +47,7 @@ void QUIT(int ret); * @param file The file where warning happens * @param description The warning information */ -void WARNING_QUIT(const std::string &file, const std::string &description); +[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description); /** * @brief Combine the functions of WARNING and QUIT @@ -55,7 +55,7 @@ void WARNING_QUIT(const std::string &file, const std::string &description); * @param file The file where warning happens * @param description The warning information */ -void WARNING_QUIT(const std::string &file, const std::string &description, int ret); +[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description, int ret); /** * 
@brief Check, if true, WARNING_QUIT diff --git a/source/module_cell/klist.cpp b/source/module_cell/klist.cpp index 52bc42440d..e11ab183f5 100644 --- a/source/module_cell/klist.cpp +++ b/source/module_cell/klist.cpp @@ -362,6 +362,10 @@ bool K_Vectors::read_kpoints(const std::string &fn) //recalculate nkstot. nkstot = 0; + /* ISSUE#3482: to distinguish different kline segments */ + std::vector kpt_segids; + kl_segids.clear(); kl_segids.shrink_to_fit(); + int kpt_segid = 0; for(int iks=0; iks> ksx[iks]; @@ -371,6 +375,9 @@ bool K_Vectors::read_kpoints(const std::string &fn) //std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl; assert(nkl[iks] >= 0); nkstot += nkl[iks]; + /* ISSUE#3482: to distinguish different kline segments */ + if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++; + kpt_segids.push_back(kpt_segid); } assert( nkl[nks_special-1] == 1); @@ -389,6 +396,7 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_c[count].x = ksx[iks-1] + is*dx; kvec_c[count].y = ksy[iks-1] + is*dy; kvec_c[count].z = ksz[iks-1] + is*dz; + kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; } } @@ -397,15 +405,14 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_c[count].x = ksx[nks_special-1]; kvec_c[count].y = ksy[nks_special-1]; kvec_c[count].z = ksz[nks_special-1]; + kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; //std::cout << " count = " << count << std::endl; - assert (count == nkstot ); - - for(int ik=0; ikkc_done = true; @@ -439,15 +446,22 @@ bool K_Vectors::read_kpoints(const std::string &fn) //recalculate nkstot. 
nkstot = 0; + /* ISSUE#3482: to distinguish different kline segments */ + std::vector kpt_segids; + kl_segids.clear(); kl_segids.shrink_to_fit(); + int kpt_segid = 0; for(int iks=0; iks> ksx[iks]; ifk >> ksy[iks]; ifk >> ksz[iks]; - ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); + ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); /* so ifk is ifstream for kpoint, then nkl is number of kpoints on line */ //std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl; assert(nkl[iks] >= 0); nkstot += nkl[iks]; + /* ISSUE#3482: to distinguish different kline segments */ + if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++; + kpt_segids.push_back(kpt_segid); } assert( nkl[nks_special-1] == 1); @@ -466,6 +480,7 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_d[count].x = ksx[iks-1] + is*dx; kvec_d[count].y = ksy[iks-1] + is*dy; kvec_d[count].z = ksz[iks-1] + is*dz; + kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; } } @@ -474,18 +489,16 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_d[count].x = ksx[nks_special-1]; kvec_d[count].y = ksy[nks_special-1]; kvec_d[count].z = ksz[nks_special-1]; + kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; //std::cout << " count = " << count << std::endl; - assert (count == nkstot ); + assert(count == nkstot ); + assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */ - for(int ik=0; ikkd_done = true; - } else @@ -1122,6 +1135,9 @@ void K_Vectors::mpi_k(void) Parallel_Common::bcast_int(nmp, 3); + kl_segids.resize(nkstot); + Parallel_Common::bcast_int(kl_segids.data(), nkstot); + Parallel_Common::bcast_double(koffset, 3); this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL]; @@ -1352,6 +1368,8 @@ void K_Vectors::mpi_k_after_vc(void) Parallel_Common::bcast_int(nspin); Parallel_Common::bcast_int(nkstot); Parallel_Common::bcast_int(nmp, 
3); + kl_segids.resize(nkstot); + Parallel_Common::bcast_int(kl_segids.data(), nkstot); Parallel_Common::bcast_double(koffset, 3); this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL]; diff --git a/source/module_cell/klist.h b/source/module_cell/klist.h index a9e06f8614..aa92cf29fd 100644 --- a/source/module_cell/klist.h +++ b/source/module_cell/klist.h @@ -29,6 +29,7 @@ class K_Vectors int nkstot_full; /// number of k points in full k mesh int nmp[3]; // Number of Monhorst-Pack + std::vector kl_segids; // index of kline segment K_Vectors(); ~K_Vectors(); diff --git a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp index 617674256a..bb447bca4c 100644 --- a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp +++ b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp @@ -223,7 +223,7 @@ TEST_F(SltkAtomInputTest, ConstructorNoExpand) GlobalV::test_grid = 1; // this is a bug if radius is too small // because the expand_flag will be false! 
- radius = 1e-1000; + radius = 0; Atom_input Atom_inp(ofs, *ucell, ucell->nat, ucell->ntype, pbc, radius, test_atom_in); EXPECT_FALSE(Atom_inp.getExpandFlag()); // call set_FAtom and Load_atom diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp index dc517bccd7..4c6bf9c0eb 100644 --- a/source/module_cell/read_atoms.cpp +++ b/source/module_cell/read_atoms.cpp @@ -535,100 +535,101 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn ModuleBase::GlobalFunc::ZEROS(atoms[it].mag,na); for (int ia = 0;ia < na; ia++) { - // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 - ifpos >> v.x >> v.y >> v.z; - mv.x = true ; - mv.y = true ; - mv.z = true ; - atoms[it].vel[ia].set(0,0,0); - atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2 - atoms[it].angle1[ia]=0; - atoms[it].angle2[ia]=0; - atoms[it].m_loc_[ia].set(0,0,0); - - std::string tmpid; - tmpid = ifpos.get(); - - if( (int)tmpid[0] < 0 ) - { - std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl; - exit(1); - } - - bool input_vec_mag=false; - bool input_angle_mag=false; - while ( (tmpid != "\n") && (ifpos.eof()==false) && (tmpid !="#") ) - { - tmpid = ifpos.get() ; - // old method of reading frozen ions - char tmp = (char)tmpid[0]; - if ( tmp >= 48 && tmp <= 57 ) - { - mv.x = std::stoi(tmpid); - ifpos >> mv.y >> mv.z ; - } - // new method of reading frozen ions and velocities - if ( tmp >= 'a' && tmp <='z') - { - ifpos.putback(tmp); - ifpos >> tmpid; - } - if ( tmpid == "m" ) - { - ifpos >> mv.x >> mv.y >> mv.z ; - } - else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" ) - { - ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z; - } - else if ( tmpid == "mag" || tmpid == "magmom") - { - set_element_mag_zero = true; - double tmpamg=0; - ifpos >> tmpamg; - tmp=ifpos.get(); - while (tmp==' ') - { - 
tmp=ifpos.get(); - } - - if((tmp >= 48 && tmp <= 57) or tmp=='-') - { - ifpos.putback(tmp); - ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z; - atoms[it].m_loc_[ia].x=tmpamg; - atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2)); - input_vec_mag=true; - - } - else - { - ifpos.putback(tmp); - atoms[it].mag[ia]=tmpamg; - } - - // atoms[it].mag[ia]; - } - else if ( tmpid == "angle1") - { - ifpos >> atoms[it].angle1[ia]; - atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - else if ( tmpid == "angle2") - { - ifpos >> atoms[it].angle2[ia]; - atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - - } - while ( (tmpid != "\n") && (ifpos.eof()==false) ) - { - tmpid = ifpos.get(); - } + // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 + ifpos >> v.x >> v.y >> v.z; + mv.x = true ; + mv.y = true ; + mv.z = true ; + atoms[it].vel[ia].set(0,0,0); + atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2 + atoms[it].angle1[ia]=0; + atoms[it].angle2[ia]=0; + atoms[it].m_loc_[ia].set(0,0,0); + + std::string tmpid; + tmpid = ifpos.get(); + + if( (int)tmpid[0] < 0 ) + { + std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl; + exit(1); + } + + bool input_vec_mag=false; + bool input_angle_mag=false; + // read if catch goodbit before "\n" and "#" + while ( (tmpid != "\n") && (ifpos.good()) && (tmpid !="#") ) + { + tmpid = ifpos.get() ; + // old method of reading frozen ions + char tmp = (char)tmpid[0]; + if ( tmp >= 48 && tmp <= 57 ) + { + mv.x = std::stoi(tmpid); + ifpos >> mv.y >> mv.z ; + } + // new method of reading frozen ions and velocities + if ( tmp >= 'a' && tmp <='z') + { + ifpos.putback(tmp); + ifpos >> tmpid; + } + if ( tmpid == "m" ) + 
{ + ifpos >> mv.x >> mv.y >> mv.z ; + } + else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" ) + { + ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z; + } + else if ( tmpid == "mag" || tmpid == "magmom") + { + set_element_mag_zero = true; + double tmpamg=0; + ifpos >> tmpamg; + tmp=ifpos.get(); + while (tmp==' ') + { + tmp=ifpos.get(); + } + + if((tmp >= 48 && tmp <= 57) or tmp=='-') + { + ifpos.putback(tmp); + ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z; + atoms[it].m_loc_[ia].x=tmpamg; + atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2)); + input_vec_mag=true; + + } + else + { + ifpos.putback(tmp); + atoms[it].mag[ia]=tmpamg; + } + + // atoms[it].mag[ia]; + } + else if ( tmpid == "angle1") + { + ifpos >> atoms[it].angle1[ia]; + atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI; + input_angle_mag=true; + set_element_mag_zero = true; + } + else if ( tmpid == "angle2") + { + ifpos >> atoms[it].angle2[ia]; + atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI; + input_angle_mag=true; + set_element_mag_zero = true; + } + } + // move to next line + while ( (tmpid != "\n") && (ifpos.good()) ) + { + tmpid = ifpos.get(); + } std::string mags; //cout<<"mag"<( tetra(nt,i) ) + nk][ibnd]; - } - - itetra[0] = 0; - - ModuleBase::hpsort(4, etetra, itetra); - - //=============================================== - // ...sort in ascending order: e1 < e2 < e3 < e4 - //=============================================== - e1 = etetra [0]; - e2 = etetra [1]; - e3 = etetra [2]; - e4 = etetra [3]; - - //============================================================== - // kp1-kp4 are the irreducible k-points corresponding to e1-e4 - //============================================================== - - kp1 = static_cast( tetra(nt,itetra[0]) )+ nk; - kp2 = static_cast( tetra(nt,itetra[1]) )+ nk; - kp3 = static_cast( tetra(nt,itetra[2]) )+ nk; - kp4 = static_cast( 
tetra(nt,itetra[3]) )+ nk; - - //====================== - // calculate weights wg - //====================== - if (ef >= e4) - { - wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra; - wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra; - wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra; - wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra; - } - else if (ef < e4 && ef >= e3) - { - c4 = 0.250 / ntetra * pow(e4 - ef, 3) / (e4 - e1) / (e4 - e2) - / (e4 - e3); - dosef = 3.0 / ntetra * (e4 - ef) * (e4 - ef) / (e4 - e1) / (e4 - e2) - / (e4 - e3); - wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra - c4 * - (e4 - ef) / (e4 - e1) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; - wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra - c4 * - (e4 - ef) / (e4 - e2) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; - wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra - c4 * - (e4 - ef) / (e4 - e3) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; - wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra - c4 * - (4.0 - (e4 - ef) * (1.0 / (e4 - e1) + 1.0 / (e4 - e2) - + 1.0 / (e4 - e3))) + dosef * (e1 + e2 + e3 + e4 - 4.0 * - ekb[kp4][ibnd]) / 40.0; - } - - else if (ef < e3 && ef >= e2) - { - c1 = 0.250 / ntetra * (ef - e1) * (ef - e1) / (e4 - e1) / (e3 - e1); - c2 = 0.250 / ntetra * (ef - e1) * (ef - e2) * (e3 - ef) - / (e4 - e1) / (e3 - e2) / (e3 - e1); - c3 = 0.250 / ntetra * (ef - e2) * (ef - e2) * (e4 - ef) / (e4 - e2) - / (e3 - e2) / (e4 - e1); - dosef = 1.0 / ntetra / (e3 - e1) / (e4 - e1) * (3.0 * - (e2 - e1) + 6.0 * (ef - e2) - 3.0 * (e3 - e1 + e4 - e2) - * (ef - e2) * (ef - e2) / (e3 - e2) / (e4 - e2)); - wg(kp1, ibnd) = wg(kp1, ibnd) + c1 + (c1 + c2) * (e3 - ef) - / (e3 - e1) + (c1 + c2 + c3) * (e4 - ef) / (e4 - e1) + dosef * - (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; - wg(kp2, ibnd) = wg(kp2, ibnd) + c1 + c2 + c3 + (c2 + c3) - * (e3 - ef) / (e3 - e2) + c3 * (e4 - ef) / (e4 - e2) + dosef * - (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; - wg(kp3, ibnd) 
= wg(kp3, ibnd) + (c1 + c2) * (ef - e1) - / (e3 - e1) + (c2 + c3) * (ef - e2) / (e3 - e2) + dosef * - (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; - wg(kp4, ibnd) = wg(kp4, ibnd) + (c1 + c2 + c3) * (ef - e1) - / (e4 - e1) + c3 * (ef - e2) / (e4 - e2) + dosef * (e1 + e2 + - e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0; - } - else if (ef < e2 && ef >= e1) - { - c4 = 0.250 / ntetra * (ef - e1) * (ef - e1) * (ef - e1) / (e2 - e1) / - (e3 - e1) / (e4 - e1); - dosef = 3.0 / ntetra * (ef - e1) * (ef - e1) / (e2 - e1) / (e3 - e1) - / (e4 - e1); - wg(kp1, ibnd) = wg(kp1, ibnd) + c4 * (4.0 - (ef - e1) - * (1.0 / (e2 - e1) + 1.0 / (e3 - e1) + 1.0 / (e4 - e1))) - + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; - wg(kp2, ibnd) = wg(kp2, ibnd) + c4 * (ef - e1) / (e2 - e1) - + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; - wg(kp3, ibnd) = wg(kp3, ibnd) + c4 * (ef - e1) / (e3 - e1) - + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; - wg(kp4, ibnd) = wg(kp4, ibnd) + c4 * (ef - e1) / (e4 - e1) - + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0; - } // endif - } // enddo - } // enddo - } // enddo - - //===================================================================== - // add correct spin normalization : 2 for LDA, 1 for LSDA calculations - //===================================================================== - for (ik = 0;ik < nks;ik++) - { - for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++) - { - wg(ik, ibnd) = wg(ik, ibnd) * 2.0 / nspin; - } - } - return; -} // end subroutine tweights -*/ - -/* -double Occupy::wsweight(const ModuleBase::Vector3 &r, ModuleBase::Vector3 *rws,const int nrws) -{ - //============================================================ - // integer ir, nreq, nrws - // real(kind=dp) r(3), rrt, ck, eps, rws(0:3,nrws), wsweight - // parameter (eps=1.0e-6) - //============================================================ - const double eps = 1.0e-6; - - int nreq = 1; - - for (int ir = 0;ir < nrws;ir++) - { - const 
double rrt = r * rws[ir]; - const double ck = rrt - rws[ir].x; - // rrt = r[1]*rws(1,ir) + r[2]*rws(2,ir) + r[3]*rws(3,ir); - // ck = rrt-rws(0,ir); - - if (ck > eps) - { - break; - } - - if (std::abs(ck) < eps) - { - nreq++; - } - } // end do - - const double wswe = 1.0 / nreq; - - return wswe; -} // end function wsweight -*/ - -/* -void Occupy::efermit(double** ekb,const int nband,const int nks,const double &nelec,const int nspin, - const int ntetra,const ModuleBase::matrix &tetra, double &ef) -{ - //======================================================= - // Finds the Fermi energy - tetrahedron method (Bloechl) - // the transformation Ry to eV - //======================================================= - - // parameter : - const int maxiter = 300; - const double eps = 1.0e-10; - - double efbetter; - - //=================================== - // nlw : the minimum energy band - // elw : the lower limit of the fermi ener - // eup : the upper limit of the fermi ener - // external sumkt - // find bounds for the Fermi energy. 
- //=================================== - const int nlw = max( 1, static_cast( (nelec / 2.0 - 5.0) ) ); - double elw = ekb[nlw][0]; - double eup = ekb[0][GlobalV::NBANDS-1]; - - for (int ik = 1;ik < nks;ik++)// do ik = 2, nks - { - elw = min(elw, ekb[ik][nlw]); - eup = max(eup, ekb[ik][GlobalV::NBANDS-1]); - } - for (int ik = 1;ik < nks;ik++)// do ik = 2, nks - { - elw = min(elw, ekb[ik][nlw]); - eup = max(eup, ekb[ik][GlobalV::NBANDS-1]); - } - - //=============================== - // Bisection method - // the number of states with eup - // the number of states with elw - //=============================== - const double sumkup = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, eup); - const double sumklw = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, elw); - - GlobalV::ofs_running << "\n sumkup = " << sumkup; - GlobalV::ofs_running << "\n sumklw = " << sumklw << std::endl; - - if ((sumkup - nelec) < - eps || (sumklw - nelec) > eps) - { - ModuleBase::WARNING("efermit","unexpected error."); - } - - double better = 1.0e+10; - - bool converge = false; - - double sumkmid = 0.0; - for (int iter = 0;iter < maxiter;iter++) - { - // the number of states with ef - ef = (eup + elw) / 2.0; - sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef); - - if (std::abs(sumkmid - nelec) < better) - { - better = std::abs(sumkmid - nelec); - efbetter = ef; - } - - // converged - if (std::abs(sumkmid - nelec) < eps) - { - converge = true; - break; - } - else if ((sumkmid - nelec) < - eps) - { - elw = ef; - } - else - { - eup = ef; - } - } - if (!converge) - { - // unconverged exit: - // the best available ef is used . 
Needed in some difficult cases - ef = efbetter; - sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef); - } - - //============================================================== - // Check if Fermi level is above any of the highest eigenvalues - //============================================================== - for (int ik = 0;ik < nks;ik++) - { - if (ef > ekb[ik][GlobalV::NBANDS-1] + 1.e-4) - { - std::cout << "\n ef = " << ef; - } - } - return; -} // end subroutine efermit -*/ - -/* -double Occupy::sumkt(double** ekb,const int nband,const int nks,const int nspin,const int ntetra, - const ModuleBase::matrix &tetra,const double &e) -{ - double etetra[4]; - double sum = 0.0; - - int nk = 0 ; - for (int ns = 0; ns < nspin;ns++) - { - //================================================================== - // nk is used to select k-points with up (ns=1) or down (ns=2) spin - //================================================================== - if (ns == 1) - { - nk = 0; - } - else - { - nk = nks / 2; - } - - for (int nt = 0; nt < ntetra; nt++) - { - for (int ibnd = 0; ibnd < GlobalV::NBANDS; ibnd++) - { - //====================================================== - // etetra are the energies at the vertexes of the nt-th - // tetrahedron - //====================================================== - for (int i = 0; i < 4; i++) - { - etetra [i] = ekb[ static_cast( (tetra(i, nt) + nk) )][ ibnd ]; - } - - piksort(4, etetra); - //=========================================== - //sort in ascending order: e1 < e2 < e3 < e4 - //=========================================== - const double e1 = etetra [0]; - const double e2 = etetra [1]; - const double e3 = etetra [2]; - const double e4 = etetra [3]; - - //=============================================== - // calculate sum over k of the integrated charge - //=============================================== - if (e >= e4) - { - sum += 1.0 / ntetra; - } - else if (e < e4 && e >= e3) - { - sum += 1.0 / ntetra * (1.0 - pow((e4 - 
e), 3) / (e4 - e1) - / (e4 - e2) / (e4 - e3)); - } - else if (e < e3 && e >= e2) - { - sum += 1.0 / ntetra / (e3 - e1) / (e4 - e1) * - ((e2 - e1) * (e2 - e1) + 3.0 * (e2 - e1) * (e - e2) + - 3.0 * (e - e2) * (e - e2) - (e3 - e1 + e4 - e2) / - (e3 - e2) / (e4 - e2) * pow((e - e2), 3)); - } - else if (e < e2 && e >= e1) - { - sum += 1.0 / ntetra * pow((e - e1), 3) / - (e2 - e1) / (e3 - e1) / (e4 - e1); - } - }//ibnd - }//nt - }//ns - -// add correct spin normalization : 2 for LDA, 1 for LSDA calculations - sum *= 2.0 / nspin; - return sum; -} // end function sumkt -*/ - -/* -void Occupy::piksort(const int n, double *a) -{ - int i; - bool b = true; - for (int j = 1;j < n;j++) // do j = 2, n - { - const double temp = a [j]; - for (i = j - 1;i >= 0;i--) // do i = j - 1, 1, - 1 - { - if (a [i] <= temp) - { - b = false; - break; - } - a [i + 1] = a [i]; - } - if (b) - { - i = 0; - } - a [i + 1] = temp; - } - return; -} //end subroutine piksort -*/ diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 520dcfd176..0b2608e5ea 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -409,7 +409,7 @@ namespace ModuleESolver } } - this->conv_elec = (drho < this->scf_thr); + this->conv_elec = (drho < this->scf_thr && iter!=GlobalV::MIXING_RESTART); // If drho < hsolver_error in the first iter or drho < scf_thr, we do not change rho. 
if (drho < hsolver_error || this->conv_elec) @@ -435,8 +435,16 @@ namespace ModuleESolver // } // p_chgmix->auto_set(bandgap_for_autoset, GlobalC::ucell); // } - - p_chgmix->mix_rho(pelec->charge); + // mixing will restart after GlobalV::MIXING_RESTART steps + // So, GlobalV::MIXING_RESTART=1 means mix from scratch + if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1) + { + // do not mix charge density + } + else + { + p_chgmix->mix_rho(pelec->charge); // update chr->rho by mixing + } if (GlobalV::SCF_THR_TYPE == 2) pelec->charge->renormalize_rho(); // renormalize rho in R-space would induce a error in K-space //----------charge mixing done----------- } @@ -467,6 +475,11 @@ namespace ModuleESolver bool stop = this->do_after_converge(iter); if(stop) {std::cout << "break\n"; break;} } + // notice for restart + if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1) + { + std::cout<<"SCF restart after this step!"<classname, "Run"); diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 8fdc9217fd..4c4d6c342f 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -309,7 +309,7 @@ namespace ModuleESolver GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; - if (INPUT.out_dos != 0 || INPUT.out_band != 0 || INPUT.out_proj_band != 0) + if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0 || INPUT.out_proj_band != 0) { GlobalV::ofs_running << "\n\n\n\n"; GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -331,7 +331,7 @@ namespace ModuleESolver int nspin0 = (GlobalV::NSPIN == 2) ? 
2 : 1; - if (INPUT.out_band) // pengfei 2014-10-13 + if (INPUT.out_band[0]) // pengfei 2014-10-13 { int nks = 0; if (nspin0 == 1) @@ -348,7 +348,15 @@ namespace ModuleESolver std::stringstream ss2; ss2 << GlobalV::global_out_dir << "BANDS_" << is + 1 << ".dat"; GlobalV::ofs_running << "\n Output bands in file: " << ss2.str() << std::endl; - ModuleIO::nscf_band(is, ss2.str(), nks, GlobalV::NBANDS, 0.0, this->pelec->ekb, this->kv, &(GlobalC::Pkpoints)); + ModuleIO::nscf_band(is, + ss2.str(), + nks, + GlobalV::NBANDS, + 0.0, + INPUT.out_band[1], + this->pelec->ekb, + this->kv, + &(GlobalC::Pkpoints)); } } // out_band @@ -483,8 +491,19 @@ namespace ModuleESolver template void ESolver_KS_LCAO::eachiterinit(const int istep, const int iter) { - if (iter == 1) + if (iter == 1 || iter == GlobalV::MIXING_RESTART) + { + if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart + { + this->p_chgmix->set_mixing(GlobalV::MIXING_MODE, + GlobalV::MIXING_BETA, + GlobalV::MIXING_NDIM, + GlobalV::MIXING_GG0, + GlobalV::MIXING_TAU, + GlobalV::MIXING_BETA_MAG); + } this->p_chgmix->mix_reset(); + } // mohan update 2012-06-05 this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(); diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp index 294f1636f7..07779f62ec 100644 --- a/source/module_esolver/esolver_ks_pw.cpp +++ b/source/module_esolver/esolver_ks_pw.cpp @@ -492,9 +492,19 @@ void ESolver_KS_PW::othercalculation(const int istep) template void ESolver_KS_PW::eachiterinit(const int istep, const int iter) { - if (iter == 1) + if (iter == 1 || iter == GlobalV::MIXING_RESTART) + { + if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart + { + this->p_chgmix->set_mixing(GlobalV::MIXING_MODE, + GlobalV::MIXING_BETA, + GlobalV::MIXING_NDIM, + GlobalV::MIXING_GG0, + GlobalV::MIXING_TAU, + GlobalV::MIXING_BETA_MAG); + } this->p_chgmix->mix_reset(); - + } // mohan move harris functional 
to here, 2012-06-05 // use 'rho(in)' and 'v_h and v_xc'(in) this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(); @@ -894,6 +904,58 @@ void ESolver_KS_PW::afterscf(const int istep) this->kspw_psi[0].get_pointer() - this->kspw_psi[0].get_psi_bias(), this->psi[0].size()); } + + if(INPUT.band_print_num > 0) + { + std::complex * wfcr = new std::complex[this->pw_rho->nxyz]; + double * rho_band = new double [this->pw_rho->nxyz]; + for(int i = 0; i < this->pw_rho->nxyz; i++) + { + rho_band[i] = 0.0; + } + + for(int i = 0; i < INPUT.band_print_num; i++) + { + int ib = INPUT.bands_to_print[i]; + for(int ik = 0; ik < this->kv.nks; ik++) + { + this->psi->fix_k(ik); + this->pw_wfc->recip_to_real(this->ctx,&psi[0](ib,0),wfcr,ik); + + double w1 = static_cast(this->kv.wk[ik] / GlobalC::ucell.omega); + + for(int i = 0; i < this->pw_rho->nxyz; i++) + { + rho_band[i] += std::norm(wfcr[i]) * w1; + } + } + + std::stringstream ssc; + ssc << GlobalV::global_out_dir << "band" << ib << ".cube"; + + ModuleIO::write_rho + ( +#ifdef __MPI + this->pw_big->bz, + this->pw_big->nbz, + this->pw_big->nplane, + this->pw_big->startz_current, +#endif + rho_band, + 0, + GlobalV::NSPIN, + 0, + ssc.str(), + this->pw_rho->nx, + this->pw_rho->ny, + this->pw_rho->nz, + 0.0, + &(GlobalC::ucell), + 11); + } + delete[] wfcr; + delete[] rho_band; + } } template @@ -959,7 +1021,7 @@ void ESolver_KS_PW::postprocess() GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; - if (INPUT.out_dos != 0 || INPUT.out_band != 0) + if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0) { GlobalV::ofs_running << "\n\n\n\n"; GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -1001,7 +1063,7 @@ void ESolver_KS_PW::postprocess() } } - if (INPUT.out_band) // pengfei 2014-10-13 + if (INPUT.out_band[0]) // 
pengfei 2014-10-13 { int nks = 0; if (nspin0 == 1) @@ -1022,6 +1084,7 @@ void ESolver_KS_PW::postprocess() nks, GlobalV::NBANDS, 0.0, + INPUT.out_band[1], this->pelec->ekb, this->kv, &(GlobalC::Pkpoints)); diff --git a/source/module_hamilt_general/module_xc/test/test_xc.cpp b/source/module_hamilt_general/module_xc/test/test_xc.cpp index 558556b66b..a770a88458 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } namespace GlobalV @@ -95,7 +95,7 @@ class XCTest_PBEsol : public testing::Test e_gga.push_back(e); v1_gga.push_back(v1); v2_gga.push_back(v2); - } + } } }; diff --git a/source/module_hamilt_general/module_xc/test/test_xc1.cpp b/source/module_hamilt_general/module_xc/test/test_xc1.cpp index 8e7a451e71..bc5c439630 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc1.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc1.cpp @@ -12,7 +12,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } namespace GlobalV diff --git a/source/module_hamilt_general/module_xc/test/test_xc2.cpp b/source/module_hamilt_general/module_xc/test/test_xc2.cpp index 4b1b7e888e..5bf75a3c68 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc2.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc2.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } namespace GlobalV @@ -202,7 +202,7 @@ class XCTest_PZ_SPN : public testing::Test 
e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -238,7 +238,7 @@ class XCTest_SLATER1_SPN : public testing::Test e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -273,7 +273,7 @@ class XCTest_SLATER_RXC_SPN : public testing::Test e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -310,7 +310,7 @@ class XCTest_P86_SPN : public testing::Test v1_gga.push_back(v1); v2_gga.push_back(v2); v3_gga.push_back(v3); - } + } } }; diff --git a/source/module_hamilt_general/module_xc/test/test_xc4.cpp b/source/module_hamilt_general/module_xc/test/test_xc4.cpp index 114c817b0f..b4c8b70093 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc4.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc4.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} } namespace GlobalV @@ -49,7 +49,7 @@ class XCTest_SCAN : public testing::Test v2_.push_back(v2); v3_.push_back(v3); } - } + } }; TEST_F(XCTest_SCAN, set_xc_type) diff --git a/source/module_hamilt_general/module_xc/test/test_xc5.cpp b/source/module_hamilt_general/module_xc/test/test_xc5.cpp index bd2f87da8a..d9dfed1b20 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc5.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc5.cpp @@ -25,9 +25,6 @@ class XCTest_VXC : public testing::Test double et2 = 0, vt2 = 0; ModuleBase::matrix v2; - double et4 = 0, vt4 = 0; - ModuleBase::matrix v4; - void SetUp() { ModulePW::PW_Basis rhopw; @@ -87,13 +84,6 @@ class XCTest_VXC : public testing::Test vt2 = std::get<1>(etxc_vtxc_v); v2 = std::get<2>(etxc_vtxc_v); - GlobalV::NSPIN = 4; - GlobalV::DOMAG = true; - etxc_vtxc_v - = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell); - et4 = std::get<0>(etxc_vtxc_v); - vt4 = std::get<1>(etxc_vtxc_v); - v4 = 
std::get<2>(etxc_vtxc_v); } }; @@ -121,29 +111,6 @@ TEST_F(XCTest_VXC, set_xc_type) EXPECT_NEAR(v2(1,3),-1.97506482,1.0e-8); EXPECT_NEAR(v2(1,4),-2.160374198,1.0e-8); - EXPECT_NEAR(et4,-27.40098253,1.0e-8); - EXPECT_NEAR(vt4,-35.81948838,1.0e-8); - EXPECT_NEAR(v4(0,0),0,1.0e-8); - EXPECT_NEAR(v4(0,1),-1.559604078,1.0e-8); - EXPECT_NEAR(v4(0,2),-1.920028447,1.0e-8); - EXPECT_NEAR(v4(0,3),-2.168396069,1.0e-8); - EXPECT_NEAR(v4(0,4),-2.36419592,1.0e-8); - EXPECT_NEAR(v4(1,0),0,1.0e-8); - EXPECT_NEAR(v4(1,1),-0.09308179605,1.0e-8); - EXPECT_NEAR(v4(1,2),-0.123132664,1.0e-8); - EXPECT_NEAR(v4(1,3),-0.144332804,1.0e-8); - EXPECT_NEAR(v4(1,4),-0.16127282,1.0e-8); - EXPECT_NEAR(v4(2,0),0,1.0e-8); - EXPECT_NEAR(v4(2,1),-0.9308179605,1.0e-8); - EXPECT_NEAR(v4(2,2),-1.23132664,1.0e-8); - EXPECT_NEAR(v4(2,3),-1.44332804,1.0e-8); - EXPECT_NEAR(v4(2,4),-1.6127282,1.0e-8); - EXPECT_NEAR(v4(3,0),0,1.0e-8); - EXPECT_NEAR(v4(3,1),-0.09308179605,1.0e-8); - EXPECT_NEAR(v4(3,2),-0.123132664,1.0e-8); - EXPECT_NEAR(v4(3,3),-0.144332804,1.0e-8); - EXPECT_NEAR(v4(3,4),-0.16127282,1.0e-8); - } class XCTest_VXC_Libxc : public testing::Test @@ -156,9 +123,6 @@ class XCTest_VXC_Libxc : public testing::Test double et2 = 0, vt2 = 0; ModuleBase::matrix v2; - double et4 = 0, vt4 = 0; - ModuleBase::matrix v4; - void SetUp() { ModulePW::PW_Basis rhopw; @@ -218,13 +182,6 @@ class XCTest_VXC_Libxc : public testing::Test vt2 = std::get<1>(etxc_vtxc_v); v2 = std::get<2>(etxc_vtxc_v); - GlobalV::NSPIN = 4; - GlobalV::DOMAG = true; - etxc_vtxc_v - = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell); - et4 = std::get<0>(etxc_vtxc_v); - vt4 = std::get<1>(etxc_vtxc_v); - v4 = std::get<2>(etxc_vtxc_v); } }; @@ -252,28 +209,6 @@ TEST_F(XCTest_VXC_Libxc, set_xc_type) EXPECT_NEAR(v2(1,3),-1.975058937,1.0e-8); EXPECT_NEAR(v2(1,4),-2.160368003,1.0e-8); - EXPECT_NEAR(et4,-27.28201062,1.0e-8); - EXPECT_NEAR(vt4,-35.98253991,1.0e-8); - EXPECT_NEAR(v4(0,0),0,1.0e-8); - EXPECT_NEAR(v4(0,1),-1.268278149,1.0e-8); - 
EXPECT_NEAR(v4(0,2),-1.598108222,1.0e-8); - EXPECT_NEAR(v4(0,3),-1.828079634,1.0e-8); - EXPECT_NEAR(v4(0,4),-2.010634115,1.0e-8); - EXPECT_NEAR(v4(1,0),0,1.0e-8); - EXPECT_NEAR(v4(1,1),-0.1255782493,1.0e-8); - EXPECT_NEAR(v4(1,2),-0.1582362929,1.0e-8); - EXPECT_NEAR(v4(1,3),-0.1810068558,1.0e-8); - EXPECT_NEAR(v4(1,4),-0.1990824429,1.0e-8); - EXPECT_NEAR(v4(2,0),0,1.0e-8); - EXPECT_NEAR(v4(2,1),-1.255782493,1.0e-8); - EXPECT_NEAR(v4(2,2),-1.582362929,1.0e-8); - EXPECT_NEAR(v4(2,3),-1.810068558,1.0e-8); - EXPECT_NEAR(v4(2,4),-1.990824429,1.0e-8); - EXPECT_NEAR(v4(3,0),0,1.0e-8); - EXPECT_NEAR(v4(3,1),-0.1255782493,1.0e-8); - EXPECT_NEAR(v4(3,2),-0.1582362929,1.0e-8); - EXPECT_NEAR(v4(3,3),-0.1810068558,1.0e-8); - EXPECT_NEAR(v4(3,4),-0.1990824429,1.0e-8); } class XCTest_VXC_meta : public testing::Test diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h index 628937adfe..da7f1e6f08 100644 --- a/source/module_hamilt_general/module_xc/test/xc3_mock.h +++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h @@ -75,7 +75,7 @@ namespace ModulePW return x; } - + template void PW_Basis_K::real_to_recip(const Device* ctx, const std::complex* in, @@ -115,7 +115,7 @@ namespace ModulePW const int ik, const bool add, const double factor) const; -#if __CUDA || __ROCM +#if __CUDA || __ROCM template void PW_Basis_K::real_to_recip(const psi::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, @@ -129,7 +129,7 @@ namespace ModulePW const int ik, const bool add, const double factor) const; -#endif +#endif FFT::FFT(){}; FFT::~FFT(){}; @@ -144,9 +144,13 @@ namespace ModulePW namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING_QUIT(const std::string &file,const std::string &description) + { + std::cout << " " << file <<" warning : "<< description<(float& object); template void reduce_pool(float* object, const int n); template void 
reduce_pool(double* object, const int n); -} \ No newline at end of file +} diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp index 3d85150b22..7ee0394d4d 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp @@ -33,7 +33,7 @@ ModuleBase::matrix SpinConstrain, psi::DEVICE_CPU>::cal_MW_ const char N_char = 'N'; const int one_int = 1; const std::complex one_float = {1.0, 0.0}, zero_float = {0.0, 0.0}; - pzgemm_(&T_char, + pzgemm_(&N_char, &T_char, &nw, &nw, diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp index 0c0595a9ce..bd0ad4ce3a 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp @@ -22,7 +22,7 @@ std::vector>> SpinConstrain AorbMulP[is][iat].resize(nw_it, 0.0); for (int iw = 0; iw < nw_it; iw++) { - AorbMulP[is][iat][iw] = orbMulP(is, num); + AorbMulP[is][iat][iw] = std::abs(orbMulP(is, num))< 1e-10 ? 0.0 : orbMulP(is, num); num++; } } @@ -92,16 +92,10 @@ void SpinConstrain, psi::DEVICE_CPU>::calculate_MW( } else if (this->nspin_ == 4) { - this->Mi_[iat].x = total_charge_soc[1]; - this->Mi_[iat].y = total_charge_soc[2]; - this->Mi_[iat].z = total_charge_soc[3]; + this->Mi_[iat].x = (std::abs(total_charge_soc[1]) < this->sc_thr_)? 0.0 : total_charge_soc[1]; + this->Mi_[iat].y = (std::abs(total_charge_soc[2]) < this->sc_thr_)? 0.0 : total_charge_soc[2]; + this->Mi_[iat].z = (std::abs(total_charge_soc[3]) < this->sc_thr_)? 
0.0 : total_charge_soc[3]; } - if (std::abs(this->Mi_[iat].x) < 1e-12) - this->Mi_[iat].x = 0.0; - if (std::abs(this->Mi_[iat].y) < 1e-12) - this->Mi_[iat].y = 0.0; - if (std::abs(this->Mi_[iat].z) < 1e-12) - this->Mi_[iat].z = 0.0; } } } diff --git a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp index db64d5490d..845db88062 100644 --- a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp @@ -45,6 +45,7 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o } else { + where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda); add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_); this->cal_mw_from_lambda(i_step); new_spin = this->Mi_; @@ -87,6 +88,7 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o add_scalar_multiply_2d(dnu, search, alpha_trial, dnu); delta_lambda = dnu; + where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda); add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_); this->cal_mw_from_lambda(i_step); @@ -115,4 +117,4 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o } alpha_trial = alpha_trial * pow(g, 0.7); } -} \ No newline at end of file +} diff --git a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp index fedb46a976..a55ad59681 100644 --- a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp @@ -28,7 +28,8 @@ void MPIInit() npcol = 1; Cblacs_pinfo(&myrank, &mysize); Cblacs_get(-1, 0, &ictxt); - Cblacs_gridinit(&ictxt, "Row", nprow, npcol); + char order[] = "Row"; + Cblacs_gridinit(&ictxt, order, nprow, npcol); Cblacs_gridinfo(ictxt, &nprow, &npcol, &myprow, &mypcol); } diff --git a/source/module_hsolver/diago_pexsi.cpp 
b/source/module_hsolver/diago_pexsi.cpp index fbaf7b1806..8e4ee5b15b 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,14 +1,13 @@ #include #ifdef __PEXSI -#include "diago_pexsi.h" - #include "c_pexsi_interface.h" +#include "diago_pexsi.h" #include "module_base/global_variable.h" #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "pexsi/pexsi_solver.h" +#include "module_pexsi/pexsi_solver.h" typedef hamilt::MatrixBlock matd; typedef hamilt::MatrixBlock> matcd; @@ -16,7 +15,7 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { -template<> +template <> void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); @@ -25,30 +24,31 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->DM, - this->EDM, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); this->ps->solve(); - this->EDM = this->ps->EDM; - this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]? - this->totalFreeEnergy = this->ps->totalFreeEnergy; - this->totalEnergyH = this->ps->totalEnergyH; - this->totalEnergyS = this->ps->totalEnergyS; + this->EDM = this->ps->get_EDM(); + this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]? 
+ this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); + this->totalEnergyH = this->ps->get_totalEnergyH(); + this->totalEnergyS = this->ps->get_totalEnergyS(); } -template<> -void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, psi::Psi>& psi, double* eigenvalue_in) +template <> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, + psi::Psi>& psi, + double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); - } } // namespace hsolver diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 018397a33d..c212d7795a 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -3,7 +3,7 @@ #include "diagh.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "pexsi/pexsi_solver.h" +#include "module_pexsi/pexsi_solver.h" namespace hsolver { diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index 16fa5f335b..ae784d2009 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -624,17 +624,31 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::P hm->ops->hPsi(info); ModuleBase::timer::tick("DiagoCG_New", "hpsi_func"); }; - auto spsi_func = [hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) { + auto spsi_func = [this, hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) { ModuleBase::timer::tick("DiagoCG_New", "spsi_func"); // psi_in should be a 2D tensor: // psi_in.shape() = [nbands, nbasis] const auto ndim = psi_in.shape().ndim(); REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - // Convert a Tensor object to a psi::Psi object - hm->sPsi(psi_in.data(), spsi_out.data(), + + if (GlobalV::use_uspp) + { + // Convert a Tensor object to a psi::Psi object + hm->sPsi(psi_in.data(), spsi_out.data(), ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), ndim == 1 ? 
psi_in.NumElements() : psi_in.shape().dim_size(1), ndim == 1 ? 1 : psi_in.shape().dim_size(0)); + } else + { + psi::memory::synchronize_memory_op()( + this->ctx, + this->ctx, + spsi_out.data(), + psi_in.data(), + static_cast((ndim == 1 ? 1 : psi_in.shape().dim_size(0)) + * (ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1)))); + } + ModuleBase::timer::tick("DiagoCG_New", "spsi_func"); }; auto psi_tensor = ct::TensorMap( @@ -776,4 +790,4 @@ template class HSolverPW, psi::DEVICE_GPU>; template class HSolverPW, psi::DEVICE_GPU>; #endif -} // namespace hsolver \ No newline at end of file +} // namespace hsolver diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt index 8faab8b4b4..87d16ff557 100644 --- a/source/module_hsolver/module_pexsi/CMakeLists.txt +++ b/source/module_hsolver/module_pexsi/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp) +add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp) if(ENABLE_COVERAGE) add_coverage(pexsi) diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp index cf815bd4ae..e498b83a2e 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "dist_bcd_matrix.h" #include @@ -110,4 +111,5 @@ int DistBCDMatrix::pnum(const int prow, const int pcol) { return this->prowpcol2pnum[prow * this->npcols + pcol]; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h index 7dbddbad7c..98b8512893 100644 --- 
a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -2,6 +2,8 @@ #define DISTBCDMATRIX_H #include + +#include "module_hsolver/module_pexsi/dist_matrix_transformer.h" // a Block Cyclic Data Distribution matrix // http://www.netlib.org/utk/papers/factor/node3.html // local matrix elements is stored in column major @@ -27,6 +29,27 @@ class DistBCDMatrix int pnum(const int prow, const int pcol); //~DistBCDMatrix(); + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const int get_nrow() const + { + return nrow; + }; + const int get_ncol() const + { + return ncol; + }; + const char get_LAYOUT() const + { + return LAYOUT; + }; + private: // MPI communicator MPI_Comm comm; diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp index 365622d249..ddd02aaa9a 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "dist_ccs_matrix.h" #include @@ -114,4 +115,5 @@ DistCCSMatrix::~DistCCSMatrix() delete[] colptrLocal; delete[] rowindLocal; } -} // namespace pexsi \ No newline at end of file +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h index aa5e67b6ab..a63a0dc16c 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -19,6 +19,44 @@ class DistCCSMatrix int globalCol(int localCol); int localCol(int globalCol, int& mypcol); void setnnz(int nnzLocal); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const MPI_Group get_group_data() const + { + return group_data; + }; + const int get_size() 
const + { + return size; + }; + const int get_nnz() const + { + return nnz; + }; + const int get_nnzlocal() const + { + return nnzLocal; + }; + const int get_numcol_local() const + { + return numColLocal; + }; + int* get_colptr_local() const + { + return colptrLocal; + }; + int* get_rowind_local() const + { + return rowindLocal; + }; + ~DistCCSMatrix(); private: @@ -50,6 +88,8 @@ class DistCCSMatrix // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal int* colptrLocal; int* rowindLocal; + + // friend class DistMatrixTransformer; }; } // namespace pexsi #endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index 01b96f42cc..ef6c6fec72 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -1,3 +1,6 @@ +#ifdef __PEXSI +#include "dist_matrix_transformer.h" + #include #include @@ -28,11 +31,11 @@ namespace pexsi // wether this function is called for the first time for a index array; nprocs: total number of processes size_process: // the number of indices in each process displacement_process: the start position in each process index: the array // contains the indices -inline int MinimumIndexPosition(const bool isFirst, - const int nprocs, - int* size_process, - int* displacement_process, - const int* index) +inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) { // usually the minimum index is continuous, so it will be a good idea to // check the one next to the previous index first. 
@@ -104,16 +107,16 @@ inline int MinimumIndexPosition(const bool isFirst, } } -inline void buildCCSParameter(const int size, - const int nprocs, - std::vector size_process, - std::vector displacement_process, - const int* position_index, - DistCCSMatrix& DST_Matrix, - int* buffer2ccsIndex) +inline void DistMatrixTransformer::buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) { // find the minimum one from left buffer index - if (DST_Matrix.nnzLocal <= 0) + if (DST_Matrix.get_nnzlocal() <= 0) return; int pre_col = -1; @@ -123,31 +126,34 @@ inline void buildCCSParameter(const int size, while (p_mini >= 0) { int index_mini = position_index[p_mini]; - int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol; - int row_mini = index_mini % DST_Matrix.size; + int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.get_size(); if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array { pre_col = col_mini; - DST_Matrix.colptrLocal[col_mini] = nnz_now + 1; + DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1; } - DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based + DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based // copy data from buffer to M, be careful M is a 0-based array buffer2ccsIndex[nnz_now] = p_mini; ++nnz_now; p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); } // The last element of colptrLocal is nnzLocal+1 - DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1; + DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1; } -inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal) +inline void 
DistMatrixTransformer::buffer2CCSvalue(int nnzLocal, + int* buffer2ccsIndex, + double* buffer, + double* nzvalLocal) { for (int i = 0; i < nnzLocal; ++i) { nzvalLocal[i] = buffer[buffer2ccsIndex[i]]; } } -inline void countMatrixDistribution(int N, double* A, std::map& P) +inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map& P) { for (int i = 0; i < N; ++i) { @@ -161,15 +167,15 @@ inline void countMatrixDistribution(int N, double* A, std::map& P) } // find out the index of non-zero elements -inline int getNonZeroIndex(char LAYOUT, - const int nrow, - const int ncol, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - int& nnz, - std::vector& rowidx, - std::vector& colidx) +inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) { #ifdef _DEBUG char f_log[80]; @@ -275,21 +281,21 @@ inline int getNonZeroIndex(char LAYOUT, return 0; } -int buildTransformParameter(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - const int NPROC_TRANS, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS, - const int nnz, - std::vector& rowidx, - std::vector& colidx, - int& sender_size, - std::vector& sender_size_process, - std::vector& sender_displacement_process, - int& receiver_size, - std::vector& receiver_size_process, - std::vector& receiver_displacement_process, - std::vector& buffer2ccsIndex) +int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) { // 
debug int myproc; @@ -322,12 +328,12 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, std::vector proc_map_data_trans; if (myproc == 0) { - MPI_Group_size(DST_Matrix.group_data, &nproc_data); + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); proc_map_data_trans.resize(nproc_data, 0); for (int i = 0; i < nproc_data; ++i) { - MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); } MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); } @@ -429,7 +435,7 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, int dst_col = DST_Matrix.localCol(g_col, dst_process); int l_row = rowidx[i]; int dst_row = SRC_Matrix.globalRow(l_row); - sender_index[i] = dst_col * DST_Matrix.size + dst_row; + sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row; } // debug #ifdef _DEBUG @@ -478,10 +484,10 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix, return 0; } -int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS) +int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS) { // debug #ifdef _DEBUG @@ -499,7 +505,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, #endif // build transfortram communicator which contains both processes of BCD processors and // CCS processors with nonzero elements - MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS); + MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS); MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS); // debug #ifdef _DEBUG @@ -557,7 +563,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, return 0; } -int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& 
COMM_TRANS) +int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) { MPI_Group_free(&GROUP_TRANS); if (COMM_TRANS != MPI_COMM_NULL) @@ -571,13 +577,13 @@ int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) // two destination matrices share the same non-zero elements positions // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, // even if one of them is acturely zero All matrices must have same MPI communicator -int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - DistCCSMatrix& DST_Matrix, - double*& H_ccs, - double*& S_ccs) +int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs) { // debug #ifdef _DEBUG @@ -614,9 +620,9 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, { log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow << " ; mypcol: " << SRC_Matrix.mypcol << std::endl; - log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol + log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol() << std::endl; - log << "layout:" << SRC_Matrix.LAYOUT << std::endl; + log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl; log << "ZERO = " << ZERO_Limit << std::endl; log << "DST_Matrix parameters:" << std::endl; log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl; @@ -633,11 +639,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if (myproc < 100) log << "start counting nnz..." 
<< std::endl; #endif - if (SRC_Matrix.comm != MPI_COMM_NULL) + if (SRC_Matrix.get_comm() != MPI_COMM_NULL) { - getNonZeroIndex(SRC_Matrix.LAYOUT, - SRC_Matrix.nrow, - SRC_Matrix.ncol, + getNonZeroIndex(SRC_Matrix.get_LAYOUT(), + SRC_Matrix.get_nrow(), + SRC_Matrix.get_ncol(), H_2d, S_2d, ZERO_Limit, @@ -654,11 +660,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if(SRC_Matrix.comm != MPI_COMM_NULL) { log<<"NonZeroIndex :"< sender_buffer(sender_size); std::vector receiver_buffer(receiver_size); // put H to sender buffer - if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; + sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; + sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } #ifdef _DEBUG @@ -749,18 +755,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, #endif // put S to sender buffer - if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') + if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; + sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; + sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } #ifdef _DEBUG @@ -804,12 +810,12 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution // two source matrices share the same non-zero elements positions -int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, - 
double* DMnzvalLocal, - double* EDMnzvalLocal, - DistBCDMatrix& DST_Matrix, - double* DM, - double* EDM) +int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM, + double* EDM) { // debug #ifdef _DEBUG @@ -840,7 +846,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, if (COMM_TRANS != MPI_COMM_NULL) { // init DM and EDM with 0 - for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i) + for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i) { DM[i] = 0; EDM[i] = 0; @@ -877,12 +883,12 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, MPI_Comm_rank(COMM_TRANS, &myproc_trans); if (myproc_trans == 0) { - MPI_Group_size(DST_Matrix.group, &nproc_bcd); + MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); proc_map_bcd_trans.resize(nproc_bcd, 0); for (int i = 0; i < nproc_bcd; ++i) { - MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); } MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); } @@ -933,7 +939,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, log << "display all columns and rows of nonzeros values:\n"; int log_nnz = 0; #endif - for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol) + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) { int g_col = SRC_Matrix.globalCol(icol); int recv_pcol_bcd; @@ -942,9 +948,9 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, // log<DST_Matrix.nrow) + else if(receiver_index[i*2]>DST_Matrix.get_nrow()) { - log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "< "<DST_Matrix.ncol) + else if(receiver_index[i*2+1]>DST_Matrix.get_ncol()) { - log<<"ERROR! 
receiver_index(BCD)["<<2*i+1<<"] = "< "< "< DST_Matrix.nrow) + else if (receiver_index[i * 2] > DST_Matrix.get_nrow()) { log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > " - << DST_Matrix.nrow << std::endl; + << DST_Matrix.get_nrow() << std::endl; log.flush(); } if (receiver_index[i * 2 + 1] < 0) @@ -1388,10 +1394,10 @@ MPI_Barrier(COMM_TRANS); << std::endl; log.flush(); } - else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol) + else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol()) { log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > " - << DST_Matrix.ncol << std::endl; + << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1428,14 +1434,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM"); #endif // transform receiver_buffer to DM - if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') + if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.ncol + iy; + int idx = ix * DST_Matrix.get_ncol() + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1444,7 +1450,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.ncol << std::endl; + << " ncol = " << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1454,12 +1460,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = 
receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.nrow + ix; + int idx = iy * DST_Matrix.get_nrow() + ix; #ifdef _DEBUG if (myproc < 100) { @@ -1468,7 +1474,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.nrow << std::endl; + << " nrow = " << DST_Matrix.get_nrow() << std::endl; log.flush(); } } @@ -1512,14 +1518,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM"); #endif // transform receiver_buffer to EDM - if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') + if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.ncol + iy; + int idx = ix * DST_Matrix.get_ncol() + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1528,7 +1534,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.ncol << std::endl; + << " ncol = " << DST_Matrix.get_ncol() << std::endl; log.flush(); } } @@ -1538,12 +1544,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.nrow + ix; + int idx = iy * DST_Matrix.get_nrow() + ix; #ifdef _DEBUG if (myproc < 100) { @@ -1552,7 +1558,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM 
total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.nrow << std::endl; + << " nrow = " << DST_Matrix.get_nrow() << std::endl; log.flush(); } } @@ -1596,3 +1602,4 @@ MPI_Barrier(COMM_TRANS); } } // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index 1d28866c96..e3e27e995a 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -1,8 +1,9 @@ #ifndef DISTMATRIXTRANSFORMER_H #define DISTMATRIXTRANSFORMER_H -#include "dist_bcd_matrix.h" -#include "dist_ccs_matrix.h" +#include + +#include // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution // they should have same MPI communicator // The local matrix of BCD is column-major order @@ -15,6 +16,62 @@ // even if one of them is acturely zero All matrices must have same MPI communicator namespace pexsi { +class DistBCDMatrix; +class DistCCSMatrix; + +namespace DistMatrixTransformer +{ +int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index); + +void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex); + +void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal); + +void countMatrixDistribution(int N, double* A, std::map& P); + +int getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx); + +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& 
DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex); + +int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS); + +int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS); + int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double* H_2d, double* S_2d, @@ -31,6 +88,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* ENDnzvalLocal, DistBCDMatrix& DST_Matrix, double* DM_2d, - double* END_2d); + double* ED_2d); +}; // namespace DistMatrixTransformer } // namespace pexsi #endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 90d16ae993..1be66abf59 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -1,3 +1,4 @@ +#ifdef __PEXSI #include "pexsi_solver.h" #include @@ -5,6 +6,11 @@ #include #include "module_base/global_variable.h" +#include "simple_pexsi.h" + +extern MPI_Comm DIAG_WORLD; +extern MPI_Comm GRID_WORLD; +extern MPI_Group GRID_GROUP; namespace pexsi { @@ -37,9 +43,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, int PEXSI_Solver::solve() { - extern MPI_Comm DIAG_WORLD; - extern MPI_Comm GRID_WORLD; - extern MPI_Group GRID_GROUP; + simplePEXSI(DIAG_WORLD, GRID_WORLD, GRID_GROUP, @@ -61,12 +65,12 @@ int PEXSI_Solver::solve() return 0; } -const double* PEXSI_Solver::get_DM() const +double* PEXSI_Solver::get_DM() const { return DM; } -const double* PEXSI_Solver::get_EDM() const +double* PEXSI_Solver::get_EDM() const { return EDM; } @@ 
-75,4 +79,16 @@ const double PEXSI_Solver::get_totalFreeEnergy() const { return totalFreeEnergy; } -} // namespace pexsi \ No newline at end of file + +const double PEXSI_Solver::get_totalEnergyH() const +{ + return totalEnergyH; +} + +const double PEXSI_Solver::get_totalEnergyS() const +{ + return totalEnergyS; +} + +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index 0c3164e5f0..b3d7aed152 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -18,9 +18,11 @@ class PEXSI_Solver double& totalEnergyS, double& totalFreeEnergy); int solve(); - const double* get_DM() const; - const double* get_EDM() const; + double* get_DM() const; + double* get_EDM() const; const double get_totalFreeEnergy() const; + const double get_totalEnergyH() const; + const double get_totalEnergyS() const; private: int blacs_text; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index 845beef18c..df72a061c5 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -2,6 +2,7 @@ // the H and S matrices are given by 2D block cyclic distribution // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution // #include "mpi.h" +#ifdef __PEXSI #include #include @@ -18,6 +19,7 @@ #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" +#include "module_base/global_variable.h" namespace pexsi { @@ -102,220 +104,252 @@ int loadPEXSIOption(MPI_Comm comm, // 10: numElectronPEXSITolerance // 11: ZERO_Limit double double_para[12]; - int myid; - MPI_Comm_rank(comm, &myid); - if (myid == 0) - { - std::ifstream ifs(PexsiOptionFile.c_str()); - if (!ifs) - { - return 1; - } - 
setDefaultOption(int_para, double_para); - ifs.clear(); - ifs.seekg(0); + // read in PEXSI options from GlobalV + int_para[0] = GlobalV::pexsi_npole; + int_para[1] = GlobalV::pexsi_inertia; + int_para[2] = GlobalV::pexsi_nmax; + int_para[3] = 0; + int_para[4] = 1; // GlobalV::pexsi_symbolic; + int_para[5] = GlobalV::pexsi_comm; + int_para[6] = 0; + int_para[7] = GlobalV::pexsi_storage; + int_para[8] = GlobalV::pexsi_ordering; + int_para[9] = GlobalV::pexsi_row_ordering; + int_para[10] = GlobalV::pexsi_nproc; + int_para[11] = GlobalV::pexsi_symm; + int_para[12] = GlobalV::pexsi_trans; + int_para[13] = GlobalV::pexsi_method; + int_para[14] = 2; + int_para[15] = 0; + int_para[16] = GlobalV::pexsi_nproc_pole; - char key[128]; - char lowercase_key[128]; - const int LINE_LINGTH = 1024; - char unused_string[LINE_LINGTH]; + double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin; + double_para[1] = GlobalV::pexsi_temp; + double_para[2] = GlobalV::pexsi_gap; + double_para[3] = GlobalV::pexsi_delta_e; + double_para[4] = GlobalV::pexsi_mu_lower; + double_para[5] = GlobalV::pexsi_mu_upper; + double_para[6] = GlobalV::pexsi_mu; + double_para[7] = GlobalV::pexsi_mu_thr; + double_para[8] = GlobalV::pexsi_mu_expand; + double_para[9] = GlobalV::pexsi_mu_guard; + double_para[10] = GlobalV::pexsi_elec_thr; + double_para[11] = GlobalV::pexsi_zero_thr; + // int myid; + // MPI_Comm_rank(comm, &myid); + // if (myid == 0) + // { + // std::ifstream ifs(PexsiOptionFile.c_str()); + // if (!ifs) + // { + // return 1; + // } + // setDefaultOption(int_para, double_para); - while (ifs.good()) - { - ifs >> key; - //~ cout<<"readin word is: "<>options.spin; - ifs >> double_para[0]; - //~ cout<<"double_para[0]: "<>options.temperature; - ifs >> double_para[1]; - //~ cout<<"double_para[1]: "<>options.gap; - ifs >> double_para[2]; - //~ cout<<"double_para[2]: "<>options.deltaE; - ifs >> double_para[3]; - //~ cout<<"double_para[3]: "<>options.numPole; - ifs >> int_para[0]; - //~ cout<<"int_para[0]: 
"<>options.isInertiaCount; - ifs >> int_para[1]; - //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; - ifs >> int_para[2]; - //~ cout<<"int_para[2]: "<>options.muMin0; - ifs >> double_para[4]; - //~ cout<<"double_para[4]: "<>options.muMax0; - ifs >> double_para[5]; - //~ cout<<"double_para[5]: "<>options.mu0; - ifs >> double_para[6]; - //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; - ifs >> double_para[7]; - //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; - ifs >> double_para[8]; - //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; - ifs >> double_para[9]; - //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; - ifs >> double_para[10]; - //~ cout<<"double_para[10]: "<> double_para[11]; - } - else if (strcmp("matrixtype", lowercase_key) == 0) - { - //~ ifs>>options.matrixType; - ifs >> int_para[3]; - //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; - ifs >> int_para[4]; - //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; - ifs >> int_para[5]; - //~ cout<<"int_para[5]: "<>options.solver; - ifs >> int_para[6]; - //~ cout<<"int_para[6]: "<>options.symmetricStorage; - ifs >> int_para[7]; - //~ cout<<"int_para[7]: "<>options.ordering; - ifs >> int_para[8]; - //~ cout<<"int_para[8]: "<>options.rowOrdering; - ifs >> int_para[9]; - //~ cout<<"int_para[9]: "<>options.npSymbFact; - ifs >> int_para[10]; - //~ cout<<"int_para[10]: "<>options.symmetric; - ifs >> int_para[11]; - //~ cout<<"int_para[11]: "<>options.transpose; - ifs >> int_para[12]; - //~ cout<<"int_para[12]: "<>options.method; - ifs >> int_para[13]; - //~ cout<<"int_para[13]: "<>options.nPoints; - ifs >> int_para[14]; - //~ cout<<"int_para[14]: "<>options.verbosity; - ifs >> int_para[15]; - //~ cout<<"int_para[15]: "<>options.verbosity; - ifs >> int_para[16]; - //~ cout<<"int_para[16]: "<> key; + // //~ cout<<"readin word is: "<>options.spin; + // ifs >> double_para[0]; + // //~ cout<<"double_para[0]: "<>options.temperature; + // ifs >> double_para[1]; + // //~ 
cout<<"double_para[1]: "<>options.gap; + // ifs >> double_para[2]; + // //~ cout<<"double_para[2]: "<>options.deltaE; + // ifs >> double_para[3]; + // //~ cout<<"double_para[3]: "<>options.numPole; + // ifs >> int_para[0]; + // //~ cout<<"int_para[0]: "<>options.isInertiaCount; + // ifs >> int_para[1]; + // //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; + // ifs >> int_para[2]; + // //~ cout<<"int_para[2]: "<>options.muMin0; + // ifs >> double_para[4]; + // //~ cout<<"double_para[4]: "<>options.muMax0; + // ifs >> double_para[5]; + // //~ cout<<"double_para[5]: "<>options.mu0; + // ifs >> double_para[6]; + // //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; + // ifs >> double_para[7]; + // //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; + // ifs >> double_para[8]; + // //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; + // ifs >> double_para[9]; + // //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; + // ifs >> double_para[10]; + // //~ cout<<"double_para[10]: "<> double_para[11]; + // } + // else if (strcmp("matrixtype", lowercase_key) == 0) + // { + // //~ ifs>>options.matrixType; + // ifs >> int_para[3]; + // //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; + // ifs >> int_para[4]; + // //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; + // ifs >> int_para[5]; + // //~ cout<<"int_para[5]: "<>options.solver; + // ifs >> int_para[6]; + // //~ cout<<"int_para[6]: "<>options.symmetricStorage; + // ifs >> int_para[7]; + // //~ cout<<"int_para[7]: "<>options.ordering; + // ifs >> int_para[8]; + // //~ cout<<"int_para[8]: "<>options.rowOrdering; + // ifs >> int_para[9]; + // //~ cout<<"int_para[9]: "<>options.npSymbFact; + // ifs >> int_para[10]; + // //~ cout<<"int_para[10]: "<>options.symmetric; + // ifs >> int_para[11]; + // //~ cout<<"int_para[11]: "<>options.transpose; + // ifs >> int_para[12]; + // //~ cout<<"int_para[12]: "<>options.method; + // ifs >> int_para[13]; + // //~ cout<<"int_para[13]: 
"<>options.nPoints; + // ifs >> int_para[14]; + // //~ cout<<"int_para[14]: "<>options.verbosity; + // ifs >> int_para[15]; + // //~ cout<<"int_para[15]: "<>options.verbosity; + // ifs >> int_para[16]; + // //~ cout<<"int_para[16]: "< 0) + { + bands_to_print.resize(band_print_num); + ifs.clear(); + ifs.seekg(0); // move to the beginning of the file + ifs.rdstate(); + while (ifs.good()) + { + ifs >> word1; + if (ifs.eof() != 0) + break; + strtolower(word1, word); // convert uppercase std::string to lower case; word1 --> word + + if (strcmp("bands_to_print", word) == 0) + { + for(int i = 0; i < band_print_num; i ++) + { + ifs >> bands_to_print[i]; + } + } + } + } + //---------------------------------------------------------- // DFT+U Xin Qu added on 2020-10-29 //---------------------------------------------------------- @@ -2826,7 +2996,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04 this->relax_nmax = 1; out_stru = 0; out_dos = 0; - out_band = 0; + out_band[0] = 0; out_proj_band = 0; cal_force = 0; init_wfc = "file"; @@ -2843,7 +3013,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04 this->relax_nmax = 1; out_stru = 0; out_dos = 0; - out_band = 0; + out_band[0] = 0; out_proj_band = 0; cal_force = 0; init_wfc = "file"; @@ -3292,6 +3462,7 @@ void Input::Bcast() Parallel_Common::bcast_string(mixing_mode); Parallel_Common::bcast_double(mixing_beta); Parallel_Common::bcast_int(mixing_ndim); + Parallel_Common::bcast_int(mixing_restart); Parallel_Common::bcast_double(mixing_gg0); // mohan add 2014-09-27 Parallel_Common::bcast_double(mixing_beta_mag); Parallel_Common::bcast_double(mixing_gg0_mag); @@ -3325,7 +3496,8 @@ void Input::Bcast() Parallel_Common::bcast_int(out_wfc_pw); Parallel_Common::bcast_bool(out_wfc_r); Parallel_Common::bcast_int(out_dos); - Parallel_Common::bcast_bool(out_band); + if(GlobalV::MY_RANK != 0) out_band.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */ + 
Parallel_Common::bcast_int(out_band.data(), 2); Parallel_Common::bcast_bool(out_proj_band); if(GlobalV::MY_RANK != 0) out_mat_hs.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */ Parallel_Common::bcast_int(out_mat_hs.data(), 2); @@ -3523,6 +3695,17 @@ void Input::Bcast() Parallel_Common::bcast_bool(restart_save); // Peize Lin add 2020.04.04 Parallel_Common::bcast_bool(restart_load); // Peize Lin add 2020.04.04 + Parallel_Common::bcast_int(band_print_num); + if(GlobalV::MY_RANK != 0) + { + bands_to_print.resize(band_print_num); + } + + for(int i = 0; i < band_print_num; i++) + { + Parallel_Common::bcast_int(bands_to_print[i]); + } + //----------------------------------------------------------------------------------- // DFT+U (added by Quxin 2020-10-29) //----------------------------------------------------------------------------------- @@ -3625,6 +3808,34 @@ void Input::Bcast() Parallel_Common::bcast_bool(qo_switch); Parallel_Common::bcast_string(qo_basis); Parallel_Common::bcast_double(qo_thr); + //========================================================== + // PEXSI + //========================================================== + Parallel_Common::bcast_int(pexsi_npole); + Parallel_Common::bcast_int(pexsi_inertia); + Parallel_Common::bcast_int(pexsi_nmax); + // Parallel_Common::bcast_int(pexsi_symbolic); + Parallel_Common::bcast_int(pexsi_comm); + Parallel_Common::bcast_int(pexsi_storage); + Parallel_Common::bcast_int(pexsi_ordering); + Parallel_Common::bcast_int(pexsi_row_ordering); + Parallel_Common::bcast_int(pexsi_nproc); + Parallel_Common::bcast_int(pexsi_symm); + Parallel_Common::bcast_int(pexsi_trans); + Parallel_Common::bcast_int(pexsi_method); + Parallel_Common::bcast_int(pexsi_nproc_pole); + // Parallel_Common::bcast_double(pexsi_spin); + Parallel_Common::bcast_double(pexsi_temp); + Parallel_Common::bcast_double(pexsi_gap); + Parallel_Common::bcast_double(pexsi_delta_e); + 
Parallel_Common::bcast_double(pexsi_mu_lower); + Parallel_Common::bcast_double(pexsi_mu_upper); + Parallel_Common::bcast_double(pexsi_mu); + Parallel_Common::bcast_double(pexsi_mu_thr); + Parallel_Common::bcast_double(pexsi_mu_expand); + Parallel_Common::bcast_double(pexsi_mu_guard); + Parallel_Common::bcast_double(pexsi_elec_thr); + Parallel_Common::bcast_double(pexsi_zero_thr); /* broadcasting std::vector is sometime a annorying task... */ if (ntype != 0) /* ntype has been broadcasted before */ { @@ -3922,10 +4133,11 @@ void Input::Check(void) } else if (ks_solver == "pexsi") { -#ifndef __MPI - ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); -#else +#ifdef __PEXSI GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; +#else + ModuleBase::WARNING_QUIT("Input", + "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx."); #endif diff --git a/source/module_io/input.h b/source/module_io/input.h index b4e983abad..1d29c6311a 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -232,6 +232,7 @@ class Input std::string mixing_mode; // "plain","broyden",... double mixing_beta; // 0 : no_mixing int mixing_ndim; // used in Broyden method + int mixing_restart; double mixing_gg0; // used in kerker method. mohan add 2014-09-27 double mixing_beta_mag; double mixing_gg0_mag; @@ -259,11 +260,13 @@ class Input bool out_chg; // output charge density. 0: no; 1: yes bool out_dm; // output density matrix. bool out_dm1; + int band_print_num; + std::vector bands_to_print; int out_pot; // yes or no int out_wfc_pw; // 0: no; 1: txt; 2: dat bool out_wfc_r; // 0: no; 1: yes int out_dos; // dos calculation. 
mohan add 20090909 - bool out_band; // band calculation pengfei 2014-10-13 + std::vector out_band; // band calculation pengfei 2014-10-13 bool out_proj_band; // projected band structure calculation jiyy add 2022-05-11 std::vector out_mat_hs; // output H matrix and S matrix in local basis. bool out_mat_xc; // output exchange-correlation matrix in KS-orbital representation. @@ -599,6 +602,34 @@ class Input double qo_thr = 1e-6; std::vector qo_strategy = {}; std::vector qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + int pexsi_npole = 54; + int pexsi_inertia = 1; + int pexsi_nmax = 80; + // int pexsi_symbolic = 1; + int pexsi_comm = 1; + int pexsi_storage = 1; + int pexsi_ordering = 0; + int pexsi_row_ordering = 1; + int pexsi_nproc = 1; + int pexsi_symm = 1; + int pexsi_trans = 0; + int pexsi_method = 1; + int pexsi_nproc_pole = 1; + // double pexsi_spin = 2; + double pexsi_temp = 0.0001; + double pexsi_gap = 0; + double pexsi_delta_e = 20.0; + double pexsi_mu_lower = -10; + double pexsi_mu_upper = 10; + double pexsi_mu = 0.0; + double pexsi_mu_thr = 0.05; + double pexsi_mu_expand = 0.3; + double pexsi_mu_guard = 0.2; + double pexsi_elec_thr = 0.001; + double pexsi_zero_thr = 1e-10; private: //========================================================== @@ -667,7 +698,15 @@ class Input template typename std::enable_if::value, T>::type cast_string(const std::string& str) { return std::stod(str); } template - typename std::enable_if::value, T>::type cast_string(const std::string& str) { return std::stoi(str); } + typename std::enable_if::value, T>::type cast_string(const std::string& str) + { + if (str == "true" || str == "1") + return 1; + else if (str == "false" || str == "0") + return 0; + else + return std::stoi(str); + } template typename std::enable_if::value, T>::type cast_string(const std::string& str) { return (str == "true" || str == 
"1"); } template diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index a52245d05c..d6e3371111 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -750,6 +750,7 @@ void Input_Conv::Convert(void) GlobalV::MIXING_MODE = INPUT.mixing_mode; GlobalV::MIXING_BETA = INPUT.mixing_beta; GlobalV::MIXING_NDIM = INPUT.mixing_ndim; + GlobalV::MIXING_RESTART = INPUT.mixing_restart; GlobalV::MIXING_GG0 = INPUT.mixing_gg0; GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag; GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag; @@ -765,6 +766,35 @@ void Input_Conv::Convert(void) GlobalV::qo_strategy = INPUT.qo_strategy; GlobalV::qo_thr = INPUT.qo_thr; GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff; + + //----------------------------------------------- + // PEXSI related parameters + //----------------------------------------------- + GlobalV::pexsi_npole = INPUT.pexsi_npole; + GlobalV::pexsi_inertia = INPUT.pexsi_inertia; + GlobalV::pexsi_nmax = INPUT.pexsi_nmax; + // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic; + GlobalV::pexsi_comm = INPUT.pexsi_comm; + GlobalV::pexsi_storage = INPUT.pexsi_storage; + GlobalV::pexsi_ordering = INPUT.pexsi_ordering; + GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering; + GlobalV::pexsi_nproc = INPUT.pexsi_nproc; + GlobalV::pexsi_symm = INPUT.pexsi_symm; + GlobalV::pexsi_trans = INPUT.pexsi_trans; + GlobalV::pexsi_method = INPUT.pexsi_method; + GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // GlobalV::pexsi_spin = INPUT.pexsi_spin; + GlobalV::pexsi_temp = INPUT.pexsi_temp; + GlobalV::pexsi_gap = INPUT.pexsi_gap; + GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e; + GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower; + GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper; + GlobalV::pexsi_mu = INPUT.pexsi_mu; + GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr; + GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand; + GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard; + GlobalV::pexsi_elec_thr = 
INPUT.pexsi_elec_thr; + GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr; ModuleBase::timer::tick("Input_Conv", "Convert"); return; } diff --git a/source/module_io/mulliken_charge.cpp b/source/module_io/mulliken_charge.cpp index 393da5fda4..bdcdb5a035 100644 --- a/source/module_io/mulliken_charge.cpp +++ b/source/module_io/mulliken_charge.cpp @@ -44,7 +44,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector> const char N_char = 'N'; const int one_int = 1; const double one_float = 1.0, zero_float = 0.0; - pdgemm_(&T_char, + pdgemm_(&N_char, &T_char, &GlobalV::NLOCAL, &GlobalV::NLOCAL, @@ -156,7 +156,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector one_float = {1.0, 0.0}, zero_float = {0.0, 0.0}; - pzgemm_(&T_char, + pzgemm_(&N_char, &T_char, &GlobalV::NLOCAL, &GlobalV::NLOCAL, diff --git a/source/module_io/nscf_band.cpp b/source/module_io/nscf_band.cpp index d8b7b05ca6..290dc58bd3 100644 --- a/source/module_io/nscf_band.cpp +++ b/source/module_io/nscf_band.cpp @@ -3,6 +3,7 @@ #include "module_base/global_variable.h" #include "module_base/timer.h" #include "module_base/tool_title.h" +#include "module_base/formatter_physfmt.h" void ModuleIO::nscf_band( const int &is, @@ -10,6 +11,7 @@ void ModuleIO::nscf_band( const int &nks, const int &nband, const double &fermie, + const int &precision, const ModuleBase::matrix& ekb, const K_Vectors& kv, const Parallel_Kpoints* Pkpoints) @@ -33,23 +35,28 @@ void ModuleIO::nscf_band( if (ik>0) { auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1]; - klength[ik] = klength[ik-1] + delta.norm(); + klength[ik] = klength[ik-1]; + klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? 
delta.norm() : 0.0; } + /* first find if present kpoint in present pool */ if ( GlobalV::MY_POOL == Pkpoints->whichpool[ik] ) { + /* then get the local kpoint index, which starts definitly from 0 */ const int ik_now = ik - Pkpoints->startk_pool[GlobalV::MY_POOL]; + /* if present kpoint corresponds the spin of the present one */ if( kv.isk[ik_now+is*nks] == is ) { if ( GlobalV::RANK_IN_POOL == 0) { - std::ofstream ofs(out_band_dir.c_str(),std::ios::app); - ofs << std::setprecision(8); - //start from 1 - ofs << ik+1; - ofs << " " << klength[ik] << " "; + formatter::PhysicalFmt physfmt; // create a physical formatter temporarily + std::ofstream ofs(out_band_dir.c_str(), std::ios::app); + physfmt.adjust_formatter_flexible(4, 0, false); // for integer + ofs << physfmt.get_p_formatter()->format(ik+1); + physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal + ofs << physfmt.get_p_formatter()->format(klength[ik]); for(int ib = 0; ib < nband; ib++) { - ofs << " " << (ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV; + ofs << physfmt.get_p_formatter()->format((ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV); } ofs << std::endl; ofs.close(); @@ -83,18 +90,30 @@ void ModuleIO::nscf_band( #else // std::cout<<"\n nband = "< klength; + klength.resize(nks); + klength[0] = 0.0; std::ofstream ofs(out_band_dir.c_str()); for(int ik=0;ik0) + { + auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1]; + klength[ik] = klength[ik-1]; + klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? 
delta.norm() : 0.0; + } if( kv.isk[ik] == is) { - ofs<format(ik+1); + physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal + ofs << physfmt.get_p_formatter()->format(klength[ik]); // add klength, in accordance with the MPI version for(int ibnd = 0; ibnd < nband; ibnd++) { - ofs <format((ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV); } - ofs<& default_parametes_type) +void default_parametes_reader(const std::string& fn, std::map& default_parametes_type) { std::ifstream inputFile(fn.c_str()); if (inputFile.is_open()) @@ -122,28 +120,24 @@ bool default_parametes_reader(const std::string& fn, std::map& input) +void input_parameters_get(const std::string& fn, std::map& input) { - // The module title information is displayed ModuleBase::TITLE("Input", "Read"); - // If it is not the primary node, return false if (GlobalV::MY_RANK != 0) - return false; + return; // Open the input parameter file std::ifstream ifs(fn.c_str(), std::ios::in); // "in_datas/input_parameters" - // If the opening fails, an error message is printed and false is returned if (!ifs) { - std::cout << " Can't find the INPUT file." 
<< std::endl; - return false; + ModuleBase::WARNING_QUIT("Input", "Can't find the INPUT file at " + fn); } ifs.clear(); ifs.seekg(0); @@ -166,8 +160,7 @@ bool input_parameters_get(const std::string& fn, std::map input_parameters) +void input_parameters_set(std::map input_parameters) { if (input_parameters.count("nupdown") != 0) { @@ -831,6 +818,10 @@ bool input_parameters_set(std::map input_parameters { INPUT.mixing_ndim = *static_cast(input_parameters["mixing_ndim"].get()); } + else if (input_parameters.count("mixing_restart") != 0) + { + INPUT.mixing_restart = *static_cast(input_parameters["mixing_restart"].get()); + } else if (input_parameters.count("mixing_gg0") != 0) { INPUT.mixing_gg0 = *static_cast(input_parameters["mixing_gg0"].get()); @@ -917,7 +908,7 @@ bool input_parameters_set(std::map input_parameters } else if (input_parameters.count("out_band") != 0) { - INPUT.out_band = *static_cast(input_parameters["out_band"].get()); + INPUT.out_band = *static_cast*>(input_parameters["out_band"].get()); } else if (input_parameters.count("out_proj_band") != 0) { diff --git a/source/module_io/parameter_pool.h b/source/module_io/parameter_pool.h index 83baedd036..bd4ae575dd 100644 --- a/source/module_io/parameter_pool.h +++ b/source/module_io/parameter_pool.h @@ -241,12 +241,12 @@ class InputParameter } } }; -bool Init(const std::string& default_type_path, +void Init(const std::string& default_type_path, const std::string& default_value_path, const std::string& input_value_path); -bool default_parametes_reader(const std::string& fn, std::map& default_parametes_type); -bool input_parameters_get(const std::string& fn, std::map& input); -bool input_parameters_set(std::map input_parameters); +void default_parametes_reader(const std::string& fn, std::map& default_parametes_type); +void input_parameters_get(const std::string& fn, std::map& input); +void input_parameters_set(std::map input_parameters); extern std::map input_parameters; extern std::map 
default_parametes_type; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index f0d7e43f68..a566827792 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -183,6 +183,7 @@ TEST_F(InputConvTest, Conv) EXPECT_EQ(GlobalV::sc_mag_switch,0); EXPECT_TRUE(GlobalV::decay_grad_switch); EXPECT_EQ(GlobalV::sc_file, "sc.json"); + EXPECT_EQ(GlobalV::MIXING_RESTART,0); } TEST_F(InputConvTest, ConvRelax) diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp index 02a5a19e10..11bce873ab 100644 --- a/source/module_io/test/input_test.cpp +++ b/source/module_io/test/input_test.cpp @@ -176,9 +176,11 @@ TEST_F(InputTest, Default) EXPECT_EQ(INPUT.out_wfc_pw,0); EXPECT_EQ(INPUT.out_wfc_r,0); EXPECT_EQ(INPUT.out_dos,0); - EXPECT_EQ(INPUT.out_band,0); + EXPECT_EQ(INPUT.out_band[0],0); + EXPECT_EQ(INPUT.out_band[1],8); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.out_mat_hs[0],0); + EXPECT_EQ(INPUT.out_mat_hs[1],8); EXPECT_EQ(INPUT.out_mat_hs2,0); EXPECT_EQ(INPUT.out_mat_xc, 0); EXPECT_EQ(INPUT.out_interval,1); @@ -539,9 +541,11 @@ TEST_F(InputTest, Read) EXPECT_EQ(INPUT.out_wfc_pw,0); EXPECT_EQ(INPUT.out_wfc_r,0); EXPECT_EQ(INPUT.out_dos,0); - EXPECT_EQ(INPUT.out_band,0); + EXPECT_EQ(INPUT.out_band[0],0); + EXPECT_EQ(INPUT.out_band[1],8); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.out_mat_hs[0],0); + EXPECT_EQ(INPUT.out_mat_hs[1],8); EXPECT_EQ(INPUT.out_mat_hs2,0); EXPECT_EQ(INPUT.out_mat_xc, 0); EXPECT_EQ(INPUT.out_interval,1); @@ -921,7 +925,8 @@ TEST_F(InputTest, Default_2) EXPECT_EQ(INPUT.relax_nmax, 1); EXPECT_EQ(INPUT.out_stru, 0); EXPECT_EQ(INPUT.symmetry, "0"); - EXPECT_EQ(INPUT.out_band,0); + EXPECT_EQ(INPUT.out_band[0],0); + EXPECT_EQ(INPUT.out_band[1],8); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.cal_force,0); EXPECT_EQ(INPUT.init_wfc,"file"); @@ -943,7 +948,8 @@ TEST_F(InputTest, Default_2) EXPECT_EQ(INPUT.relax_nmax, 1); 
EXPECT_EQ(INPUT.symmetry, "0"); EXPECT_EQ(INPUT.out_stru, 0); - EXPECT_EQ(INPUT.out_band,0); + EXPECT_EQ(INPUT.out_band[0],0); + EXPECT_EQ(INPUT.out_band[1],8); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.cal_force,0); EXPECT_EQ(INPUT.init_wfc,"file"); diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index 58e04eb32d..d005fdfccc 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -26,12 +26,13 @@ class InputParaTest : public ::testing::Test #ifdef __MPI TEST_F(InputParaTest, Bcast) { + INPUT.Default(); if (GlobalV::MY_RANK == 0) { - INPUT.Default(); /* hmmm... why there is not Default_2 here? and, seems Default is execute directly on each processor? */ + INPUT.suffix = "BcastTest"; } INPUT.Bcast(); - EXPECT_EQ(INPUT.suffix, "ABACUS"); + EXPECT_EQ(INPUT.suffix, "BcastTest"); EXPECT_EQ(INPUT.stru_file, ""); EXPECT_EQ(INPUT.kpoint_file, ""); EXPECT_EQ(INPUT.pseudo_dir, ""); @@ -180,7 +181,8 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.out_wfc_pw, 0); EXPECT_EQ(INPUT.out_wfc_r, 0); EXPECT_EQ(INPUT.out_dos, 0); - EXPECT_EQ(INPUT.out_band, 0); + EXPECT_EQ(INPUT.out_band[0], 0); + EXPECT_EQ(INPUT.out_band[1], 8); EXPECT_EQ(INPUT.out_proj_band, 0); EXPECT_EQ(INPUT.out_mat_hs[0], 0); EXPECT_EQ(INPUT.out_mat_hs[1], 8); @@ -379,6 +381,7 @@ TEST_F(InputParaTest, Bcast) EXPECT_TRUE(INPUT.mdp.dump_virial); EXPECT_FALSE(INPUT.mixing_tau); EXPECT_FALSE(INPUT.mixing_dftu); + EXPECT_EQ(INPUT.mixing_restart,0); EXPECT_EQ(INPUT.out_bandgap, 0); EXPECT_EQ(INPUT.out_mat_t, 0); diff --git a/source/module_io/test/support/INPUT b/source/module_io/test/support/INPUT index 469dff2ff4..4fbde867db 100644 --- a/source/module_io/test/support/INPUT +++ b/source/module_io/test/support/INPUT @@ -59,7 +59,7 @@ out_pot 2 #output realspace potential out_wfc_pw 0 #output wave functions out_wfc_r 0 #output wave functions in realspace out_dos 0 #output energy and dos -out_band false #output 
energy and band structure +out_band 0 #output energy and band structure out_proj_band FaLse #output projected band structure restart_save f #print to disk every step for restart restart_load F #restart from disk diff --git a/source/module_io/test/support/witestfile b/source/module_io/test/support/witestfile index 4043773876..4db819d53f 100644 --- a/source/module_io/test/support/witestfile +++ b/source/module_io/test/support/witestfile @@ -55,7 +55,7 @@ out_pot 2 #output realspace potential out_wfc_pw 0 #output wave functions out_wfc_r 0 #output wave functions in realspace out_dos 0 #output energy and dos -out_band false #output energy and band structure +out_band 0 #output energy and band structure out_proj_band FaLse #output projected band structure restart_save f #print to disk every step for restart restart_load F #restart from disk diff --git a/source/module_io/test/to_qo_test.cpp b/source/module_io/test/to_qo_test.cpp index 93692f858e..9477b2eb54 100644 --- a/source/module_io/test/to_qo_test.cpp +++ b/source/module_io/test/to_qo_test.cpp @@ -543,7 +543,39 @@ TEST_F(toQOTest, CalculateSelfOvlpRFull) //tqo.write_ovlp(tqo.ovlp_R()[0], "QO_self_ovlp.dat"); } -TEST_F(toQOTest, BuildPswfc) +/* Si_dojo_soc.upf is special: two p orbitals, one s orbital */ + +TEST_F(toQOTest, BuildPswfcPartial1) +{ + define_fcc_cell(ucell); + toQO tqo("pswfc", {"s", "s"}); + tqo.unwrap_unitcell(&ucell); + tqo.build_ao(ucell.ntype, ucell.pseudo_fn); + EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals + EXPECT_EQ(tqo.nchi(), 2); +} + +TEST_F(toQOTest, BuildPswfcPartial2) +{ + define_fcc_cell(ucell); + toQO tqo("pswfc", {"ps", "s"}); + tqo.unwrap_unitcell(&ucell); + tqo.build_ao(ucell.ntype, ucell.pseudo_fn); + EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals + EXPECT_EQ(tqo.nchi(), 8); // the first element is Si, it has two p orbitals, so 3+3+1+1 +} + +TEST_F(toQOTest, BuildPswfcPartial3) +{ + define_fcc_cell(ucell); + toQO 
tqo("pswfc", {"all", "p"}); + tqo.unwrap_unitcell(&ucell); + tqo.build_ao(ucell.ntype, ucell.pseudo_fn); + EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals + EXPECT_EQ(tqo.nchi(), 10); +} + +TEST_F(toQOTest, BuildPswfcAll) { define_fcc_cell(ucell); toQO tqo("pswfc", {"all", "all"}); diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index d61133715d..8dccb5627a 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -384,13 +384,16 @@ TEST_F(write_input, Mixing7) std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); EXPECT_THAT(output, testing::HasSubstr("#Parameters (7.Charge Mixing)")); EXPECT_THAT(output, testing::HasSubstr("mixing_type broyden #plain; pulay; broyden")); - EXPECT_THAT(output, - testing::HasSubstr("mixing_beta 0.7 #mixing parameter: 0 means no new charge")); + EXPECT_THAT(output, testing::HasSubstr("mixing_beta 0.7 #mixing parameter: 0 means no new charge")); EXPECT_THAT(output, testing::HasSubstr("mixing_ndim 8 #mixing dimension in pulay or broyden")); EXPECT_THAT(output, testing::HasSubstr("mixing_gg0 0 #mixing parameter in kerker")); + EXPECT_THAT(output, testing::HasSubstr("mixing_beta_mag -10 #mixing parameter for magnetic density")); + EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_mag 0 #mixing parameter in kerker")); + EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_min 0.1 #the minimum kerker coefficient")); + EXPECT_THAT(output, testing::HasSubstr("mixing_angle -10 #angle mixing parameter for non-colinear calculations")); EXPECT_THAT(output, testing::HasSubstr("mixing_tau 0 #whether to mix tau in mGGA calculation")); - EXPECT_THAT(output, - testing::HasSubstr("mixing_dftu 0 #whether to mix locale in DFT+U calculation")); + EXPECT_THAT(output, testing::HasSubstr("mixing_dftu 0 #whether to mix locale in DFT+U calculation")); + EXPECT_THAT(output, 
testing::HasSubstr("mixing_restart 0 #which step to restart mixing during SCF")); EXPECT_THAT(output, testing::HasSubstr("")); ifs.close(); remove("write_input_test.log"); diff --git a/source/module_io/test_serial/nscf_band_test.cpp b/source/module_io/test_serial/nscf_band_test.cpp index 4483bf37cd..db9bf752fb 100644 --- a/source/module_io/test_serial/nscf_band_test.cpp +++ b/source/module_io/test_serial/nscf_band_test.cpp @@ -54,9 +54,16 @@ class BandTest : public ::testing::Test ekb(1,1) = 2.0; ekb(1,2) = 3.0; kv = new K_Vectors; + // specify the kpoints + kv->kvec_c.resize(nks); + kv->kvec_c[0] = ModuleBase::Vector3(0.0, 0.0, 0.0); + kv->kvec_c[1] = ModuleBase::Vector3(1.0, 0.0, 0.0); kv->isk.resize(nks); kv->isk[0] = 0; kv->isk[1] = 1; + kv->kl_segids.resize(nks); + kv->kl_segids[0] = 0; + kv->kl_segids[1] = 0; Pkpoints = new Parallel_Kpoints; } @@ -81,12 +88,12 @@ class BandTest : public ::testing::Test TEST_F(BandTest, nscf_band) { // Call the function to be tested - ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, ekb, *kv, Pkpoints); + ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, 8, ekb, *kv, Pkpoints); // Check the output file std::ifstream ifs(out_band_dir); std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); ASSERT_TRUE(ifs.is_open()); - EXPECT_THAT(str, testing::HasSubstr("1 -27.2114 -13.6057 0")); + EXPECT_THAT(str, testing::HasSubstr("1 0.00000000 -27.21139600 -13.60569800 0.00000000")); ifs.close(); } diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index 6003a4fdb0..cb26bc2283 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -85,6 +85,7 @@ void Input::Print(const std::string &fn) const ModuleBase::GlobalFunc::OUTP(ofs, "cal_force", cal_force, "if calculate the force at the end of the electronic iteration"); ModuleBase::GlobalFunc::OUTP(ofs, "out_freq_ion", out_freq_ion, "the frequency ( >= 0 ) of ionic step to output charge density 
and wavefunction. 0: output only when ion steps are finished"); ModuleBase::GlobalFunc::OUTP(ofs, "device", device, "the computing device for ABACUS"); + ModuleBase::GlobalFunc::OUTP(ofs, "precision", precision, "the computing precision for ABACUS"); ofs << "\n#Parameters (2.PW)" << std::endl; ModuleBase::GlobalFunc::OUTP(ofs, "ecutwfc", ecutwfc, "#energy cutoff for wave functions"); @@ -122,7 +123,7 @@ void Input::Print(const std::string &fn) const ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_pw", out_wfc_pw, "output wave functions"); ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_r", out_wfc_r, "output wave functions in realspace"); ModuleBase::GlobalFunc::OUTP(ofs, "out_dos", out_dos, "output energy and dos"); - ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band, "output energy and band structure"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band[0], "output energy and band structure (with precision "+std::to_string(out_band[1])+")"); ModuleBase::GlobalFunc::OUTP(ofs, "out_proj_band", out_proj_band, "output projected band structure"); ModuleBase::GlobalFunc::OUTP(ofs, "restart_save", restart_save, "print to disk every step for restart"); ModuleBase::GlobalFunc::OUTP(ofs, "restart_load", restart_load, "restart from disk"); @@ -222,7 +223,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dk", lcao_dk, "delta k for 1D integration in LCAO"); ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dr", lcao_dr, "delta r for 1D integration in LCAO"); ModuleBase::GlobalFunc::OUTP(ofs, "lcao_rmax", lcao_rmax, "max R for 1D two-center integration table"); - ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix (with precision "+std::to_string(out_mat_hs[1])+")"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs2", out_mat_hs2, "output H(R) and S(R) matrix"); 
ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_dh", out_mat_dh, "output of derivative of H(R) matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_xc", out_mat_xc, "output exchange-correlation matrix in KS-orbital representation"); @@ -247,6 +248,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "mixing_type", mixing_mode, "plain; pulay; broyden"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta", mixing_beta, "mixing parameter: 0 means no new charge"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_ndim", mixing_ndim, "mixing dimension in pulay or broyden"); + ModuleBase::GlobalFunc::OUTP(ofs, "mixing_restart", mixing_restart, "which step to restart mixing during SCF"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0", mixing_gg0, "mixing parameter in kerker"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker"); @@ -493,7 +495,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function"); - + + ofs << "\n#Parameters (24.PEXSI)" << std::endl; + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", 
pexsi_comm, "Whether to construct PSelInv communication pattern"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", 
pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"); + ofs.close(); return; } \ No newline at end of file diff --git a/source/module_ri/Exx_LRI.hpp b/source/module_ri/Exx_LRI.hpp index ace9097bb0..c9b3b69601 100644 --- a/source/module_ri/Exx_LRI.hpp +++ b/source/module_ri/Exx_LRI.hpp @@ -12,7 +12,6 @@ #include "module_ri/exx_abfs-construct_orbs.h" #include "module_ri/exx_abfs-io.h" #include "module_ri/conv_coulomb_pot_k.h" -#include "module_ri/conv_coulomb_pot_k-template.h" #include "module_base/tool_title.h" #include "module_base/timer.h" #include "module_ri/serialization_cereal.h" @@ -71,14 +70,19 @@ void Exx_LRI::init(const MPI_Comm &mpi_comm_in, const K_Vectors &kv_in) case Conv_Coulomb_Pot_K::Ccp_Type::Ccp: return {}; case Conv_Coulomb_Pot_K::Ccp_Type::Hf: - return {}; + { + // 4/3 * pi * Rcut^3 = V_{supercell} = V_{unitcell} * Nk + const int nspin0 = (GlobalV::NSPIN==2) ? 
2 : 1; + const double hf_Rcut = std::pow(0.75 * this->p_kv->nkstot_full/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0); + return {{"hf_Rcut", hf_Rcut}}; + } case Conv_Coulomb_Pot_K::Ccp_Type::Hse: return {{"hse_omega", this->info.hse_omega}}; default: throw std::domain_error(std::string(__FILE__)+" line "+std::to_string(__LINE__)); break; } }; - this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times, this->p_kv->nkstot_full); + this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times); for( size_t T=0; T!=this->abfs.size(); ++T ) diff --git a/source/module_ri/LRI_CV_Tools.hpp b/source/module_ri/LRI_CV_Tools.hpp index 532e7104fb..8ad95c3715 100644 --- a/source/module_ri/LRI_CV_Tools.hpp +++ b/source/module_ri/LRI_CV_Tools.hpp @@ -250,11 +250,10 @@ LRI_CV_Tools::cal_latvec_range(const double &rcut_times) const ModuleBase::Vector3 proj = ModuleBase::Mathzone::latvec_projection( std::array,3>{GlobalC::ucell.a1, GlobalC::ucell.a2, GlobalC::ucell.a3}); const ModuleBase::Vector3 latvec_times = Rcut_max * rcut_times / (proj * GlobalC::ucell.lat0); - const ModuleBase::Vector3 latvec_times_ceil = - {std::ceil(latvec_times.x), - std::ceil(latvec_times.y), - std::ceil(latvec_times.z)}; - const ModuleBase::Vector3 period = 2 * latvec_times_ceil + ModuleBase::Vector3{1,1,1}; + const ModuleBase::Vector3 latvec_times_ceil = {static_cast(std::ceil(latvec_times.x)), + static_cast(std::ceil(latvec_times.y)), + static_cast(std::ceil(latvec_times.z))}; + const ModuleBase::Vector3 period = 2 * latvec_times_ceil + ModuleBase::Vector3{1,1,1}; return std::array{period.x, period.y, period.z}; } @@ -308,7 +307,7 @@ LRI_CV_Tools::get_dCVws( const Abfs::Vector3_Order R_delta = -tau0+tau1+(RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec); dCVws[it0][it1][R_delta][ix] = dCVs_B.second; } - } + } } return dCVws; } diff --git 
a/source/module_ri/conv_coulomb_pot_k-template.h b/source/module_ri/conv_coulomb_pot_k-template.h deleted file mode 100644 index 9a3d245286..0000000000 --- a/source/module_ri/conv_coulomb_pot_k-template.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef CONV_COULOMB_POT_K_TEMPLATE_H -#define CONV_COULOMB_POT_K_TEMPLATE_H - -#include "conv_coulomb_pot_k.h" -#include -#include - -#include "../module_ri/test_code/exx_abfs-construct_orbs-test.h" - - -template< typename T > -T Conv_Coulomb_Pot_K::cal_orbs_ccp( - const T & orbs, - const Ccp_Type &ccp_type, - const std::map ¶meter, - const double rmesh_times, - const int& nks) -{ - T orbs_ccp(orbs.size()); - for( size_t i=0; i!=orbs.size(); ++i ) - orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times, nks ); - return orbs_ccp; -} - -extern template -Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp( - const Numerical_Orbital_Lm & orbs, - const Ccp_Type &ccp_type, - const std::map ¶meter, - const double rmesh_times, - const int& nks); - - - -template< typename T > -double Conv_Coulomb_Pot_K::get_rmesh_proportion( - const T & orbs, - const double psi_threshold) -{ - double rmesh_proportion=0; - for( const auto &orb : orbs ) - rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold)); - return rmesh_proportion; -} - -extern template -double Conv_Coulomb_Pot_K::get_rmesh_proportion( - const Numerical_Orbital_Lm & orbs, - const double psi_threshold); - -#endif \ No newline at end of file diff --git a/source/module_ri/conv_coulomb_pot_k.cpp b/source/module_ri/conv_coulomb_pot_k.cpp index 9f573509ee..62dd582a44 100644 --- a/source/module_ri/conv_coulomb_pot_k.cpp +++ b/source/module_ri/conv_coulomb_pot_k.cpp @@ -2,104 +2,109 @@ #include "../module_base/constants.h" #include "../module_basis/module_ao/ORB_atomic_lm.h" #include "../module_hamilt_pw/hamilt_pwdft/global.h" -std::vector Conv_Coulomb_Pot_K::cal_psi_ccp( const std::vector & psif ) + +namespace Conv_Coulomb_Pot_K { - std::vector 
psik2_ccp(psif.size()); - for( size_t ik=0; ik Conv_Coulomb_Pot_K::cal_psi_hf(const int& nks, const std::vector &psif, - const std::vector &k_radial, - const double omega = 0) -{ - const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1; - const double Rc = std::pow(0.75 * nks/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0); - std::vector psik2_ccp(psif.size()); - for (size_t ik = 0; ik < psif.size(); ++ik) - psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * Rc)); - return psik2_ccp; -} + std::vector cal_psi_ccp( + const std::vector & psif) + { + std::vector psik2_ccp(psif.size()); + for( size_t ik=0; ik cal_psi_hf( + const std::vector &psif, + const std::vector &k_radial, + const double hf_Rcut) + { + std::vector psik2_ccp(psif.size()); + for (size_t ik = 0; ik < psif.size(); ++ik) + psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * hf_Rcut)); + return psik2_ccp; + } -std::vector Conv_Coulomb_Pot_K::cal_psi_hse( - const std::vector & psif, - const std::vector & k_radial, - const double omega) -{ - std::vector psik2_ccp(psif.size()); - for( size_t ik=0; ik cal_psi_hse( + const std::vector & psif, + const std::vector & k_radial, + const double hse_omega) + { + std::vector psik2_ccp(psif.size()); + for( size_t ik=0; ik -Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp( - const Numerical_Orbital_Lm &orbs, - const Ccp_Type &ccp_type, - const std::map ¶meter, - const double rmesh_times, - const int& nks) -{ - std::vector psik2_ccp; - switch(ccp_type) + + template<> + Numerical_Orbital_Lm cal_orbs_ccp( + const Numerical_Orbital_Lm &orbs, + const Ccp_Type &ccp_type, + const std::map ¶meter, + const double rmesh_times) { - case Ccp_Type::Ccp: - psik2_ccp = cal_psi_ccp( orbs.get_psif() ); break; - case Ccp_Type::Hf: - psik2_ccp = cal_psi_hf(nks, orbs.get_psif(), orbs.get_k_radial()); break; - case Ccp_Type::Hse: - psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") ); break; 
- default: - throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) ); break; - } + std::vector psik2_ccp; + switch(ccp_type) + { + case Ccp_Type::Ccp: + psik2_ccp = cal_psi_ccp( orbs.get_psif() ); break; + case Ccp_Type::Hf: + psik2_ccp = cal_psi_hf( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hf_Rcut")); break; + case Ccp_Type::Hse: + psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") ); break; + default: + throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) ); break; + } - const double dr = orbs.get_rab().back(); - const int Nr = (static_cast(orbs.getNr()*rmesh_times)) | 1; - std::vector rab(Nr); - for( size_t ir=0; ir r_radial(Nr); - for( size_t ir=0; ir(orbs.getNr()*rmesh_times)) | 1; + std::vector rab(Nr); + for( size_t ir=0; ir r_radial(Nr); + for( size_t ir=0; ir -double Conv_Coulomb_Pot_K::get_rmesh_proportion( - const Numerical_Orbital_Lm &orbs, - const double psi_threshold) -{ - for(int ir=orbs.getNr()-1; ir>=0; --ir) + Numerical_Orbital_Lm orbs_ccp; + orbs_ccp.set_orbital_info( + orbs.getLabel(), + orbs.getType(), + orbs.getL(), + orbs.getChi(), + Nr, + ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab), + ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial), + Numerical_Orbital_Lm::Psi_Type::Psik2, + ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2_ccp), + orbs.getNk(), + orbs.getDk(), + orbs.getDruniform(), + false, + true, GlobalV::CAL_FORCE); + return orbs_ccp; + } + + template<> + double get_rmesh_proportion( + const Numerical_Orbital_Lm &orbs, + const double psi_threshold) { - if(std::abs(orbs.getPsi(ir))>=psi_threshold) - return static_cast(ir)/orbs.getNr(); + for(int ir=orbs.getNr()-1; ir>=0; --ir) + { + if(std::abs(orbs.getPsi(ir))>=psi_threshold) + return static_cast(ir)/orbs.getNr(); + } + return 0.0; } - return 0.0; + } diff --git a/source/module_ri/conv_coulomb_pot_k.h b/source/module_ri/conv_coulomb_pot_k.h index 
9adec9d915..d464a53f91 100644 --- a/source/module_ri/conv_coulomb_pot_k.h +++ b/source/module_ri/conv_coulomb_pot_k.h @@ -5,40 +5,37 @@ #include #include -class Conv_Coulomb_Pot_K +namespace Conv_Coulomb_Pot_K { -public: + enum class Ccp_Type{ // parameter: + Ccp, // + Hf, // "hf_Rcut" + Hse}; // "hse_omega" - enum class Ccp_Type{ // parameter: - Ccp, // - Hf, // - Hse}; // "hse_omega" - - template static T cal_orbs_ccp( + template T cal_orbs_ccp( const T &orbs, const Ccp_Type &ccp_type, const std::map ¶meter, - const double rmesh_times, - const int& nks); - -private: - - template< typename T > static double get_rmesh_proportion( + const double rmesh_times); + + //private: + template< typename T > double get_rmesh_proportion( const T &orbs, const double psi_threshold); - -private: - static std::vector cal_psi_ccp( const std::vector & psif ); - - static std::vector cal_psi_hf(const int& nks, const std::vector &psif, - const std::vector &k_radial, - const double omega); - - static std::vector cal_psi_hse( + //private: + std::vector cal_psi_ccp( + const std::vector & psif); + std::vector cal_psi_hf( + const std::vector &psif, + const std::vector &k_radial, + const double hf_Rcut); + std::vector cal_psi_hse( const std::vector & psif, const std::vector & k_radial, - const double omega); -}; + const double hse_omega); +} + +#include "conv_coulomb_pot_k.hpp" #endif \ No newline at end of file diff --git a/source/module_ri/conv_coulomb_pot_k.hpp b/source/module_ri/conv_coulomb_pot_k.hpp new file mode 100644 index 0000000000..5ca3abe5c8 --- /dev/null +++ b/source/module_ri/conv_coulomb_pot_k.hpp @@ -0,0 +1,37 @@ +#ifndef CONV_COULOMB_POT_K_HPP +#define CONV_COULOMB_POT_K_HPP + +#include "conv_coulomb_pot_k.h" +#include +#include + +namespace Conv_Coulomb_Pot_K +{ + + template< typename T > + std::vector cal_orbs_ccp( + const std::vector & orbs, + const Ccp_Type &ccp_type, + const std::map ¶meter, + const double rmesh_times) + { + std::vector orbs_ccp(orbs.size()); + for( 
size_t i=0; i!=orbs.size(); ++i ) + orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times); + return orbs_ccp; + } + + template< typename T > + double get_rmesh_proportion( + const std::vector & orbs, + const double psi_threshold) + { + double rmesh_proportion=0; + for( const auto &orb : orbs ) + rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold)); + return rmesh_proportion; + } + +} + +#endif \ No newline at end of file diff --git a/source/module_ri/exx_lip.cpp b/source/module_ri/exx_lip.cpp index 2f685be5f1..0c4211d890 100644 --- a/source/module_ri/exx_lip.cpp +++ b/source/module_ri/exx_lip.cpp @@ -481,7 +481,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib) } std::complex * const porter = new std::complex [rho_basis->nrxx]; - + for(size_t iw=0; iw< GlobalV::NLOCAL; ++iw) { const std::complex * const phi_w = phi[iw]; @@ -495,7 +495,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib) if( Conv_Coulomb_Pot_K::Ccp_Type::Ccp==info.ccp_type || Conv_Coulomb_Pot_K::Ccp_Type::Hf==info.ccp_type ) if((iq==iq_vecik) && (gzero_rank_in_pool==GlobalV::RANK_IN_POOL)) /// need to check while use k_point parallel b0[iw] = b_w[rho_basis->ig_gge0]; - + for( size_t ig=0; ignpw; ++ig) b_w[ig] *= recip_qkg2[ig]; } @@ -634,12 +634,14 @@ void Exx_Lip::write_q_pack() const if(!GlobalV::RANK_IN_POOL) { const std::string exx_q_pack = "exx_q_pack/"; - + int return_value=0; const std::string command_mkdir = "test -d " + GlobalV::global_out_dir + exx_q_pack + " || mkdir " + GlobalV::global_out_dir + exx_q_pack; - system( command_mkdir.c_str() ); // Need to check + return_value = system(command_mkdir.c_str()); + assert(return_value == 0); - const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card; - system( command_kpoint.c_str() ); // Need to check + const std::string 
command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card; + return_value = system(command_kpoint.c_str()); + assert(return_value==0); std::stringstream ss_wf_wg; ss_wf_wg << GlobalV::global_out_dir << exx_q_pack << "wf_wg_" << GlobalV::MY_POOL; diff --git a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat index af0ad58c0d..1aa4b94ac1 100644 --- a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat +++ b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat @@ -1,6 +1,6 @@ -1 0 -3.3870593 -0.79801307 5.0648821 5.0648821 7.8411435 9.605949 -2 0.17320508 -3.8620194 -0.075179882 5.1365314 5.1365314 7.9181006 9.6849562 -3 0.34641016 -4.6307963 1.434193 5.3528485 5.3528485 8.1554014 9.8149803 -4 0.51961524 -5.2581292 3.25095 5.6954121 5.6954121 8.5186442 9.653424 -5 0.69282032 -5.6519805 5.1370698 6.0846605 6.0846605 8.8678468 9.1370653 -6 0.8660254 -5.7858738 6.2887673 6.2887673 6.2887673 8.8364355 8.8364355 + 1 0.00000000 -3.38705933 -0.79801307 5.06488210 5.06488210 7.84114355 9.60594903 + 2 0.17320508 -3.86201936 -0.07517988 5.13653145 5.13653145 7.91810064 9.68495616 + 3 0.34641016 -4.63079629 1.43419304 5.35284854 5.35284854 8.15540136 9.81498032 + 4 0.51961524 -5.25812925 3.25094996 5.69541211 5.69541211 8.51864422 9.65342396 + 5 0.69282032 -5.65198054 5.13706981 6.08466055 6.08466055 8.86784679 9.13706533 + 6 0.86602540 -5.78587376 6.28876728 6.28876728 6.28876728 8.83643550 8.83643550 diff --git a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat index ced64a27bf..0a1088ceb5 100644 --- a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat +++ b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat @@ -1,6 +1,6 @@ -1 0 -3.3870489 -0.79801666 5.0648903 5.0649025 7.841159 9.6059536 -2 0.17320508 
-3.861988 -0.075180055 5.1365331 5.136532 7.9181141 9.6849637 -3 0.34641016 -4.6307948 1.43421 5.3529077 5.3528526 8.1553806 9.8149771 -4 0.51961524 -5.2581202 3.2509597 5.6954124 5.6954235 8.5188338 9.6533658 -5 0.69282032 -5.6519785 5.1370872 6.0846758 6.0846914 8.8679617 9.1370593 -6 0.8660254 -5.7858674 6.288817 6.2887755 6.2887815 8.8365307 8.8365071 + 1 0.00000000 -3.38704889 -0.79801666 5.06489026 5.06490253 7.84115900 9.60595365 + 2 0.17320508 -3.86198803 -0.07518005 5.13653307 5.13653200 7.91811409 9.68496369 + 3 0.34641016 -4.63079483 1.43421001 5.35290770 5.35285262 8.15538060 9.81497711 + 4 0.51961524 -5.25812025 3.25095973 5.69541240 5.69542353 8.51883375 9.65336577 + 5 0.69282032 -5.65197852 5.13708720 6.08467575 6.08469139 8.86796173 9.13705931 + 6 0.86602540 -5.78586742 6.28881704 6.28877545 6.28878152 8.83653074 8.83650706 diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref index 168c5723a3..bffad6b08a 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref @@ -3,92 +3,92 @@ CALCULATE THE MULLIkEN ANALYSIS FOR EACH ATOM Total charge: 32 Decomposed Mulliken populations 0 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.317 0.05552 0.2843 0.02903 - sum over m 1.317 0.05552 0.2843 0.02903 -s 1 1.726 -0.01923 -0.09498 0.005159 - sum over m 1.726 -0.01923 -0.09498 0.005159 -s 2 0.03246 -0.04333 -0.2148 0.008137 - sum over m 0.03246 -0.04333 -0.2148 0.008137 -s 3 -0.02921 0.005194 0.02641 0.001867 - sum over m -0.02921 0.005194 0.02641 0.001867 - sum over m+zeta 3.046 -0.001842 0.0009368 0.04419 -pz 0 2.034 -0.001185 -0.005932 1.545e-06 -px 0 2.033 -0.001283 -0.006419 1.538e-06 -py 0 2.033 -0.001188 -0.005944 1.543e-06 - sum over m 6.1 -0.003656 -0.01829 4.626e-06 -pz 1 -0.02622 0.0005602 0.002791 0 -px 1 -0.02639 0.0006145 0.003054 0 -py 1 -0.02603 0.0005563 0.00277 0 - sum over m -0.07864 0.001731 0.008615 0 
- sum over m+zeta 6.021 -0.001925 -0.00968 5.611e-06 -dz^2 0 1.964 0.0008273 0.004131 4.077e-06 -dxz 0 1.044 0.1755 0.7507 0.002258 -dyz 0 0.9544 0.1768 0.7532 0.002329 -dx^2-y^2 0 1.967 0.0007523 0.003756 3.978e-06 -dxy 0 1.055 0.1751 0.7495 0.002251 - sum over m 6.984 0.529 2.261 0.006846 -dz^2 1 0.03863 -0.0008699 -0.004363 5.197e-06 -dxz 1 -0.03759 -0.005346 -0.01936 -0.0001322 -dyz 1 -0.03407 -0.005734 -0.02118 -0.0001342 -dx^2-y^2 1 0.03943 -0.0009093 -0.004564 5.691e-06 -dxy 1 -0.03787 -0.005246 -0.0189 -0.0001314 - sum over m -0.03146 -0.01811 -0.06836 -0.000387 - sum over m+zeta 6.952 0.5109 2.193 0.006459 -fz^3 0 -0.007049 0.0007578 0.003775 0 -fxz^2 0 -0.002045 0.0002638 0.001312 0 -fyz^2 0 -0.002729 0.0002912 0.001448 0 -fzx^2-zy^2 0 6.273e-05 0 -6.642e-06 0 -fxyz 0 1.153e-05 1.446e-06 5.675e-06 0 -fx^3-3*xy^2 0 -0.00338 0.00044 0.002189 0 -f3yx^2-y^3 0 -0.00407 0.0004646 0.002311 0 - sum over m -0.0192 0.002219 0.01103 2.581e-06 - sum over m+zeta -0.0192 0.002219 0.01103 2.581e-06 -Total Charge on atom: Fe 16 -Total Magnetism on atom: Fe (0.5093, 2.195, 0.05066) +s 0 1.317 0.06196 -0.2625 -0.07949 + sum over m 1.317 0.06196 -0.2625 -0.07949 +s 1 1.726 -0.01809 0.09886 -0.01413 + sum over m 1.726 -0.01809 0.09886 -0.01413 +s 2 0.03246 -0.04153 0.2209 -0.02228 + sum over m 0.03246 -0.04153 0.2209 -0.02228 +s 3 -0.02921 0.005609 -0.025 -0.005114 + sum over m -0.02921 0.005609 -0.025 -0.005114 + sum over m+zeta 3.046 0.007945 0.0323 -0.121 +pz 0 2.034 -0.001186 0.005932 -3.981e-06 +px 0 2.033 -0.001283 0.006419 -3.989e-06 +py 0 2.033 -0.001188 0.005944 -3.979e-06 + sum over m 6.1 -0.003658 0.0183 -1.195e-05 +pz 1 -0.02621 0.0005578 -0.002789 0 +px 1 -0.02639 0.0006107 -0.003054 0 +py 1 -0.02603 0.0005536 -0.002768 0 + sum over m -0.07863 0.001722 -0.008611 0 + sum over m+zeta 6.021 -0.001936 0.009684 -1.277e-05 +dz^2 0 1.964 0.0008269 -0.004128 -1.088e-05 +dxz 0 1.044 0.156 -0.7849 -0.0003055 +dyz 0 0.9592 0.1564 -0.7869 -0.0003096 +dx^2-y^2 0 1.967 
0.000752 -0.003754 -1.059e-05 +dxy 0 1.055 0.1558 -0.7835 -0.0003047 + sum over m 6.988 0.4698 -2.363 -0.0009413 +dz^2 1 0.03863 -0.0008716 0.004365 -1.357e-05 +dxz 1 -0.03708 -0.004148 0.02101 1.956e-05 +dyz 1 -0.03373 -0.004494 0.02274 1.968e-05 +dx^2-y^2 1 0.03943 -0.0009117 0.004566 -1.471e-05 +dxy 1 -0.03733 -0.004056 0.02055 1.945e-05 + sum over m -0.03008 -0.01448 0.07324 3.041e-05 + sum over m+zeta 6.958 0.4553 -2.29 -0.0009109 +fz^3 0 -0.007044 0.0007552 -0.003776 -1.406e-06 +fxz^2 0 -0.002046 0.0002628 -0.001314 0 +fyz^2 0 -0.00273 0.00029 -0.00145 0 +fzx^2-zy^2 0 5.811e-05 0 3.451e-06 0 +fxyz 0 1.14e-05 1.249e-06 -6.306e-06 0 +fx^3-3*xy^2 0 -0.003379 0.0004381 -0.00219 0 +f3yx^2-y^3 0 -0.00407 0.0004626 -0.002313 0 + sum over m -0.0192 0.002209 -0.01105 -4.307e-06 + sum over m+zeta -0.0192 0.002209 -0.01105 -4.307e-06 +Total Charge on atom: Fe 16.01 +Total Magnetism on atom: Fe (0.4635, -2.259, -0.1219) 1 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.275 0.05341 0.2605 -0.02903 - sum over m 1.275 0.05341 0.2605 -0.02903 -s 1 1.755 -0.01752 -0.08879 -0.005156 - sum over m 1.755 -0.01752 -0.08879 -0.005156 -s 2 -0.02898 -0.0404 -0.2039 -0.00813 - sum over m -0.02898 -0.0404 -0.2039 -0.00813 -s 3 -0.04711 0.006367 0.03139 -0.001874 - sum over m -0.04711 0.006367 0.03139 -0.001874 - sum over m+zeta 2.954 0.001862 -0.0008532 -0.04419 -pz 0 2.032 -0.001369 -0.006852 -1.367e-06 -px 0 2.025 -0.0009208 -0.004608 -1.387e-06 -py 0 2.032 -0.001332 -0.006666 -1.366e-06 - sum over m 6.089 -0.003622 -0.01813 -4.119e-06 -pz 1 -0.02528 0.0005889 0.002889 0 -px 1 -0.01606 0.0001369 0.0006408 0 -py 1 -0.02466 0.000571 0.002802 0 - sum over m -0.066 0.001297 0.006331 2.367e-06 - sum over m+zeta 6.023 -0.002325 -0.01179 -1.753e-06 -dz^2 0 1.957 0.001158 0.005774 -3.913e-06 -dxz 0 1.097 0.1724 0.7275 0.002311 -dyz 0 0.9509 0.1759 0.7475 0.002269 -dx^2-y^2 0 1.947 0.001654 0.008245 -4.075e-06 -dxy 0 1.113 0.1714 0.7227 0.002304 - sum over m 7.065 0.5225 2.212 0.006876 -dz^2 1 
0.03925 -0.001062 -0.005333 -4.383e-06 -dxz 1 -0.0366 -0.003947 -0.01263 -0.0001213 -dyz 1 -0.03157 -0.005197 -0.01856 -0.0001267 -dx^2-y^2 1 0.04266 -0.001394 -0.007002 -4.206e-06 -dxy 1 -0.03743 -0.003854 -0.01222 -0.0001203 - sum over m -0.02369 -0.01545 -0.05575 -0.0003768 - sum over m+zeta 7.041 0.5071 2.156 0.006499 -fz^3 0 -0.006614 0.0007261 0.003596 0 -fxz^2 0 -0.001954 0.0002565 0.001276 0 -fyz^2 0 -0.002684 0.0002742 0.001366 0 -fzx^2-zy^2 0 9.09e-05 1.99e-05 8.018e-05 0 -fxyz 0 2.062e-05 4.102e-06 1.816e-05 0 -fx^3-3*xy^2 0 -0.003203 0.0004291 0.00213 0 -f3yx^2-y^3 0 -0.003698 0.0004635 0.002271 0 - sum over m -0.01804 0.002174 0.01074 0 - sum over m+zeta -0.01804 0.002174 0.01074 0 -Total Charge on atom: Fe 16 -Total Magnetism on atom: Fe (0.5088, 2.154, -0.03769) +s 0 1.275 0.04699 -0.2823 0.07949 + sum over m 1.275 0.04699 -0.2823 0.07949 +s 1 1.755 -0.01866 0.08491 0.01412 + sum over m 1.755 -0.01866 0.08491 0.01412 +s 2 -0.02899 -0.04221 0.1978 0.02226 + sum over m -0.02899 -0.04221 0.1978 0.02226 +s 3 -0.04712 0.00595 -0.03281 0.005133 + sum over m -0.04712 0.00595 -0.03281 0.005133 + sum over m+zeta 2.954 -0.007928 -0.03239 0.121 +pz 0 2.032 -0.001371 0.00685 3.967e-06 +px 0 2.025 -0.0009218 0.004606 3.958e-06 +py 0 2.032 -0.001333 0.006664 3.965e-06 + sum over m 6.089 -0.003626 0.01812 1.189e-05 +pz 1 -0.02529 0.0005803 -0.002904 0 +px 1 -0.01606 0.0001295 -0.0006492 0 +py 1 -0.02466 0.0005625 -0.002815 0 + sum over m -0.06602 0.001272 -0.006367 0 + sum over m+zeta 6.023 -0.002353 0.01175 1.25e-05 +dz^2 0 1.957 0.001154 -0.005778 1.149e-05 +dxz 0 1.091 0.1517 -0.7637 -8.462e-05 +dyz 0 0.9556 0.1553 -0.7815 -8.443e-05 +dx^2-y^2 0 1.947 0.001648 -0.008249 1.233e-05 +dxy 0 1.106 0.1508 -0.7591 -8.432e-05 + sum over m 7.056 0.4606 -2.318 -0.0002295 +dz^2 1 0.03925 -0.001067 0.005328 1.289e-05 +dxz 1 -0.03558 -0.002824 0.01439 2.5e-06 +dyz 1 -0.03117 -0.003962 0.0201 2.798e-06 +dx^2-y^2 1 0.04266 -0.001401 0.006997 1.29e-05 +dxy 1 -0.03637 -0.002747 
0.01401 2.475e-06 + sum over m -0.02122 -0.012 0.06082 3.356e-05 + sum over m+zeta 7.035 0.4486 -2.257 -0.000196 +fz^3 0 -0.006615 0.0007206 -0.003605 1.352e-06 +fxz^2 0 -0.001955 0.0002554 -0.001278 0 +fyz^2 0 -0.002684 0.0002735 -0.001368 0 +fzx^2-zy^2 0 9.383e-05 1.68e-05 -8.473e-05 0 +fxyz 0 2.053e-05 3.66e-06 -1.839e-05 0 +fx^3-3*xy^2 0 -0.003204 0.0004266 -0.002134 0 +f3yx^2-y^3 0 -0.003695 0.0004558 -0.002281 0 + sum over m -0.01804 0.002152 -0.01077 4.022e-06 + sum over m+zeta -0.01804 0.002152 -0.01077 4.022e-06 +Total Charge on atom: Fe 15.99 +Total Magnetism on atom: Fe (0.4405, -2.289, 0.1208) diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref index 649ae1ef31..8a17a1fada 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref @@ -1,4 +1,4 @@ -etotref -6844.685232776227 -etotperatomref -3422.3426163881 +etotref -6844.326716364628 +etotperatomref -3422.1633581823 Compare_mulliken_pass 0 -totaltimeref 21.55 +totaltimeref 36.59 diff --git a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat index c3cd3a0b6d..87a35be1b0 100644 --- a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat +++ b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat @@ -1,6 +1,6 @@ -1 0 -3.2007432 -0.55268317 5.3411505 5.3411505 8.2951616 10.292492 -2 0.17320508 -3.6638913 0.16302859 5.4122469 5.4122469 8.3831312 10.350824 -3 0.34641016 -4.4233335 1.6715127 5.6238068 5.6238068 8.6434268 10.399663 -4 0.51961524 -5.0448087 3.4994417 5.9592422 5.9592422 9.0296871 10.132257 -5 0.69282032 -5.4330707 5.3995429 6.3414385 6.3414385 9.3569963 9.5807977 -6 0.8660254 -5.5643351 6.5398122 6.5398122 6.5398122 9.280228 9.280228 + 1 0.00000000 -3.20074324 -0.55268317 5.34115051 5.34115051 8.29516160 10.29249200 + 2 0.17320508 -3.66389133 0.16302859 5.41224692 5.41224692 8.38313120 10.35082356 + 3 0.34641016 -4.42333349 1.67151268 5.62380682 
5.62380682 8.64342675 10.39966330 + 4 0.51961524 -5.04480873 3.49944170 5.95924219 5.95924219 9.02968708 10.13225743 + 5 0.69282032 -5.43307067 5.39954292 6.34143853 6.34143853 9.35699627 9.58079775 + 6 0.86602540 -5.56433513 6.53981221 6.53981221 6.53981221 9.28022796 9.28022796 diff --git a/tests/integrate/Autotest.sh b/tests/integrate/Autotest.sh index 5310998097..37908d1769 100755 --- a/tests/integrate/Autotest.sh +++ b/tests/integrate/Autotest.sh @@ -74,6 +74,11 @@ check_out(){ # check every 'key' word #------------------------------------------------------ for key in $properties; do + + if [ $key == "totaltimeref" ]; then + # echo "time=$cal ref=$ref" + break + fi #-------------------------------------------------- # calculated value @@ -91,11 +96,6 @@ check_out(){ #-------------------------------------------------- deviation=`awk 'BEGIN {x='$ref';y='$cal';printf "%.'$ca'f\n",x-y}'` - if [ $key == "totaltimeref" ]; then - # echo "time=$cal ref=$ref" - break - fi - #-------------------------------------------------- # If deviation < threshold, then the test passes, diff --git a/toolchain/README.md b/toolchain/README.md index 3e747006b7..a8ce2f711e 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -91,6 +91,7 @@ The needed dependencies version default: - `LibXC` 6.2.2 - `ELPA` 2023.05.001 - `CEREAL` 1.3.2 +- `RapidJSON` 1.1.0 And Intel-oneAPI need user or server manager to manually install from Intel. [Intel-oneAPI](https://www.intel.cn/content/www/cn/zh/developer/tools/oneapi/toolkits.html) @@ -102,12 +103,12 @@ Dependencies below are optional, which is NOT installed by default: Users can install them by using `--with-*=install` in toolchain*.sh, which is `no` in default. > Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, which is also work for libnpy, which is more recommended. 
-Notice: for `CEREAL`, `Libnpy`, `LibRI` and `LibComm`, +Notice: for `CEREAL`,`RapidJSON`, `Libnpy`, `LibRI` and `LibComm`, you need to download them from github.com, rename it as formatted, and put them in `build` directory at the same time e.g.: ```shell -# packages downloaded from github.com +# packages downloaded from github.com, RapidJSON is not supported now mv v1.3.2.tar.gz build/cereal-1.3.2.tar.gz ``` @@ -175,7 +176,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ ...... ``` -Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS +Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now ### shell problem If you encounter problem like: @@ -206,6 +207,8 @@ The default compiler for Intel-oneAPI is `icpx` and `icx`, which will cause prob The best way is to change `icpx` to `icpc`, `icx` to `icc`. user can manually change it in toolchain*.sh via `--with-intel-classic=yes` +Notice: `icc` and `icpc` from Intel Classic Compiler of Intel-oneAPI is not supported for 2024.0 and newer version. + ### LibRI and LibComm problem (There is some problem sometimes when compling with LibRI and LibComm, detailed information is needed) @@ -216,6 +219,9 @@ Sometimes Intel-oneAPI have problem to link `mpirun`, which will always show in 2023.2.0 version of MPI in Intel-oneAPI. Try `source /path/to/setvars.sh` or install another version of IntelMPI may help. +which is fixed in 2024.0.0 version of Intel-oneAPI, +And will not occur in Intel-MPI before 2021.10.0 (Intel-oneAPI before 2023.2.0) + More problem and possible solution can be accessed via [#2928](https://github.com/deepmodeling/abacus-develop/issues/2928) @@ -230,9 +236,6 @@ from ABACUS repo, make dependencies package more independent and flexible. 2. Users can manually change `pkg_install_dir` variable in `scripts/stage*/install*` to change the installation directory of each packages, which may let the installation more fiexible. -3. 
Users can manually change `INSTALL` variable in `scripts/common_vars.sh` -to change the installation directory of all packages, which may let the -installation more fiexible. ## More diff --git a/toolchain/install_abacus_toolchain.sh b/toolchain/install_abacus_toolchain.sh index be41687307..68c7959ce1 100755 --- a/toolchain/install_abacus_toolchain.sh +++ b/toolchain/install_abacus_toolchain.sh @@ -182,10 +182,12 @@ The --with-PKG options follow the rules: --with-scalapack Parallel linear algebra library, needed for parallel calculations. Default = install + --with-elpa Eigenvalue SoLvers for Petaflop-Applications library. + Fast library for large parallel jobs, Especially for ABACUS LCAO + Default = install --with-cereal Enable cereal for ABACUS LCAO Default = install - --with-elpa Eigenvalue SoLvers for Petaflop-Applications library. - Fast library for large parallel jobs. + --with-rapidjson Enable rapidjson for ABACUS to read/write json files Default = install --with-libtorch Enable libtorch the machine learning framework needed for DeePKS Default = no @@ -230,7 +232,7 @@ EOF tool_list="gcc intel cmake" mpi_list="mpich openmpi intelmpi" math_list="mkl acml openblas" -lib_list="fftw libxc scalapack elpa cereal libtorch libnpy libri libcomm" +lib_list="fftw libxc scalapack elpa cereal rapidjson libtorch libnpy libri libcomm" package_list="${tool_list} ${mpi_list} ${math_list} ${lib_list}" # ------------------------------------------------------------------------ @@ -264,7 +266,8 @@ with_acml="__SYSTEM__" with_openblas="__INSTALL__" with_elpa="__INSTALL__" with_cereal="__INSTALL__" -# with_libtorch="__DONTUSE__" +with_rapidjson="__INSTALL__" +# with_libtorch="__DONTUSE__" # default # with_libnpy="__DONTUSE__" # with_libri="__DONTUSE__" # with_libcomm="__DONTUSE__" @@ -549,6 +552,9 @@ while [ $# -ge 1 ]; do --with-cereal*) with_cereal=$(read_with "${1}") ;; + --with-rapidjson*) + with_rapidjson=$(read_with "${1}") + ;; --with-libnpy*) with_libnpy=$(read_with "${1}") 
;; diff --git a/toolchain/scripts/common_vars.sh b/toolchain/scripts/common_vars.sh index d04d40fa39..51f941f6fe 100755 --- a/toolchain/scripts/common_vars.sh +++ b/toolchain/scripts/common_vars.sh @@ -7,8 +7,7 @@ # directories and files used by the installer ROOTDIR=${ROOTDIR:-"$(pwd -P)"} SCRIPTDIR=${SCRIPTDIR:-"${ROOTDIR}/scripts"} -INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} -#INSTALLDIR=${INSTALLDIR:-"${HOME}/abacus_deps"} # advanced installation +INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} # should not be changed BUILDDIR=${BUILDDIR:-"${ROOTDIR}/build"} SETUPFILE=${SETUPFILE:-"${INSTALLDIR}/setup"} ARCH_FILE_TEMPLATE=${ARCH_FILE_TEMPLATE:-"${SCRIPTDIR}/arch_base.tmpl"} diff --git a/toolchain/scripts/stage4/install_rapidjson.sh b/toolchain/scripts/stage4/install_rapidjson.sh new file mode 100755 index 0000000000..b63789ffff --- /dev/null +++ b/toolchain/scripts/stage4/install_rapidjson.sh @@ -0,0 +1,93 @@ +#!/bin/bash -e + +# TODO: Review and if possible fix shellcheck errors. +# shellcheck disable=all +# RAPIDJSON is not need any complex setting +# Only problem is the installation from github.com + +[ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0 +SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)" + +rapidjson_ver="1.1.0" +rapidjson_sha256="bf7ced29704a1e696fbccf2a2b4ea068e7774fa37f6d7dd4039d0787f8bed98e" +source "${SCRIPT_DIR}"/common_vars.sh +source "${SCRIPT_DIR}"/tool_kit.sh +source "${SCRIPT_DIR}"/signal_trap.sh +source "${INSTALLDIR}"/toolchain.conf +source "${INSTALLDIR}"/toolchain.env + +[ -f "${BUILDDIR}/setup_rapidjson" ] && rm "${BUILDDIR}/setup_rapidjson" + +RAPIDJSON_CFLAGS="" +! 
[ -d "${BUILDDIR}" ] && mkdir -p "${BUILDDIR}" +cd "${BUILDDIR}" + +case "$with_rapidjson" in + __INSTALL__) + echo "==================== Installing RAPIDJSON ====================" + dirname="rapidjson-${rapidjson_ver}" + pkg_install_dir="${INSTALLDIR}/$dirname" + #pkg_install_dir="${HOME}/lib/rapidjson/${rapidjson_ver}" + install_lock_file="$pkg_install_dir/install_successful" + url="https://github.com/Tencent/rapidjson/archive/refs/tags/v${rapidjson_ver}.tar.gz" + filename="rapidjson-${rapidjson_ver}.tar.gz" + if verify_checksums "${install_lock_file}"; then + echo "$dirname is already installed, skipping it." + else + if [ -f $filename ]; then + echo "$filename is found" + else + # download from github.com and checksum + echo "wget --quiet $url -O $filename" + if ! wget --quiet $url -O $filename; then + report_error "failed to download $url" + recommend_offline_installation $filename $url + fi + # checksum + checksum "$filename" "$rapidjson_sha256" + fi + echo "Installing from scratch into ${pkg_install_dir}" + [ -d $dirname ] && rm -rf $dirname + tar -xzf $filename + mkdir -p "${pkg_install_dir}" + cp -r $dirname/* "${pkg_install_dir}/" + write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage4/$(basename ${SCRIPT_NAME})" + fi + ;; + __SYSTEM__) + echo "==================== CANNOT Finding RAPIDJSON from system paths NOW ====================" + recommend_offline_installation $filename $url + # How to do it in rapidjson? 
-- Zhaoqing in 2023/08/23 + # check_lib -lxcf03 "libxc" + # check_lib -lxc "libxc" + # add_include_from_paths LIBXC_CFLAGS "xc.h" $INCLUDE_PATHS + # add_lib_from_paths LIBXC_LDFLAGS "libxc.*" $LIB_PATHS + ;; + __DONTUSE__) ;; + + *) + echo "==================== Linking RAPIDJSON to user paths ====================" + check_dir "${pkg_install_dir}" + RAPIDJSON_CFLAGS="-I'${pkg_install_dir}'" + ;; +esac +if [ "$with_rapidjson" != "__DONTUSE__" ]; then + if [ "$with_rapidjson" != "__SYSTEM__" ]; then + # LibRI deps should find rapidjson include in CPATH + cat << EOF > "${BUILDDIR}/setup_rapidjson" +prepend_path CPATH "$pkg_install_dir/include" +export CPATH="${pkg_install_dir}/include:"${CPATH} +EOF + cat "${BUILDDIR}/setup_rapidjson" >> $SETUPFILE + fi + cat << EOF >> "${BUILDDIR}/setup_rapidjson" +export RAPIDJSON_CFLAGS="${RAPIDJSON_CFLAGS}" +export RAPIDJSON_ROOT="$pkg_install_dir" +EOF +fi + +load "${BUILDDIR}/setup_rapidjson" +write_toolchain_env "${INSTALLDIR}" + +cd "${ROOTDIR}" +report_timing "rapidjson" diff --git a/toolchain/scripts/stage4/install_stage4.sh b/toolchain/scripts/stage4/install_stage4.sh index ffe8f670c9..b5c7cf5eed 100755 --- a/toolchain/scripts/stage4/install_stage4.sh +++ b/toolchain/scripts/stage4/install_stage4.sh @@ -4,6 +4,7 @@ # shellcheck disable=all ./scripts/stage4/install_cereal.sh +./scripts/stage4/install_rapidjson.sh ./scripts/stage4/install_libtorch.sh ./scripts/stage4/install_libnpy.sh ./scripts/stage4/install_libri.sh diff --git a/toolchain/scripts/tool_kit.sh b/toolchain/scripts/tool_kit.sh index d07445089b..120b623fee 100755 --- a/toolchain/scripts/tool_kit.sh +++ b/toolchain/scripts/tool_kit.sh @@ -54,9 +54,12 @@ By download $__filename from $__url, Rename it as $__filename and put it into ${BUILDDIR}, And re-run toolchain installation script. -Instead of github.com. you can manually install requirements packages via: -1. Download from www.cp2k.org/static/downloads -2. 
wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v1 -O abacus-deps-v1.zip +You can manually install requirements packages via: +1. Download from www.cp2k.org/static/downloads (for OpenBLAS, OpenMPI and Others) +2. Download from github.com (for CEREAL, RapidJSON, libnpy, LibRI and others stage4 packages) +3. Use git submodule update --init --recursive (for LibRI) +4. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v2 -O abacus-deps.zip +5. for Intel-oneAPI, please contact your server manager our visit Intel official website EOF } diff --git a/toolchain/toolchain_gnu.sh b/toolchain/toolchain_gnu.sh index 57105f601b..04635bb63d 100755 --- a/toolchain/toolchain_gnu.sh +++ b/toolchain/toolchain_gnu.sh @@ -18,6 +18,7 @@ --with-fftw=install \ --with-elpa=install \ --with-cereal=install \ +--with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ diff --git a/toolchain/toolchain_intel-mpich.sh b/toolchain/toolchain_intel-mpich.sh index ffc2626670..fcf3cc41ee 100755 --- a/toolchain/toolchain_intel-mpich.sh +++ b/toolchain/toolchain_intel-mpich.sh @@ -21,6 +21,7 @@ --with-fftw=no \ --with-elpa=install \ --with-cereal=install \ +--with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ diff --git a/toolchain/toolchain_intel.sh b/toolchain/toolchain_intel.sh index 8f391be008..e5298c570d 100755 --- a/toolchain/toolchain_intel.sh +++ b/toolchain/toolchain_intel.sh @@ -22,6 +22,7 @@ --with-fftw=no \ --with-elpa=install \ --with-cereal=install \ +--with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ From 6aa019c41ae3348e176cb3f5ceb58aff6422606d Mon Sep 17 00:00:00 2001 From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com> Date: Sun, 28 Jan 2024 14:03:20 +0800 Subject: [PATCH 17/44] Revert "Modify inputs and update to latest version" --- .github/workflows/test.yml | 18 +- CMakeLists.txt | 77 +- Dockerfile.cuda | 2 +- Dockerfile.gnu | 2 +- Dockerfile.intel | 2 +- 
cmake/FindELPA.cmake | 44 +- cmake/FindLAPACK.cmake | 2 +- cmake/FindLibxc.cmake | 36 - cmake/FindPEXSI.cmake | 8 +- deps/libpaw_interface | 2 +- docs/advanced/input_files/input-main.md | 37 +- docs/advanced/install.md | 10 - docs/quick_start/easy_install.md | 9 +- docs/quick_start/hands_on.md | 2 +- python/pyabacus/CMakeLists.txt | 11 +- python/pyabacus/src/py_abacus.cpp | 13 - python/pyabacus/src/py_math_base.cpp | 63 -- python/pyabacus/src/py_numerical_radial.cpp | 4 +- python/pyabacus/src/pyabacus/__init__.py | 5 +- python/pyabacus/tests/test_base_math.py | 15 - python/pyabacus/tests/test_nr.py | 25 + source/Makefile | 17 +- source/Makefile.Objects | 18 +- source/Makefile.vars | 20 +- source/module_base/global_variable.cpp | 30 - source/module_base/global_variable.h | 28 - source/module_base/math_sphbes.cpp | 56 +- source/module_base/math_sphbes.h | 13 +- source/module_base/para_json.cpp | 977 ------------------ source/module_base/para_json.h | 560 ---------- source/module_base/test/CMakeLists.txt | 14 - .../module_base/test/complexmatrix_test.cpp | 22 +- .../module_base/test/inverse_matrix_test.cpp | 2 +- source/module_base/test/math_sphbes_test.cpp | 16 +- source/module_base/test/math_ylmreal_test.cpp | 364 +++---- source/module_base/test/para_json_test.cpp | 68 -- source/module_base/test/perf_sphbes_test.cpp | 72 -- source/module_base/tool_quit.h | 8 +- source/module_cell/klist.cpp | 44 +- source/module_cell/klist.h | 1 - .../test/sltk_atom_input_test.cpp | 2 +- source/module_cell/read_atoms.cpp | 189 ++-- source/module_elecstate/occupy.cpp | 414 +++++++- source/module_esolver/esolver_ks.cpp | 19 +- source/module_esolver/esolver_ks_lcao.cpp | 27 +- source/module_esolver/esolver_ks_pw.cpp | 71 +- .../module_xc/test/test_xc.cpp | 4 +- .../module_xc/test/test_xc1.cpp | 2 +- .../module_xc/test/test_xc2.cpp | 10 +- .../module_xc/test/test_xc4.cpp | 4 +- .../module_xc/test/test_xc5.cpp | 65 ++ .../module_xc/test/xc3_mock.h | 16 +- .../module_deltaspin/cal_mw.cpp | 
2 +- .../module_deltaspin/cal_mw_helper.cpp | 14 +- .../module_deltaspin/lambda_loop.cpp | 4 +- .../module_tddft/test/tddft_test.cpp | 3 +- source/module_hsolver/diago_pexsi.cpp | 44 +- source/module_hsolver/diago_pexsi.h | 2 +- source/module_hsolver/hsolver_pw.cpp | 22 +- .../module_pexsi/CMakeLists.txt | 2 +- .../module_pexsi/dist_bcd_matrix.cpp | 4 +- .../module_pexsi/dist_bcd_matrix.h | 23 - .../module_pexsi/dist_ccs_matrix.cpp | 4 +- .../module_pexsi/dist_ccs_matrix.h | 40 - .../module_pexsi/dist_matrix_transformer.cpp | 233 ++--- .../module_pexsi/dist_matrix_transformer.h | 64 +- .../module_pexsi/pexsi_solver.cpp | 28 +- .../module_pexsi/pexsi_solver.h | 6 +- .../module_pexsi/simple_pexsi.cpp | 490 +++++---- source/module_io/input.cpp | 230 +---- source/module_io/input.h | 43 +- source/module_io/input_conv.cpp | 30 - source/module_io/mulliken_charge.cpp | 4 +- source/module_io/nscf_band.cpp | 41 +- source/module_io/nscf_band.h | 1 - source/module_io/parameter_pool.cpp | 39 +- source/module_io/parameter_pool.h | 8 +- source/module_io/test/input_conv_test.cpp | 1 - source/module_io/test/input_test.cpp | 14 +- source/module_io/test/input_test_para.cpp | 9 +- source/module_io/test/support/INPUT | 2 +- source/module_io/test/support/witestfile | 2 +- source/module_io/test/to_qo_test.cpp | 34 +- source/module_io/test/write_input_test.cpp | 11 +- .../module_io/test_serial/nscf_band_test.cpp | 11 +- source/module_io/write_input.cpp | 33 +- source/module_ri/Exx_LRI.hpp | 10 +- source/module_ri/LRI_CV_Tools.hpp | 11 +- .../module_ri/conv_coulomb_pot_k-template.h | 51 + source/module_ri/conv_coulomb_pot_k.cpp | 181 ++-- source/module_ri/conv_coulomb_pot_k.h | 47 +- source/module_ri/conv_coulomb_pot_k.hpp | 37 - source/module_ri/exx_lip.cpp | 14 +- .../107_PW_OBOD_MemSaver/refBANDS_1.dat | 12 +- .../107_PW_OB_outputbands/refBANDS_1.dat | 12 +- .../204_NO_KP_NC_deltaspin/mulliken.txt.ref | 168 +-- .../204_NO_KP_NC_deltaspin/result.ref | 6 +- 
tests/integrate/207_NO_KP_OB/refBANDS_1.dat | 12 +- tests/integrate/Autotest.sh | 10 +- toolchain/README.md | 15 +- toolchain/install_abacus_toolchain.sh | 14 +- toolchain/scripts/common_vars.sh | 3 +- toolchain/scripts/stage4/install_rapidjson.sh | 93 -- toolchain/scripts/stage4/install_stage4.sh | 1 - toolchain/scripts/tool_kit.sh | 9 +- toolchain/toolchain_gnu.sh | 1 - toolchain/toolchain_intel-mpich.sh | 1 - toolchain/toolchain_intel.sh | 1 - 108 files changed, 1722 insertions(+), 4025 deletions(-) delete mode 100644 cmake/FindLibxc.cmake delete mode 100644 python/pyabacus/src/py_abacus.cpp delete mode 100644 python/pyabacus/src/py_math_base.cpp delete mode 100644 python/pyabacus/tests/test_base_math.py create mode 100644 python/pyabacus/tests/test_nr.py delete mode 100644 source/module_base/para_json.cpp delete mode 100644 source/module_base/para_json.h delete mode 100644 source/module_base/test/para_json_test.cpp delete mode 100644 source/module_base/test/perf_sphbes_test.cpp create mode 100644 source/module_ri/conv_coulomb_pot_k-template.h delete mode 100644 source/module_ri/conv_coulomb_pot_k.hpp delete mode 100755 toolchain/scripts/stage4/install_rapidjson.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 76f48347a8..856e56d97a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,37 +2,27 @@ name: Integration Test and Unit Test on: pull_request: - + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true - + jobs: test: name: Test runs-on: self-hosted if: github.repository_owner == 'deepmodeling' - container: - image: ghcr.io/deepmodeling/abacus-gnu - volumes: - - /tmp/ccache:/github/home/.ccache + container: ghcr.io/deepmodeling/abacus-gnu steps: - name: Checkout uses: actions/checkout@v4 with: submodules: recursive - - - name: Install Ccache - run: | - sudo apt-get update - sudo apt-get install -y ccache - - name: Build run: | - cmake -B build -DBUILD_TESTING=ON 
-DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON + cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON cmake --build build -j8 cmake --install build - - name: Test env: GTEST_COLOR: 'yes' diff --git a/CMakeLists.txt b/CMakeLists.txt index 73a846304b..8440662355 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,12 @@ project(ABACUS LANGUAGES CXX ) +# private options, should not be pushed to master +# set(PEXSI_DIR "~/projects/pexsi-build/pexsi") +# set(SuperLU_DIR "~/projects/pexsi-build/superlu") +# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis") +# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001") + option(ENABLE_LCAO "Enable LCAO calculation." ON) option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF) option(ENABLE_LIBXC "Enable LibXC functionality" OFF) @@ -34,36 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF) option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF) option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) -option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) -option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) option(USE_PEXSI "Enable support for PEXSI." 
OFF) - - - -# enable json support -if(ENABLE_RAPIDJSON) - find_package(RapidJSON) - if(NOT RapidJSON_FOUND) - message(WARNING "Rapidjson is not found, trying downloading from github, or you can install Rapidjson first and reinstall abacus.") - include(FetchContent) - FetchContent_Declare( - rapidjson - GIT_REPOSITORY https://github.com/Tencent/rapidjson.git - GIT_TAG "origin/master" - GIT_SHALLOW TRUE - GIT_PROGRESS TRUE - ) - set(RAPIDJSON_BUILD_TESTS OFF CACHE INTERNAL "") - set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE INTERNAL "") - FetchContent_MakeAvailable(rapidjson) - set(RapidJSON_INCLUDE_PATH "${rapidjson_SOURCE_DIR}/include") - endif() - add_compile_definitions(__RAPIDJSON) - add_definitions(-DRAPIDJSON_HAS_CXX11_NOEXCEPT=0) - include_directories(${RapidJSON_INCLUDE_PATH}) -endif() - - if (USE_CUDA) set(USE_CUSOLVER_LCAO ON) else() @@ -212,7 +189,7 @@ if(ENABLE_LCAO) if(USE_PEXSI) find_package(PEXSI REQUIRED) - target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) endif() @@ -437,7 +414,8 @@ endif() if(ENABLE_DEEPKS) # Torch uses outdated components to detech CUDA arch, causing failure on latest CUDA kits. - # Set CMake variable TORCH_CUDA_ARCH_LIST in the form of "major.minor" if required. 
+ # See above for setting CMAKE_CUDA_ARCHITECTURES + set(TORCH_CUDA_ARCH_LIST CMAKE_CUDA_ARCHITECTURES) find_package(Torch REQUIRED) if(NOT Torch_VERSION VERSION_LESS "2.1.0") set_if_higher(CMAKE_CXX_STANDARD 17) @@ -544,8 +522,11 @@ if(DEFINED Libxc_DIR) set(ENABLE_LIBXC ON) endif() if(ENABLE_LIBXC) - # use `cmake/FindLibxc.cmake` to detect Libxc installation with `pkg-config` - find_package(Libxc REQUIRED) + find_package(Libxc REQUIRED HINTS + ${Libxc_DIR}/share/cmake/Libxc + ${Libxc_DIR}/lib/cmake/Libxc + ${Libxc_DIR}/lib64/cmake/Libxc + ) message(STATUS "Found Libxc: version " ${Libxc_VERSION}) if(${Libxc_VERSION} VERSION_LESS 5.1.7) message(FATAL_ERROR "LibXC >= 5.1.7 is required.") @@ -599,25 +580,6 @@ if(INFO) # modifications on blas_connector and lapack_connector endif() -# Add performance test in abacus -IF (ENABLE_GOOGLEBENCH) - set(BUILD_TESTING ON) - find_package(benchmark HINTS ${BENCHMARK_DIR}) - if(NOT ${benchmark_FOUND}) - set(BENCHMARK_USE_BUNDLED_GTEST OFF) - include(FetchContent) - FetchContent_Declare( - benchmark - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG "origin/main" - GIT_SHALLOW TRUE - GIT_PROGRESS TRUE - ) - set(BENCHMARK_ENABLE_TESTING OFF) - FetchContent_MakeAvailable(benchmark) - endif() -endif() - IF (BUILD_TESTING) set_if_higher(CMAKE_CXX_STANDARD 14) # Required in orbital include(CTest) @@ -647,14 +609,8 @@ IF (BUILD_TESTING) endif() #dependencies & link library - if(ENABLE_GOOGLEBENCH) - target_link_libraries(${UT_TARGET} ${UT_LIBS} - Threads::Threads GTest::gtest_main GTest::gmock_main benchmark::benchmark) - else() - target_link_libraries(${UT_TARGET} ${UT_LIBS} - Threads::Threads GTest::gtest_main GTest::gmock_main) - endif() - + target_link_libraries(${UT_TARGET} ${UT_LIBS} + Threads::Threads GTest::gtest_main GTest::gmock_main) if(USE_OPENMP) target_link_libraries(${UT_TARGET} OpenMP::OpenMP_CXX) endif() @@ -664,7 +620,6 @@ IF (BUILD_TESTING) WORKING_DIRECTORY $ ) endfunction(AddTest) - endif() 
add_subdirectory(source) diff --git a/Dockerfile.cuda b/Dockerfile.cuda index e950f097f9..719f7c4278 100644 --- a/Dockerfile.cuda +++ b/Dockerfile.cuda @@ -2,7 +2,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 RUN apt update && apt install -y --no-install-recommends \ libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \ - libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \ + libxc-dev libgtest-dev libgmock-dev python3-numpy \ bc cmake git g++ make bc time sudo unzip vim wget ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ diff --git a/Dockerfile.gnu b/Dockerfile.gnu index 060d930563..0b6b45d248 100644 --- a/Dockerfile.gnu +++ b/Dockerfile.gnu @@ -1,7 +1,7 @@ FROM ubuntu:22.04 RUN apt update && apt install -y --no-install-recommends \ libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \ - libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \ + libxc-dev libgtest-dev libgmock-dev python3-numpy \ bc cmake git g++ make bc time sudo unzip vim wget gfortran ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ diff --git a/Dockerfile.intel b/Dockerfile.intel index 3947f05b9e..6cac8c9f5f 100644 --- a/Dockerfile.intel +++ b/Dockerfile.intel @@ -2,7 +2,7 @@ FROM ubuntu:22.04 RUN apt-get update && apt-get install -y \ bc cmake git gnupg gcc g++ python3-numpy sudo wget vim unzip \ - libcereal-dev libxc-dev libgtest-dev libgmock-dev libbenchmark-dev + libcereal-dev libxc-dev libgtest-dev libgmock-dev # Following steps by https://software.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html . 
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ diff --git a/cmake/FindELPA.cmake b/cmake/FindELPA.cmake index 4105e47592..5769f7248c 100644 --- a/cmake/FindELPA.cmake +++ b/cmake/FindELPA.cmake @@ -7,50 +7,34 @@ # ELPA_INCLUDE_DIR - Where to find ELPA headers. # -find_package(PkgConfig) - -find_path(ELPA_INCLUDE_DIRS +find_path(ELPA_INCLUDE_DIR elpa/elpa.h HINTS ${ELPA_DIR} PATH_SUFFIXES "include" "include/elpa" ) if(USE_OPENMP) - find_library(ELPA_LINK_LIBRARIES - NAMES elpa_openmp elpa - HINTS ${ELPA_DIR} - PATH_SUFFIXES "lib" - ) + find_library(ELPA_LIBRARY + NAMES elpa_openmp elpa + HINTS ${ELPA_DIR} + PATH_SUFFIXES "lib" + ) else() - find_library(ELPA_LINK_LIBRARIES - NAMES elpa - HINTS ${ELPA_DIR} - PATH_SUFFIXES "lib" - ) -endif() - -if(NOT ELPA_INCLUDE_DIRS AND PKG_CONFIG_FOUND) - if(DEFINED ELPA_DIR) - string(APPEND CMAKE_PREFIX_PATH ";${ELPA_DIR}") - endif() - if(USE_OPENMP) - pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa_openmp) - else() - pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa) - endif() -elseif(NOT PKG_CONFIG_FOUND) - message( - "ELPA : We need pkg-config to get all information about the elpa library") + find_library(ELPA_LIBRARY + NAMES elpa + HINTS ${ELPA_DIR} + PATH_SUFFIXES "lib" + ) endif() # Handle the QUIET and REQUIRED arguments and # set ELPA_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LINK_LIBRARIES ELPA_INCLUDE_DIRS) +find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LIBRARY ELPA_INCLUDE_DIR) # Copy the results to the output variables and target. 
if(ELPA_FOUND) - list(GET ELPA_LINK_LIBRARIES 0 ELPA_LIBRARY) - set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIRS}) + set(ELPA_LIBRARIES ${ELPA_LIBRARY}) + set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIR}) if(NOT TARGET ELPA::ELPA) add_library(ELPA::ELPA UNKNOWN IMPORTED) diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake index c240d5facf..4f4bfbc425 100644 --- a/cmake/FindLAPACK.cmake +++ b/cmake/FindLAPACK.cmake @@ -6,7 +6,7 @@ # find_library(LAPACK_LIBRARY - NAMES openblas blas + NAMES openblas HINTS ${LAPACK_DIR} PATH_SUFFIXES "lib" ) diff --git a/cmake/FindLibxc.cmake b/cmake/FindLibxc.cmake deleted file mode 100644 index 4a3c04cba7..0000000000 --- a/cmake/FindLibxc.cmake +++ /dev/null @@ -1,36 +0,0 @@ -include(FindPackageHandleStandardArgs) - -if(DEFINED Libxc_DIR) - string(APPEND CMAKE_PREFIX_PATH ";${Libxc_DIR}") -endif() -# Using CMake interface as default. -# NO REQUIRED here, otherwhile it would throw error -# with no LibXC found. -find_package(Libxc HINTS - ${Libxc_DIR}/share/cmake/Libxc - ${Libxc_DIR}/lib/cmake/Libxc - ${Libxc_DIR}/lib64/cmake/Libxc - ) -if(NOT TARGET Libxc::xc) - find_package(PkgConfig REQUIRED) - pkg_search_module(Libxc REQUIRED IMPORTED_TARGET GLOBAL libxc) - find_package_handle_standard_args(Libxc DEFAULT_MSG Libxc_LINK_LIBRARIES Libxc_INCLUDE_DIRS) -endif() - - -# Copy the results to the output variables and target. -# if find_package() above works, Libxc::xc would be present and -# below would be skipped. 
-if(Libxc_FOUND AND NOT TARGET Libxc::xc) - set(Libxc_LIBRARY ${Libxc_LINK_LIBRARIES}) - set(Libxc_LIBRARIES ${Libxc_LIBRARY}) - set(Libxc_INCLUDE_DIR ${Libxc_INCLUDE_DIRS}) - add_library(Libxc::xc UNKNOWN IMPORTED) - set_target_properties(Libxc::xc PROPERTIES - IMPORTED_LOCATION "${Libxc_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${Libxc_INCLUDE_DIR}") -endif() - -set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${Libxc_INCLUDE_DIR}) - -mark_as_advanced(Libxc_INCLUDE_DIR Libxc_LIBRARY) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index 062764acce..22fe4dd01c 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY PATH_SUFFIXES "lib" ) -find_library(SuperLU_DIST_LIBRARY +find_library(SuperLU_LIBRARY NAMES libsuperlu_dist.a - HINTS ${SuperLU_DIST_DIR} + HINTS ${SuperLU_DIR} PATH_SUFFIXES "lib" ) # Handle the QUIET and REQUIRED arguments and # set Cereal_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY) # Copy the results to the output variables and target. 
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY) +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY) diff --git a/deps/libpaw_interface b/deps/libpaw_interface index c211c0ab33..893cfe5b88 160000 --- a/deps/libpaw_interface +++ b/deps/libpaw_interface @@ -1 +1 @@ -Subproject commit c211c0ab330adf3cc374f50ab3edee46b174e64c +Subproject commit 893cfe5b88c4b640b88a82335474d9f67d4c4cf6 diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 8c675cab92..b9ee122af6 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -72,7 +72,6 @@ - [mixing\_beta](#mixing_beta) - [mixing\_beta\_mag](#mixing_beta_mag) - [mixing\_ndim](#mixing_ndim) - - [mixing\_restart](#mixing_restart) - [mixing\_gg0](#mixing_gg0) - [mixing\_gg0\_mag](#mixing_gg0_mag) - [mixing\_gg0\_min](#mixing_gg0_min) @@ -146,8 +145,6 @@ - [out\_app\_flag](#out_app_flag) - [out\_ndigits](#out_ndigits) - [out\_interval](#out_interval) - - [band\_print\_num](#band_print_num) - - [bands\_to\_print](#bands_to_print) - [out\_element\_info](#out_element_info) - [restart\_save](#restart_save) - [restart\_load](#restart_load) @@ -948,8 +945,6 @@ calculations. - **fixed**: fixed occupations (available for non-coductors only) - **gauss** or **gaussian**: Gaussian smearing method. - **mp**: methfessel-paxton smearing method; recommended for metals. - - **mp2**: 2-nd methfessel-paxton smearing method; recommended for metals. - - **mv** or **cold**: marzari-vanderbilt smearing method. - **fd**: Fermi-Dirac smearing method: $f=1/\{1+\exp[(E-\mu)/kT]\}$ and smearing_sigma below is the temperature $T$ (in Ry). - **Default**: gauss @@ -1006,13 +1001,6 @@ We recommend the following options: For systems that are difficult to converge, one could try increasing the value of 'mixing_ndim' to enhance the stability of the self-consistent field (SCF) calculation. 
- **Default**: 8 -### mixing_restart - -- **Type**: Integer -- **Description**: At `mixing_restart`-th iteration, SCF will restart by using output charge density from perivos iteration as input charge density directly, and start a new mixing. `mixing_restart=0|1` means SCF starts from scratch. - -- **Default**: 0 - ### mixing_gg0 - **Type**: Real @@ -1506,8 +1494,8 @@ These variables are used to control the output of properties. ### out_band -- **Type**: Boolean Integer(optional) -- **Description**: Whether to output the band structure (in eV), optionally output precision can be set by a second parameter, default is 8. For more information, refer to the [band.md](../elec_properties/band.md) +- **Type**: Boolean +- **Description**: Whether to output the band structure (in eV). For more information, refer to the [band.md](../elec_properties/band.md) - **Default**: False ### out_proj_band @@ -1611,20 +1599,6 @@ These variables are used to control the output of properties. - **Description**: Control the interval for printing Mulliken population analysis, $r(R)$, $H(R)$, $S(R)$, $T(R)$, $dH(R)$, $H(k)$, $S(k)$ and $wfc(k)$ matrices during molecular dynamics calculations. Check input parameters [out_mul](#out_mul), [out_mat_r](#out_mat_r), [out_mat_hs2](#out_mat_hs2), [out_mat_t](#out_mat_t), [out_mat_dh](#out_mat_dh), [out_mat_hs](#out_mat_hs) and [out_wfc_lcao](#out_wfc_lcao) for more information, respectively. - **Default**: 1 -### band_print_num - -- **Type**: Integer -- **Availability**: PW basis -- **Description**: If you want to plot a partial charge density contributed from some chosen bands. `band_print_num` define the number of band list. The result can be found in "band*.cube". -- **Default**: 0 - -### bands_to_print - -- **Type**: vector -- **Availability**: band_print_num > 0 -- **Description**: define which band you want to choose for partial charge density. 
-- **Default**: [] - ### out_element_info - **Type**: Boolean @@ -2802,9 +2776,9 @@ These variables are used to control berry phase and wannier90 interface paramete - **Type**: String - **Description**: the spin direction for the Wannier function calculation when nspin is set to 2 - - `up`: Calculate spin up for the Wannier function. - - `down`: Calculate spin down for the Wannier function. -- **Default**: `up` + - "up": Calculate spin up for the Wannier function. + - "down": Calculate spin down for the Wannier function. +- **Default**: "up" ### out_wannier_mmn @@ -2844,7 +2818,6 @@ These variables are used to control berry phase and wannier90 interface paramete - **Description**: write the "UNK.*" file in ASCII format or binary format. - 0: write the "UNK.*" file in binary format. - 1: write the "UNK.*" file in ASCII format (text file format). -- **Default**: 1 [back to top](#full-list-of-input-keywords) diff --git a/docs/advanced/install.md b/docs/advanced/install.md index d6201a060f..e929fac34c 100644 --- a/docs/advanced/install.md +++ b/docs/advanced/install.md @@ -69,16 +69,6 @@ After building and installing, unit tests can be performed with `ctest`. To run a subset of unit test, use `ctest -R ` to perform tests with name matched by given pattern. -## Build Performance Tests - -To build performance tests for ABACUS, define `ENABLE_GOOGLEBENCH` flag. You can also specify the path to a local installation of [Google Benchmark](https://github.com/google/benchmark.git) by setting `BENCHMARK_DIR` flags. If not found locally, the configuration process will try to download it automatically. - -```bash -cmake -B build -DENABLE_GOOGLEBENCH=1 -``` - -Google Benchmark requires Google Test to build and run the tests. When setting `ENABLE_GOOGLEBENCH` to ON, `BUILD_TESTING` is automatically enabled. After building and installing, performance tests can be executed with `ctest`. 
- ## Build with CUDA support ### Extra prerequisites diff --git a/docs/quick_start/easy_install.md b/docs/quick_start/easy_install.md index 4089e303a3..957b9d3262 100644 --- a/docs/quick_start/easy_install.md +++ b/docs/quick_start/easy_install.md @@ -28,7 +28,7 @@ These requirements support the calculation of plane-wave basis in ABACUS. For LC Some of these packages can be installed with popular package management system, such as `apt` and `yum`: ```bash -sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git pkgconf +sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git ``` > Installing ELPA by apt only matches requirements on Ubuntu 22.04. For earlier linux distributions, you should build ELPA from source. @@ -111,12 +111,12 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s - `LAPACK_DIR`: Path to OpenBLAS library `libopenblas.so`(including BLAS and LAPACK) - `SCALAPACK_DIR`: Path to ScaLAPACK library `libscalapack.so` - `ELPA_DIR`: Path to ELPA install directory; should be the folder containing 'include' and 'lib'. - > Note: In ABACUS v3.5.1 or earlier, if you install ELPA from source , please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa` to help the build system find ELPA headers. + > Note: If you install ELPA from source, please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa`. This is a known behavior of ELPA. - `FFTW3_DIR`: Path to FFTW3. - `CEREAL_INCLUDE_DIR`: Path to the parent folder of `cereal/cereal.hpp`. Will download from GitHub if absent. - `Libxc_DIR`: (Optional) Path to Libxc. 
- > Note: In ABACUS v3.5.1 or earlier, Libxc built from source with Makefile is NOT supported; please compile Libxc with CMake instead. + > Note: Building Libxc from source with Makefile does NOT support using it in CMake here. Please compile Libxc with CMake instead. - `LIBRI_DIR`: (Optional) Path to LibRI. - `LIBCOMM_DIR`: (Optional) Path to LibComm. @@ -126,7 +126,6 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s - `ENABLE_LIBRI=OFF`: [Enable LibRI](../advanced/install.md#add-libri-support) to suppport variety of functionals. If `LIBRI_DIR` and `LIBCOMM_DIR` is defined, `ENABLE_LIBRI` will set to 'ON'. - `USE_OPENMP=ON`: Enable OpenMP support. Building ABACUS without OpenMP is not fully tested yet. - `BUILD_TESTING=OFF`: [Build unit tests](../advanced/install.md#build-unit-tests). - - `ENABLE_GOOGLEBENCH=OFF`: [Build performance tests](../advanced/install.md#build-performance-tests) - `ENABLE_MPI=ON`: Enable MPI parallel compilation. If set to `OFF`, a serial version of ABACUS with PW basis only will be compiled. Currently serial version of ABACUS with LCAO basis is not supported yet, so `ENABLE_LCAO` will be automatically set to `OFF`. - `ENABLE_COVERAGE=OFF`: Build ABACUS executable supporting [coverage analysis](../CONTRIBUTING.md#generating-code-coverage-report). This feature has a drastic impact on performance. - `ENABLE_ASAN=OFF`: Build with Address Sanitizer. This feature would help detecting memory problems. @@ -230,7 +229,7 @@ conda create -n abacus_env abacus -c conda-forge conda activate abacus_env export CMAKE_PREFIX_PATH=$CONDA_PREFIX:$CMAKE_PREFIX_PATH -# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft mkl-devel -c conda-forge` to switch implementation. +# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft -c conda-forge` to switch implementation. export MKLROOT=$CONDA_PREFIX # If Intel MKL is required. 
export CMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`:$CMAKE_PREFIX_PATH # If DEEPKS support is required; diff --git a/docs/quick_start/hands_on.md b/docs/quick_start/hands_on.md index 2e0e768169..d63c6b0232 100644 --- a/docs/quick_start/hands_on.md +++ b/docs/quick_start/hands_on.md @@ -57,7 +57,7 @@ basis_type lcao calculation scf # this is the key parameter telling abacus to do a scf calculation ``` -The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir` defined in `INPUT` (the default directory is "./"), and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir` also defined in `INPUT` (the default directory is "./"). The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm). +The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir`, and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir`. The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm). The final mandatory input file is called `KPT`, which sets the reciprocal space k-mesh. 
Below is an example: diff --git a/python/pyabacus/CMakeLists.txt b/python/pyabacus/CMakeLists.txt index 0effbe83f2..399bd4fe57 100644 --- a/python/pyabacus/CMakeLists.txt +++ b/python/pyabacus/CMakeLists.txt @@ -12,14 +12,9 @@ set(BASE_PATH "${PROJECT_SOURCE_DIR}/../../source/module_base") set(ABACUS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../source") include_directories(${BASE_PATH} ${ABACUS_SOURCE_DIR}) list(APPEND _sources - #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h - #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp - ${ABACUS_SOURCE_DIR}/module_base/constants.h - ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.h - ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.cpp - ${PROJECT_SOURCE_DIR}/src/py_abacus.cpp - #${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp - ${PROJECT_SOURCE_DIR}/src/py_math_base.cpp) + ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h + ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp + ${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp) python_add_library(_core MODULE ${_sources} WITH_SOABI) target_link_libraries(_core PRIVATE pybind11::headers) target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) diff --git a/python/pyabacus/src/py_abacus.cpp b/python/pyabacus/src/py_abacus.cpp deleted file mode 100644 index 34b354dc6b..0000000000 --- a/python/pyabacus/src/py_abacus.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -namespace py = pybind11; - -void bind_numerical_radial(py::module& m); -void bind_math_base(py::module& m); - -PYBIND11_MODULE(_core, m) -{ - // bind_numerical_radial(m); - bind_math_base(m); -} \ No newline at end of file diff --git a/python/pyabacus/src/py_math_base.cpp b/python/pyabacus/src/py_math_base.cpp deleted file mode 100644 index 4378690897..0000000000 --- a/python/pyabacus/src/py_math_base.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include - -#include "module_base/math_sphbes.h" - -namespace py = pybind11; -using namespace 
pybind11::literals; -template -using overload_cast_ = pybind11::detail::overload_cast_impl; - -void bind_math_base(py::module& m) -{ - py::module module_base = m.def_submodule("ModuleBase"); - - py::class_(module_base, "Sphbes") - .def(py::init<>()) - .def_static("sphbesj", overload_cast_()(&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a) - .def_static("dsphbesj", overload_cast_()(&ModuleBase::Sphbes::dsphbesj), "l"_a, "x"_a) - .def_static("sphbesj", - [](const int n, py::array_t r, const double q, const int l, py::array_t jl) { - py::buffer_info r_info = r.request(); - if (r_info.ndim != 1) - { - throw std::runtime_error("r array must be 1-dimensional"); - } - py::buffer_info jl_info = jl.request(); - if (jl_info.ndim != 1) - { - throw std::runtime_error("jl array must be 1-dimensional"); - } - ModuleBase::Sphbes::sphbesj(n, - static_cast(r_info.ptr), - q, - l, - static_cast(jl_info.ptr)); - }) - .def_static("dsphbesj", - [](const int n, py::array_t r, const double q, const int l, py::array_t djl) { - py::buffer_info r_info = r.request(); - if (r_info.ndim != 1) - { - throw std::runtime_error("r array must be 1-dimensional"); - } - py::buffer_info djl_info = djl.request(); - if (djl_info.ndim != 1) - { - throw std::runtime_error("djl array must be 1-dimensional"); - } - ModuleBase::Sphbes::dsphbesj(n, - static_cast(r_info.ptr), - q, - l, - static_cast(djl_info.ptr)); - }) - .def_static("sphbes_zeros", [](const int l, const int n, py::array_t zeros) { - py::buffer_info zeros_info = zeros.request(); - if (zeros_info.ndim != 1) - { - throw std::runtime_error("zeros array must be 1-dimensional"); - } - ModuleBase::Sphbes::sphbes_zeros(l, n, static_cast(zeros_info.ptr)); - }); -} \ No newline at end of file diff --git a/python/pyabacus/src/py_numerical_radial.cpp b/python/pyabacus/src/py_numerical_radial.cpp index ebda8f080b..296229b3d1 100644 --- a/python/pyabacus/src/py_numerical_radial.cpp +++ b/python/pyabacus/src/py_numerical_radial.cpp @@ -8,7 +8,7 @@ using namespace 
pybind11::literals; template using overload_cast_ = pybind11::detail::overload_cast_impl; -void bind_numerical_radial(py::module& m) +PYBIND11_MODULE(_core, m) { // Create the submodule for NumericalRadial py::module m_numerical_radial = m.def_submodule("NumericalRadial"); @@ -165,4 +165,4 @@ void bind_numerical_radial(py::module& m) .def_property_readonly("kgrid", overload_cast_()(&NumericalRadial::kgrid, py::const_)) .def_property_readonly("rvalue", overload_cast_()(&NumericalRadial::rvalue, py::const_)) .def_property_readonly("kvalue", overload_cast_()(&NumericalRadial::kvalue, py::const_)); -} \ No newline at end of file +} diff --git a/python/pyabacus/src/pyabacus/__init__.py b/python/pyabacus/src/pyabacus/__init__.py index 94d8c0d5b8..cda9318053 100644 --- a/python/pyabacus/src/pyabacus/__init__.py +++ b/python/pyabacus/src/pyabacus/__init__.py @@ -1,4 +1,3 @@ from __future__ import annotations -# from ._core import __doc__, __version__, NumericalRadial, ModuleBase -from ._core import ModuleBase -__all__ = ["ModuleBase"] \ No newline at end of file +from ._core import __doc__, __version__, NumericalRadial +__all__ = ["__doc__", "__version__", "NumericalRadial"] \ No newline at end of file diff --git a/python/pyabacus/tests/test_base_math.py b/python/pyabacus/tests/test_base_math.py deleted file mode 100644 index 97d5118bac..0000000000 --- a/python/pyabacus/tests/test_base_math.py +++ /dev/null @@ -1,15 +0,0 @@ -from __future__ import annotations - -import pyabacus as m -import numpy as np - - -def test_version(): - assert m.__version__ == "0.0.1" - -def test_sphbes(): - s = m.ModuleBase.Sphbes() - # test for sphbesj - assert s.sphbesj(1, 0.0) == 0.0 - assert s.sphbesj(0, 0.0) == 1.0 - diff --git a/python/pyabacus/tests/test_nr.py b/python/pyabacus/tests/test_nr.py new file mode 100644 index 0000000000..4986331b25 --- /dev/null +++ b/python/pyabacus/tests/test_nr.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import pyabacus as m + + +def 
test_version(): + assert m.__version__ == "0.0.1" + +def test_attributes(): + chi = m.NumericalRadial() + # string + assert chi.symbol == '' + # integer + assert chi.itype == 0 + assert chi.izeta == 0 + assert chi.l == -1 + assert chi.nr == 0 + assert chi.nk == 0 + # float + assert chi.rcut == 0.0 + assert chi.kcut == 0.0 + assert chi.pr == 0.0 + assert chi.pk == 0.0 + # bool + assert chi.is_fft_compliant == False diff --git a/source/Makefile b/source/Makefile index 7bd81f26d9..0dcd329161 100644 --- a/source/Makefile +++ b/source/Makefile @@ -7,7 +7,7 @@ include Makefile.vars INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container LIBS = -lm -lpthread -OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES} +OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic HONG = -D__LCAO HONG += -D__ELPA ifeq ($(OPENMP), ON) @@ -75,7 +75,7 @@ else FFTW_INCLUDE_DIR = ${FFTW_DIR}/include FFTW_LIB_DIR = ${FFTW_DIR}/lib HONG += -D__FFTW3 - LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} + LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl INCLUDES += -I${FFTW_INCLUDE_DIR} #========================== @@ -140,6 +140,12 @@ ifdef LIBTORCH_DIR endif endif +ifdef PEXSI_DIR + INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE} + LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB} + HONG += -D__PEXSI +endif + ifdef DeePMD_DIR HONG += -D__DPMD -DHIGH_PREC OPTS += -Wl,--no-as-needed @@ -169,13 +175,6 @@ ifdef DeePMD_DIR INCLUDES += -I${TensorFlow_INCLUDE_DIR} endif -ifdef PEXSI_DIR - OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI} - INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include - LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis - HONG += -D__PEXSI -endif - include Makefile.Objects #========================== diff --git 
a/source/Makefile.Objects b/source/Makefile.Objects index 71e637a80b..2a69761da3 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -30,7 +30,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ -./module_hsolver/module_pexsi:\ +./module_hsolver/pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -102,7 +102,6 @@ ${OBJS_VDW}\ ${OBJS_DFTU}\ ${OBJS_DELTASPIN}\ ${OBJS_TENSOR}\ -${OBJS_HSOLVER_PEXSI}\ OBJS_MAIN=main.o\ driver.o\ @@ -291,7 +290,13 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ - + diago_pexsi.o\ + DistBCDMatrix.o\ + DistCCSMatrix.o\ + DistMatrixTransformer.o\ + pexsi_solver.o\ + simplePEXSI.o\ + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diago_elpa.o\ @@ -300,13 +305,6 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ elpa_new_complex.o\ utils.o\ -OBJS_HSOLVER_PEXSI=diago_pexsi.o\ - pexsi_solver.o\ - simple_pexsi.o\ - dist_bcd_matrix.o\ - dist_ccs_matrix.o\ - dist_matrix_transformer.o\ - OBJS_MD=fire.o\ langevin.o\ md_base.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 477b0a251d..860bbdd806 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -33,6 +33,15 @@ ELPA_DIR = /root/lib/ELPA ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/ CEREAL_DIR = /root/lib/cereal +DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0 +DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include +DSUPERLU_LIB = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a + +SCOTCH_INCLUDE = -I/usr/local/include +PTSCOTCH_DIR = /root/workspace/scotch_6.0.0 +PTSCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a +SCOTCH_LIB = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a + ##------------------- FOR GNU COMPILER ------------------------------ @@ -59,9 +68,14 @@ 
CEREAL_DIR = /root/lib/cereal ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR -## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR +## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a ##--------------------------------------------------------------------- +PEXSI_DIR = /root/workspace/pexsi_v2.0.0 +PEXSI_LIB_DIR = ${PEXSI_DIR}/src +PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include + + # LIBTORCH_DIR = /usr/local # LIBNPY_DIR = /usr/local @@ -73,10 +87,6 @@ CEREAL_DIR = /root/lib/cereal # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm -# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi -# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu -# PARMETIS_DIR = /home/rhx/projects/pexsi-build/parmetis - ##--------------------------------------------------------------------- # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile # DEBUG = OFF diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index 6b7015dc25..696bcd6088 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -248,7 +248,6 @@ std::string of_kernel_file = "WTkernel.txt"; std::string MIXING_MODE = "broyden"; double MIXING_BETA = 0.7; int MIXING_NDIM = 8; -int MIXING_RESTART = 0; double MIXING_GG0 = 1.00; double MIXING_BETA_MAG = 1.6; double MIXING_GG0_MAG = 1.00; @@ -301,33 +300,4 @@ std::string qo_basis = "hydrogen"; std::vector qo_strategy = {}; double qo_thr = 1.0e-6; std::vector qo_screening_coeff = {}; - -//========================================================== -// PEXSI related -//========================================================== -int pexsi_npole = 54; -int pexsi_inertia = 1; -int pexsi_nmax = 80; -// int pexsi_symbolic = 1; -int pexsi_comm = 1; -int pexsi_storage = 1; -int pexsi_ordering = 0; -int 
pexsi_row_ordering = 1; -int pexsi_nproc = 1; -int pexsi_symm = 1; -int pexsi_trans = 0; -int pexsi_method = 1; -int pexsi_nproc_pole = 1; -// double pexsi_spin = 2; -double pexsi_temp = 0.0001; -double pexsi_gap = 0; -double pexsi_delta_e = 20.0; -double pexsi_mu_lower = -10; -double pexsi_mu_upper = 10; -double pexsi_mu = 0.0; -double pexsi_mu_thr = 0.05; -double pexsi_mu_expand = 0.3; -double pexsi_mu_guard = 0.2; -double pexsi_elec_thr = 0.001; -double pexsi_zero_thr = 1e-10; } // namespace GlobalV diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index 9808ca080b..b1fbb1748d 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -277,7 +277,6 @@ extern std::string of_kernel_file; // The name of WT kernel file. extern std::string MIXING_MODE; extern double MIXING_BETA; extern int MIXING_NDIM; -extern int MIXING_RESTART; extern double MIXING_GG0; extern bool MIXING_TAU; extern double MIXING_BETA_MAG; @@ -329,32 +328,5 @@ extern std::string qo_basis; extern std::vector qo_strategy; extern double qo_thr; extern std::vector qo_screening_coeff; - -// PEXSI related -extern int pexsi_npole; -extern int pexsi_inertia; -extern int pexsi_nmax; -// extern int pexsi_symbolic; -extern int pexsi_comm; -extern int pexsi_storage; -extern int pexsi_ordering; -extern int pexsi_row_ordering; -extern int pexsi_nproc; -extern int pexsi_symm; -extern int pexsi_trans; -extern int pexsi_method; -extern int pexsi_nproc_pole; -// extern double pexsi_spin; -extern double pexsi_temp; -extern double pexsi_gap; -extern double pexsi_delta_e; -extern double pexsi_mu_lower; -extern double pexsi_mu_upper; -extern double pexsi_mu; -extern double pexsi_mu_thr; -extern double pexsi_mu_expand; -extern double pexsi_mu_guard; -extern double pexsi_elec_thr; -extern double pexsi_zero_thr; } // namespace GlobalV #endif diff --git a/source/module_base/math_sphbes.cpp b/source/module_base/math_sphbes.cpp index 
73e0127e6b..5e7f41de54 100644 --- a/source/module_base/math_sphbes.cpp +++ b/source/module_base/math_sphbes.cpp @@ -1,7 +1,7 @@ #include "math_sphbes.h" +#include "timer.h" #include "constants.h" #include -#include #include @@ -425,6 +425,7 @@ void Sphbes::Spherical_Bessel double *jl // jl(1:msh) = j_l(q*r(i)),spherical bessel function ) { + ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); double x1=0.0; int i=0; @@ -597,6 +598,7 @@ void Sphbes::Spherical_Bessel } } + ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); return; } @@ -611,6 +613,7 @@ void Sphbes::Spherical_Bessel double *sjp ) { + ModuleBase::timer::tick("Sphbes","Spherical_Bessel"); //calculate jlx first Spherical_Bessel (msh, r, q, l, sj); @@ -631,6 +634,7 @@ void Sphbes::dSpherical_Bessel_dx double *djl // jl(1:msh) = j_l(q*r(i)),spherical bessel function ) { + ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq"); if (l < 0 ) { std::cout << "We temporarily only calculate derivative of l >= 0." << std::endl; @@ -678,6 +682,7 @@ void Sphbes::dSpherical_Bessel_dx } delete[] jl; } + ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq"); return; } @@ -803,7 +808,7 @@ void Sphbes::dsphbesj(const int n, } } -void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const bool return_all) +void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros) { assert( n > 0 ); assert( l >= 0 ); @@ -813,22 +818,10 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const b // This property enables us to use bracketing method recursively // to find all zeros of j_l from the zeros of j_0. - // If return_all is true, zeros of j_0, j_1, ..., j_l will all be returned - // such that zeros[l*n+i] is the i-th zero of j_l. As such, it is required - // that the array "zeros" has a size of (l+1)*n. - // - // If return_all is false, only the zeros of j_l will be returned - // and "zeros" is merely required to have a size of n. 
- // Note that in this case the bracketing method can be applied with a stride - // of 2 instead of 1: - // j_0 --> j_1 --> j_3 --> j_5 --> ... --> j_l (odd l) - // j_0 --> j_2 --> j_4 --> j_6 --> ... --> j_l (even l) - - // Every recursion step reduces the number of zeros by 1. - // If return_all is true, one needs to start with n+l zeros of j_0 - // to ensure n zeros of j_l; otherwise with a stride of 2 one only - // needs to start with n+(l+1)/2 zeros of j_0 - int nz = n + ( return_all ? l : (l+1)/2 ); + // if l is odd , j_0 --> j_1 --> j_3 --> j_5 --> ... + // if l is even, j_0 --> j_2 --> j_4 --> j_6 --> ... + + int nz = n + (l+1)/2; // number of effective zeros in buffer double* buffer = new double[nz]; // zeros of j_0 = sin(x)/x is just n*pi @@ -838,34 +831,27 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const b buffer[i] = (i+1) * PI; } - int ll; // active l + int ll = 1; auto jl = [&ll] (double x) { return sphbesj(ll, x); }; - int stride; - std::function copy_if_needed; - int offset = 0; // keeps track of the position in zeros for next copy (used when return_all == true) - if (return_all) - { - copy_if_needed = [&](){ std::copy(buffer, buffer + n, zeros + offset); offset += n; }; - stride = 1; - ll = 1; - } - else + + if (l % 2 == 1) { - copy_if_needed = [](){}; - stride = 2; - ll = 2 - l % 2; + for (int i = 0; i < nz-1; i++) + { + buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50); + } + --nz; } - for (; ll <= l; ll += stride, --nz) + for (ll = 2 + l%2; ll <= l; ll += 2, --nz) { - copy_if_needed(); for (int i = 0; i < nz-1; i++) { buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50); } } - std::copy(buffer, buffer + n, zeros + offset); + std::copy(buffer, buffer + n, zeros); delete[] buffer; } diff --git a/source/module_base/math_sphbes.h b/source/module_base/math_sphbes.h index 7aa9c78a48..c654847a5d 100644 --- a/source/module_base/math_sphbes.h +++ b/source/module_base/math_sphbes.h @@ -126,18 +126,13 @@ 
class Sphbes * This function computes the first n positive zeros of the l-th order * spherical Bessel function of the first kind. * - * @param[in] l (maximum) order of the spherical Bessel function - * @param[in] n number of zeros to be computed (for each j_l if return_all is true) - * @param[out] zeros on exit, contains the positive zeros. - * @param[in] return_all if true, return all zeros from j_0 to j_l such that zeros[l*n+i] - * is the i-th zero of j_l. If false, return only the first n zeros of j_l. - * - * @note The size of array "zeros" must be at least (l+1)*n if return_all is true, and n otherwise. + * @param[in] l order of the spherical Bessel function + * @param[in] n number of zeros to be computed + * @param[out] zeros on exit, contains the first n positive zeros in ascending order */ static void sphbes_zeros(const int l, const int n, - double* const zeros, - bool return_all = false + double* const zeros ); private: diff --git a/source/module_base/para_json.cpp b/source/module_base/para_json.cpp deleted file mode 100644 index 1f042271f8..0000000000 --- a/source/module_base/para_json.cpp +++ /dev/null @@ -1,977 +0,0 @@ -#include "para_json.h" -#include "module_base/global_variable.h" - -#ifdef __RAPIDJSON - -namespace Para_Json -{ - int test=4; - // @param doc: the output json file - rapidjson::Document doc; - rapidjson::Value abacus(rapidjson::kObjectType); - - // @param general_info : - rapidjson::Value general_info(rapidjson::kObjectType); - rapidjson::Value version; - - rapidjson::Value commit; - rapidjson::Value begin_time; - rapidjson::Value begin_date; - rapidjson::Value device_g; - // @param general_info -- parallel: - rapidjson::Value parallel(rapidjson::kObjectType); - rapidjson::Value drank; - rapidjson::Value dsize; - rapidjson::Value dcolor ; - // @param general_info -- path - rapidjson::Value path(rapidjson::kObjectType); - rapidjson::Value global_out_dir; - rapidjson::Value global_in_card; - rapidjson::Value pseudo_dir_path ; - 
rapidjson::Value orbital_dir_path; - - - // @param reading_information: - rapidjson::Value readin_info(rapidjson::kObjectType); - // @param reading_information -- input_file: - rapidjson::Value input_file(rapidjson::kObjectType); - - - // @param reading_information -- input_file -- system_variables: - rapidjson::Value input_suffix; - rapidjson::Value ntype; - rapidjson::Value calculation; - rapidjson::Value esolver_type; - rapidjson::Value symmetry; - rapidjson::Value symmetry_precfield; - rapidjson::Value symmetry_autoclose; - rapidjson::Value kpar; - rapidjson::Value bndpar; - rapidjson::Value latname; - rapidjson::Value init_wfc; - rapidjson::Value init_chg; - rapidjson::Value init_vel; - rapidjson::Value nelec; - rapidjson::Value nupdown; - rapidjson::Value dft_functional; - rapidjson::Value xc_temperature; - rapidjson::Value pseudo_rcut(rapidjson::kNumberType ); - rapidjson::Value pseudo_mesh; - rapidjson::Value mem_saver; - rapidjson::Value diago_proc; - rapidjson::Value nbspline; - rapidjson::Value kspacing(rapidjson::kArrayType); - rapidjson::Value min_dist_coef(rapidjson::kNumberType); - rapidjson::Value device; - // @param reading_information -- input_file -- files_related - rapidjson::Value stru_file; - rapidjson::Value kpoint_file; - rapidjson::Value pseudo_dir; - rapidjson::Value orbital_dir; - rapidjson::Value read_file_dir; - rapidjson::Value wannier_card; - - // @param reading_information -- input_file -- planewave_related - rapidjson::Value ecutwfc; - rapidjson::Value nx; - rapidjson::Value ny; - rapidjson::Value nz; - rapidjson::Value pw_seed; - rapidjson::Value pw_diag_thr; - rapidjson::Value pw_diag_nmax; - rapidjson::Value pw_diag_ndim; - // @param reading_information -- input_file -- numerical_atomic_orbitals_related - rapidjson::Value nb2d; - rapidjson::Value lmaxmax; - rapidjson::Value lcao_ecut; - rapidjson::Value lcao_dk; - rapidjson::Value lcao_dr; - rapidjson::Value lcao_rmax; - rapidjson::Value search_radius; - rapidjson::Value 
search_pbc; - rapidjson::Value bx; - rapidjson::Value by; - rapidjson::Value bz; - // @param reading_information -- input_file -- electronic_structure - rapidjson::Value basis_type; - rapidjson::Value ks_solver; - rapidjson::Value nbands; - rapidjson::Value nbands_istate; - rapidjson::Value nspin; - rapidjson::Value smearing_method; - rapidjson::Value smearing_sigma; - rapidjson::Value smearing_sigma_temp; - rapidjson::Value mixing_type; - rapidjson::Value mixing_beta; - rapidjson::Value mixing_ndim; - rapidjson::Value mixing_gg0; - rapidjson::Value mixing_tau; - rapidjson::Value mixing_dftu; - rapidjson::Value gamma_only; - rapidjson::Value printe; - rapidjson::Value scf_nmax; - rapidjson::Value scf_thr; - rapidjson::Value scf_thr_type; - rapidjson::Value chg_extrap; - rapidjson::Value lspinorb; - rapidjson::Value noncolin; - rapidjson::Value soc_lambda; - // @param reading_information -- input_file -- electronic_structure_SDFT - rapidjson::Value method_sto; - rapidjson::Value nbands_sto; - rapidjson::Value nche_sto(rapidjson::kNumberType); - rapidjson::Value emin_sto; - rapidjson::Value emax_sto; - rapidjson::Value seed_sto; - rapidjson::Value initsto_freq; - rapidjson::Value npart_sto; - // @param reading_information -- input_file -- geometry_relaxation - rapidjson::Value relax_method; - rapidjson::Value relax_new; - rapidjson::Value relax_scale_force; - rapidjson::Value relax_nmax; - rapidjson::Value relax_cg_thr; - rapidjson::Value cal_force; - rapidjson::Value force_thr; - rapidjson::Value force_thr_ev; - rapidjson::Value force_thr_ev2; - rapidjson::Value relax_bfgs_w1; - rapidjson::Value relax_bfgs_w2; - rapidjson::Value relax_bfgs_rmax; - rapidjson::Value relax_bfgs_rmin; - rapidjson::Value relax_bfgs_init; - rapidjson::Value cal_stress; - rapidjson::Value stress_thr; - rapidjson::Value press1; - rapidjson::Value press2; - rapidjson::Value press3; - rapidjson::Value fixed_axes; - rapidjson::Value fixed_ibrav; - rapidjson::Value fixed_atoms; - 
rapidjson::Value cell_factor; - - // @param reading_information -- input_file -- output_information_related - rapidjson::Value out_mul; - rapidjson::Value out_freq_elec; - rapidjson::Value out_freq_ion; - rapidjson::Value out_chg; - rapidjson::Value out_pot; - rapidjson::Value out_dm; - rapidjson::Value out_dm1; - rapidjson::Value out_wfc_pw; - rapidjson::Value out_wfc_r; - rapidjson::Value out_wfc_lcao; - rapidjson::Value out_dos; - rapidjson::Value out_band; - rapidjson::Value out_proj_band; - rapidjson::Value out_stru; - rapidjson::Value out_bandgap; - rapidjson::Value out_level; - rapidjson::Value out_alllog; - rapidjson::Value out_mat_hs; - rapidjson::Value out_mat_r; - rapidjson::Value out_mat_hs2; - rapidjson::Value out_mat_t; - rapidjson::Value out_mat_dh; - rapidjson::Value out_app_flag; - rapidjson::Value out_interval; - rapidjson::Value out_element_info; - rapidjson::Value restart_save; - rapidjson::Value restart_load; - rapidjson::Value rpa; - - // @param reading_information -- input_file -- density_of_states - rapidjson::Value dos_edelta_ev; - rapidjson::Value dos_sigma; - rapidjson::Value dos_scale; - rapidjson::Value dos_emin_ev; - rapidjson::Value dos_emax_ev; - rapidjson::Value dos_nche; - // @param reading_information -- input_file -- naos - rapidjson::Value bessel_nao_ecut; - rapidjson::Value bessel_nao_tolerence; - rapidjson::Value bessel_nao_rcut; - rapidjson::Value bessel_nao_smooth; - rapidjson::Value bessel_nao_sigma; - // @param reading_information -- input_file -- deepks - rapidjson::Value input_file_out_labels; - rapidjson::Value input_file_scf; - rapidjson::Value input_file_model; - rapidjson::Value bessel_descriptor_lmax; - rapidjson::Value bessel_descriptor_ecut; - rapidjson::Value bessel_descriptor_tolerence; - rapidjson::Value bessel_descriptor_rcut; - rapidjson::Value bessel_descriptor_smooth; - rapidjson::Value bessel_descriptor_sigma; - rapidjson::Value input_file_bandgap; - rapidjson::Value input_file_out_unittest; - // @param 
reading_information -- input_file -- ofdft - rapidjson::Value of_kinetic; - rapidjson::Value of_method; - rapidjson::Value of_conv; - rapidjson::Value of_tole; - rapidjson::Value of_tolp; - rapidjson::Value of_tf_weight; - rapidjson::Value of_vw_weight; - rapidjson::Value of_wt_alpha; - rapidjson::Value of_wt_beta; - rapidjson::Value of_wt_rho0; - rapidjson::Value of_hold_rho0; - rapidjson::Value of_lkt_a; - rapidjson::Value of_read_kernel; - rapidjson::Value of_kernel_file; - rapidjson::Value of_full_pw; - rapidjson::Value of_full_pw_dim; - - // @param reading_information -- input_file -- electric_field_and_dipole_correction - rapidjson::Value efield_flag; - rapidjson::Value dip_cor_flag; - rapidjson::Value efield_dir; - rapidjson::Value efield_pos_max; - rapidjson::Value efield_pos_dec; - rapidjson::Value efield_amp; - // @param reading_information -- input_file -- gate_field - rapidjson::Value gate_flag; - rapidjson::Value zgate; - rapidjson::Value block; - rapidjson::Value block_down; - rapidjson::Value block_up; - rapidjson::Value block_height; - // @param reading_information -- input_file -- exact_exchange - rapidjson::Value exx_hybrid_alpha; - rapidjson::Value exx_hse_omega; - rapidjson::Value exx_separate_loop; - rapidjson::Value exx_hybrid_step; - rapidjson::Value exx_mixing_beta; - rapidjson::Value exx_lambda; - rapidjson::Value exx_pca_threshold; - rapidjson::Value exx_c_threshold; - rapidjson::Value exx_v_threshold; - rapidjson::Value exx_dm_threshold; - rapidjson::Value exx_c_grad_threshold; - rapidjson::Value exx_v_grad_threshold; - rapidjson::Value exx_schwarz_threshold; - rapidjson::Value exx_cauchy_threshold; - rapidjson::Value exx_cauchy_force_threshold; - rapidjson::Value exx_cauchy_stress_threshold; - rapidjson::Value exx_ccp_threshold; - rapidjson::Value exx_ccp_rmesh_times; - rapidjson::Value exx_distribute_type; - rapidjson::Value exx_opt_orb_lmax; - rapidjson::Value exx_opt_orb_ecut; - rapidjson::Value exx_opt_orb_tolerence; - 
rapidjson::Value exx_real_number; - - // @param reading_information -- input_file -- molecular_dynamics - rapidjson::Value md_type; - rapidjson::Value md_nstep; - rapidjson::Value md_dt; - rapidjson::Value md_thermostat; - rapidjson::Value md_tlast; - rapidjson::Value md_tfirst; - rapidjson::Value md_restart; - rapidjson::Value md_restartfreq; - rapidjson::Value md_dumpfreq; - rapidjson::Value dump_force; - rapidjson::Value dump_vel; - rapidjson::Value dump_virial; - rapidjson::Value md_seed; - rapidjson::Value md_tfreq; - rapidjson::Value md_tchain; - rapidjson::Value md_pmode; - rapidjson::Value md_prec_level; - rapidjson::Value ref_cell_factor; - rapidjson::Value md_pcouple; - rapidjson::Value md_pfirst; - rapidjson::Value md_plast; - rapidjson::Value md_pfreq; - rapidjson::Value md_pchain; - rapidjson::Value lj_rcut; - rapidjson::Value lj_epsilon; - rapidjson::Value lj_sigma; - rapidjson::Value pot_file; - rapidjson::Value msst_direction; - rapidjson::Value msst_vel; - rapidjson::Value msst_vis; - rapidjson::Value msst_tscale; - rapidjson::Value msst_qmass; - rapidjson::Value md_damp; - rapidjson::Value md_tolerance; - rapidjson::Value md_nraise; - rapidjson::Value cal_syns; - rapidjson::Value dmax; - - // @param reading_information -- input_file -- dft_plus_u - rapidjson::Value orbital_corr(rapidjson::kArrayType); - rapidjson::Value hubbard_u(rapidjson::kArrayType); - rapidjson::Value yukawa_potential; - rapidjson::Value yukawa_lambda; - rapidjson::Value omc; - - // @param reading_information -- input_file -- vdw_correction - rapidjson::Value vdw_method; - rapidjson::Value vdw_s6; - rapidjson::Value vdw_s8; - rapidjson::Value vdw_a1; - rapidjson::Value vdw_a2; - rapidjson::Value vdw_d; - rapidjson::Value vdw_abc; - rapidjson::Value vdw_C6_file; - rapidjson::Value vdw_C6_unit; - rapidjson::Value vdw_R0_file; - rapidjson::Value vdw_R0_unit; - rapidjson::Value vdw_cutoff_type; - rapidjson::Value vdw_cutoff_radius; - rapidjson::Value vdw_radius_unit; - 
rapidjson::Value vdw_cutoff_period(rapidjson::kArrayType); - rapidjson::Value vdw_cn_thr; - rapidjson::Value vdw_cn_thr_unit; - - // @param reading_information -- input_file -- berry_phase_and_wannier90_interface - rapidjson::Value berry_phase; - rapidjson::Value gdir; - rapidjson::Value towannier90; - rapidjson::Value nnkpfile; - rapidjson::Value wannier_spin; - - // @param reading_information -- input_file -- tddft - rapidjson::Value td_edm; - rapidjson::Value td_print_eij; - rapidjson::Value td_propagator; - rapidjson::Value td_vext; - rapidjson::Value td_vext_dire; - rapidjson::Value td_stype; - rapidjson::Value td_ttype; - rapidjson::Value td_tstart; - rapidjson::Value td_tend; - rapidjson::Value td_lcut1; - rapidjson::Value td_lcut2; - rapidjson::Value td_gauss_freq; - rapidjson::Value td_gauss_phase; - rapidjson::Value td_gauss_sigma; - rapidjson::Value td_gauss_t0; - rapidjson::Value td_gauss_amp; - rapidjson::Value td_trape_freq; - rapidjson::Value td_trape_phase; - rapidjson::Value td_trape_t1; - rapidjson::Value td_trape_t2; - rapidjson::Value td_trape_t3; - rapidjson::Value td_trape_amp; - rapidjson::Value td_trigo_freq1; - rapidjson::Value td_trigo_freq2; - rapidjson::Value td_trigo_phase1; - rapidjson::Value td_trigo_phase2; - rapidjson::Value td_trigo_amp; - rapidjson::Value td_heavi_t0; - rapidjson::Value td_heavi_amp; - rapidjson::Value td_out_dipole; - rapidjson::Value td_out_efield; - rapidjson::Value ocp; - rapidjson::Value ocp_set; - - // @param reading_information -- input_file -- debuging_related - rapidjson::Value t_in_h; - rapidjson::Value vl_in_h; - rapidjson::Value vnl_in_h; - rapidjson::Value vh_in_h; - rapidjson::Value vion_in_h; - rapidjson::Value test_force; - rapidjson::Value test_stress; - rapidjson::Value colour; - rapidjson::Value test_skip_ewald; - - // @param reading_information -- input_file -- electronic_conductivities - rapidjson::Value cal_cond; - rapidjson::Value cond_nche; - rapidjson::Value cond_dw; - rapidjson::Value 
cond_wcut; - rapidjson::Value cond_dt; - rapidjson::Value cond_dtbatch; - rapidjson::Value cond_fwhm; - rapidjson::Value cond_nonlocal; - // @param reading_information -- input_file -- implicit_solvation_model - rapidjson::Value imp_sol; - rapidjson::Value eb_k; - rapidjson::Value tau; - rapidjson::Value sigma_k; - rapidjson::Value nc_k; - - // @param reading_information -- stru_infos: - rapidjson::Value stru_infos(rapidjson::kObjectType); - // rapidjson::Value ATOMIC_SPECIES(rapidjson::kArrayType); - // rapidjson::Value NUMERICAL_ORBITAL; - // rapidjson::Value LATTICE_CONSTANT(rapidjson::kArrayType); - // rapidjson::Value ATOMIC_POSITIONS(rapidjson::kArrayType); - - // @param reading_information -- KPT_infos - rapidjson::Value KPT_infos(rapidjson::kObjectType); - // rapidjson::Value total_number; - // rapidjson::Value mode; - // rapidjson::Value vectors(rapidjson::kArrayType); - - // @param reading_information -- orb_infos - rapidjson::Value orb_infos(rapidjson::kObjectType); - - // @param reading_information -- pp - rapidjson::Value pp(rapidjson::kObjectType); - - // @param init - rapidjson::Value init(rapidjson::kObjectType); - // @param init -- general - // rapidjson::Value calculation; - // rapidjson::Value esolver_type; - // rapidjson::Value basis_type; - // rapidjson::Value gamma_only; - // rapidjson::Value ks_solver; - // rapidjson::Value ntype; - // rapidjson::Value nspin; - // rapidjson::Value ecutwfc; - // rapidjson::Value scf_thr; - // rapidjson::Value scf_nmax; - - // @param init -- symmetry - // rapidjson::Value symmetry(rapidjson::kObjectType); - // rapidjson::Value BRAVAIS_TYPE; - // rapidjson::Value BRAVAIS_LATTICE_NAME; - // rapidjson::Value IBRAV; - // rapidjson::Value LATTICE_CONSTANT_A; - // rapidjson::Value right_hand_lattice; - - // @param init -- Kpoints - rapidjson::Value kpoints(rapidjson::kObjectType); - rapidjson::Value nkstot; - rapidjson::Value nkstot_ibz; - rapidjson::Value coordinates(rapidjson::kArrayType); - rapidjson::Value 
weight(rapidjson::kArrayType); - - // @param init -- grid - rapidjson::Value grid(rapidjson::kObjectType); - rapidjson::Value energy_cutoff_for_wavefunc; - rapidjson::Value fft_grid_for_wave_functions(rapidjson::kArrayType); - rapidjson::Value number_of_plane_waves; - rapidjson::Value number_of_sticks; - - // @param init -- Smearing - // rapidjson::Value smearing_method; - // rapidjson::Value smearing_sigma; - - // @param init -- mixing - rapidjson::Value mixing; - - - // @param output - rapidjson::Value output(rapidjson::kArrayType); - - - - // @param final_stru - rapidjson::Value final_stru(rapidjson::kObjectType); - rapidjson::Value cell; - rapidjson::Value coordinate; - - - - /** - * The functions below initialize the json output parameter - * tree to connect the nodes of the module - */ - - /** - * @brief add Top stage:parameter in Abacus: - */ - void Init_json_abacus() - { - - - // add First stage:parameter in abcus: - - abacus.AddMember("general_info", general_info, doc.GetAllocator()); - - abacus.AddMember("readin_info", readin_info, doc.GetAllocator()); - - abacus.AddMember("init", init, doc.GetAllocator()); - - abacus.AddMember("output", output, doc.GetAllocator()); - - abacus.AddMember("final_stru", final_stru, doc.GetAllocator()); - - doc.SetObject(); - // abacus.SetObject(); - doc.AddMember("ABACUS", abacus, doc.GetAllocator()); - /** - * . - * . - * . - * . - * . - * . - * . 
- * */ - } - /** - * @brief add Second stage:parameter in Abacus - general_info: - */ - void Init_json_abacus_generalInfo(){ - general_info.AddMember("version", version, doc.GetAllocator()); - - general_info.AddMember("commit", commit, doc.GetAllocator()); - - general_info.AddMember("begin_time", begin_time, doc.GetAllocator()); - - general_info.AddMember("begin_date", begin_date, doc.GetAllocator()); - - general_info.AddMember("device", device_g, doc.GetAllocator()); - - - - parallel.AddMember("drank", drank, doc.GetAllocator()); - - parallel.AddMember("dsize", dsize, doc.GetAllocator()); - - parallel.AddMember("dcolor", dcolor, doc.GetAllocator()); - - - // add Third stage:parameter in parallel: - general_info.AddMember("parallel", parallel, doc.GetAllocator()); - - } - /** - * @brief delete null node - */ - void RemoveNullValues(rapidjson::Value& parent) { - if (parent.IsObject()) { - for (rapidjson::Value::MemberIterator itr = parent.MemberBegin(); itr != parent.MemberEnd(); ) { - if (itr->value.IsNull()) { - itr = parent.EraseMember(itr); - } else { - // delet son null node - RemoveNullValues(itr->value); - ++itr; - } - } - } else if (parent.IsArray()) { - for (int i = 0; i < parent.Size(); ) { - if (parent[i].IsNull()) { - parent.Erase(parent.Begin() + i); - } else { - // delet son null node - RemoveNullValues(parent[i]); - ++i; - } - } - } - } - - /** - * @brief add Second stage:parameter in Abacus - readin_info: - */ - void Init_json_abacus_readinInfo(){ - //add Third stage:parameter in system_variables: - input_file.AddMember("suffix", input_suffix, doc.GetAllocator()); - input_file.AddMember("ntype", ntype, doc.GetAllocator()); - input_file.AddMember("calculation", calculation, doc.GetAllocator()); - input_file.AddMember("esolver_type", esolver_type, doc.GetAllocator()); - input_file.AddMember("symmetry", symmetry, doc.GetAllocator()); - input_file.AddMember("symmetry_precfield", symmetry_precfield, doc.GetAllocator()); - 
input_file.AddMember("symmetry_autoclose", symmetry_autoclose, doc.GetAllocator()); - input_file.AddMember("kpar", kpar, doc.GetAllocator()); - input_file.AddMember("bndpar", bndpar, doc.GetAllocator()); - input_file.AddMember("latname", latname, doc.GetAllocator()); - input_file.AddMember("init_wfc", init_wfc, doc.GetAllocator()); - input_file.AddMember("init_chg", init_chg, doc.GetAllocator()); - input_file.AddMember("init_vel", init_vel, doc.GetAllocator()); - input_file.AddMember("nelec", nelec, doc.GetAllocator()); - input_file.AddMember("nupdown", nupdown, doc.GetAllocator()); - input_file.AddMember("dft_functional", dft_functional, doc.GetAllocator()); - input_file.AddMember("xc_temperature", xc_temperature, doc.GetAllocator()); - input_file.AddMember("pseudo_rcut", pseudo_rcut, doc.GetAllocator()); - input_file.AddMember("pseudo_mesh", pseudo_mesh, doc.GetAllocator()); - input_file.AddMember("mem_saver", mem_saver, doc.GetAllocator()); - input_file.AddMember("diago_proc", diago_proc, doc.GetAllocator()); - input_file.AddMember("nbspline", nbspline, doc.GetAllocator()); - input_file.AddMember("kspacing", kspacing, doc.GetAllocator()); - input_file.AddMember("min_dist_coef", min_dist_coef, doc.GetAllocator()); - input_file.AddMember("device", device, doc.GetAllocator()); - - //add Third stage:parameter in files_related: - input_file.AddMember("stru_file", stru_file, doc.GetAllocator()); - input_file.AddMember("kpoint_file", kpoint_file, doc.GetAllocator()); - input_file.AddMember("pseudo_dir", pseudo_dir, doc.GetAllocator()); - input_file.AddMember("orbital_dir", orbital_dir, doc.GetAllocator()); - input_file.AddMember("read_file_dir", read_file_dir, doc.GetAllocator()); - input_file.AddMember("wannier_card", wannier_card, doc.GetAllocator()); - - //add Third stage:parameter in planewave_related: - input_file.AddMember("ecutwfc", ecutwfc, doc.GetAllocator()); - input_file.AddMember("nx", nx, doc.GetAllocator()); - input_file.AddMember("ny", ny, 
doc.GetAllocator()); - input_file.AddMember("nz", nz, doc.GetAllocator()); - input_file.AddMember("pw_seed", pw_seed, doc.GetAllocator()); - input_file.AddMember("pw_diag_thr", pw_diag_thr, doc.GetAllocator()); - input_file.AddMember("pw_diag_nmax", pw_diag_nmax, doc.GetAllocator()); - input_file.AddMember("pw_diag_ndim", pw_diag_ndim, doc.GetAllocator()); - - - //add Third stage:parameter in numerical_atomic_orbitals_related: - input_file.AddMember("nb2d", nb2d, doc.GetAllocator()); - input_file.AddMember("lmaxmax", lmaxmax, doc.GetAllocator()); - input_file.AddMember("lcao_ecut", lcao_ecut, doc.GetAllocator()); - input_file.AddMember("lcao_dk", lcao_dk, doc.GetAllocator()); - input_file.AddMember("lcao_dr", lcao_dr, doc.GetAllocator()); - input_file.AddMember("lcao_rmax", lcao_rmax, doc.GetAllocator()); - input_file.AddMember("search_radius", search_radius, doc.GetAllocator()); - input_file.AddMember("search_pbc", search_pbc, doc.GetAllocator()); - input_file.AddMember("bx", bx, doc.GetAllocator()); - input_file.AddMember("by", by, doc.GetAllocator()); - input_file.AddMember("bz", bz, doc.GetAllocator()); - - //add Third stage:parameter in electronic_structure: - input_file.AddMember("basis_type", basis_type, doc.GetAllocator()); - input_file.AddMember("ks_solver", ks_solver, doc.GetAllocator()); - input_file.AddMember("nbands", nbands, doc.GetAllocator()); - input_file.AddMember("nbands_istate", nbands_istate, doc.GetAllocator()); - input_file.AddMember("nspin", nspin, doc.GetAllocator()); - input_file.AddMember("smearing_method", smearing_method, doc.GetAllocator()); - input_file.AddMember("smearing_sigma", smearing_sigma, doc.GetAllocator()); - input_file.AddMember("smearing_sigma_temp", smearing_sigma_temp, doc.GetAllocator()); - input_file.AddMember("mixing_type", mixing_type, doc.GetAllocator()); - input_file.AddMember("mixing_beta", mixing_beta, doc.GetAllocator()); - input_file.AddMember("mixing_ndim", mixing_ndim, doc.GetAllocator()); - 
input_file.AddMember("mixing_gg0", mixing_gg0, doc.GetAllocator()); - input_file.AddMember("mixing_tau", mixing_tau, doc.GetAllocator()); - input_file.AddMember("mixing_dftu", mixing_dftu, doc.GetAllocator()); - input_file.AddMember("gamma_only", gamma_only, doc.GetAllocator()); - input_file.AddMember("printe", printe, doc.GetAllocator()); - input_file.AddMember("scf_nmax", scf_nmax, doc.GetAllocator()); - input_file.AddMember("scf_thr", scf_thr, doc.GetAllocator()); - input_file.AddMember("scf_thr_type", scf_thr_type, doc.GetAllocator()); - input_file.AddMember("chg_extrap", chg_extrap, doc.GetAllocator()); - input_file.AddMember("lspinorb", lspinorb, doc.GetAllocator()); - input_file.AddMember("noncolin", noncolin, doc.GetAllocator()); - input_file.AddMember("soc_lambda", soc_lambda, doc.GetAllocator()); - - - //add Third stage:parameter in electronic_structure_SDFT: - input_file.AddMember("method_sto", method_sto, doc.GetAllocator()); - input_file.AddMember("nbands_sto", nbands_sto, doc.GetAllocator()); - input_file.AddMember("nche_sto", nche_sto, doc.GetAllocator()); - input_file.AddMember("emin_sto", emin_sto, doc.GetAllocator()); - input_file.AddMember("emax_sto", emax_sto, doc.GetAllocator()); - input_file.AddMember("seed_sto", seed_sto, doc.GetAllocator()); - input_file.AddMember("initsto_freq", initsto_freq, doc.GetAllocator()); - input_file.AddMember("npart_sto", npart_sto, doc.GetAllocator()); - - - //add Third stage:parameter in geometry_relaxation: - input_file.AddMember("relax_method", relax_method, doc.GetAllocator()); - input_file.AddMember("relax_new", relax_new, doc.GetAllocator()); - input_file.AddMember("relax_scale_force", relax_scale_force, doc.GetAllocator()); - input_file.AddMember("relax_nmax", relax_nmax, doc.GetAllocator()); - input_file.AddMember("relax_cg_thr", relax_cg_thr, doc.GetAllocator()); - input_file.AddMember("cal_force", cal_force, doc.GetAllocator()); - input_file.AddMember("force_thr", force_thr, doc.GetAllocator()); - 
input_file.AddMember("force_thr_ev", force_thr_ev, doc.GetAllocator()); - input_file.AddMember("force_thr_ev2", force_thr_ev2, doc.GetAllocator()); - input_file.AddMember("relax_bfgs_w1", relax_bfgs_w1, doc.GetAllocator()); - input_file.AddMember("relax_bfgs_w2", relax_bfgs_w2, doc.GetAllocator()); - input_file.AddMember("relax_bfgs_rmax", relax_bfgs_rmax, doc.GetAllocator()); - input_file.AddMember("relax_bfgs_rmin", relax_bfgs_rmin, doc.GetAllocator()); - input_file.AddMember("relax_bfgs_init", relax_bfgs_init, doc.GetAllocator()); - input_file.AddMember("cal_stress", cal_stress, doc.GetAllocator()); - input_file.AddMember("stress_thr", stress_thr, doc.GetAllocator()); - input_file.AddMember("press1", press1, doc.GetAllocator()); - input_file.AddMember("press2", press2, doc.GetAllocator()); - input_file.AddMember("press3", press3, doc.GetAllocator()); - input_file.AddMember("fixed_axes", fixed_axes, doc.GetAllocator()); - input_file.AddMember("fixed_ibrav", fixed_ibrav, doc.GetAllocator()); - input_file.AddMember("fixed_atoms", fixed_atoms, doc.GetAllocator()); - input_file.AddMember("cell_factor", cell_factor, doc.GetAllocator()); - - - //add Third stage:parameter in output_information_related: - input_file.AddMember("out_mul", out_mul, doc.GetAllocator()); - input_file.AddMember("out_freq_elec", out_freq_elec, doc.GetAllocator()); - input_file.AddMember("out_freq_ion", out_freq_ion, doc.GetAllocator()); - input_file.AddMember("out_chg", out_chg, doc.GetAllocator()); - input_file.AddMember("out_pot", out_pot, doc.GetAllocator()); - input_file.AddMember("out_dm", out_dm, doc.GetAllocator()); - input_file.AddMember("out_dm1", out_dm1, doc.GetAllocator()); - input_file.AddMember("out_wfc_pw", out_wfc_pw, doc.GetAllocator()); - input_file.AddMember("out_wfc_r", out_wfc_r, doc.GetAllocator()); - input_file.AddMember("out_wfc_lcao", out_wfc_lcao, doc.GetAllocator()); - input_file.AddMember("out_dos", out_dos, doc.GetAllocator()); - input_file.AddMember("out_band", 
out_band, doc.GetAllocator()); - input_file.AddMember("out_proj_band", out_proj_band, doc.GetAllocator()); - input_file.AddMember("out_stru", out_stru, doc.GetAllocator()); - input_file.AddMember("out_bandgap", out_bandgap, doc.GetAllocator()); - input_file.AddMember("out_level", out_level, doc.GetAllocator()); - input_file.AddMember("out_alllog", out_alllog, doc.GetAllocator()); - input_file.AddMember("out_mat_hs", out_mat_hs, doc.GetAllocator()); - input_file.AddMember("out_mat_r", out_mat_r, doc.GetAllocator()); - input_file.AddMember("out_mat_hs2", out_mat_hs2, doc.GetAllocator()); - input_file.AddMember("out_mat_t", out_mat_t, doc.GetAllocator()); - input_file.AddMember("out_mat_dh", out_mat_dh, doc.GetAllocator()); - input_file.AddMember("out_app_flag", out_app_flag, doc.GetAllocator()); - input_file.AddMember("out_interval", out_interval, doc.GetAllocator()); - input_file.AddMember("out_element_info", out_element_info, doc.GetAllocator()); - input_file.AddMember("restart_save", restart_save, doc.GetAllocator()); - input_file.AddMember("restart_load", restart_load, doc.GetAllocator()); - input_file.AddMember("rpa", rpa, doc.GetAllocator()); - - //add Third stage:parameter in density_of_states: - input_file.AddMember("dos_edelta_ev", dos_edelta_ev, doc.GetAllocator()); - input_file.AddMember("dos_sigma", dos_sigma, doc.GetAllocator()); - input_file.AddMember("dos_scale", dos_scale, doc.GetAllocator()); - input_file.AddMember("dos_emin_ev", dos_emin_ev, doc.GetAllocator()); - input_file.AddMember("dos_emax_ev", dos_emax_ev, doc.GetAllocator()); - input_file.AddMember("dos_nche", dos_nche, doc.GetAllocator()); - - //add Third stage:parameter in naos: - input_file.AddMember("bessel_nao_ecut", bessel_nao_ecut, doc.GetAllocator()); - input_file.AddMember("bessel_nao_tolerence", bessel_nao_tolerence, doc.GetAllocator()); - input_file.AddMember("bessel_nao_rcut", bessel_nao_rcut, doc.GetAllocator()); - input_file.AddMember("bessel_nao_smooth", bessel_nao_smooth, 
doc.GetAllocator()); - input_file.AddMember("bessel_nao_sigma", bessel_nao_sigma, doc.GetAllocator()); - - //add Third stage:parameter in deepks: - input_file.AddMember("input_file_out_labels", input_file_out_labels, doc.GetAllocator()); - input_file.AddMember("input_file_scf", input_file_scf, doc.GetAllocator()); - input_file.AddMember("input_file_model", input_file_model, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_lmax", bessel_descriptor_lmax, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_ecut", bessel_descriptor_ecut, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_tolerence", bessel_descriptor_tolerence, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_rcut", bessel_descriptor_rcut, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_smooth", bessel_descriptor_smooth, doc.GetAllocator()); - input_file.AddMember("bessel_descriptor_sigma", bessel_descriptor_sigma, doc.GetAllocator()); - input_file.AddMember("input_file_bandgap", input_file_bandgap, doc.GetAllocator()); - input_file.AddMember("input_file_out_unittest", input_file_out_unittest, doc.GetAllocator()); - - //add Third stage:parameter in ofdft: - input_file.AddMember("of_kinetic", of_kinetic, doc.GetAllocator()); - input_file.AddMember("of_method", of_method, doc.GetAllocator()); - input_file.AddMember("of_conv", of_conv, doc.GetAllocator()); - input_file.AddMember("of_tole", of_tole, doc.GetAllocator()); - input_file.AddMember("of_tolp", of_tolp, doc.GetAllocator()); - input_file.AddMember("of_tf_weight", of_tf_weight, doc.GetAllocator()); - input_file.AddMember("of_vw_weight", of_vw_weight, doc.GetAllocator()); - input_file.AddMember("of_wt_alpha", of_wt_alpha, doc.GetAllocator()); - input_file.AddMember("of_wt_beta", of_wt_beta, doc.GetAllocator()); - input_file.AddMember("of_wt_rho0", of_wt_rho0, doc.GetAllocator()); - input_file.AddMember("of_hold_rho0", of_hold_rho0, doc.GetAllocator()); - 
input_file.AddMember("of_lkt_a", of_lkt_a, doc.GetAllocator()); - input_file.AddMember("of_read_kernel", of_read_kernel, doc.GetAllocator()); - input_file.AddMember("of_kernel_file", of_kernel_file, doc.GetAllocator()); - input_file.AddMember("of_full_pw", of_full_pw, doc.GetAllocator()); - input_file.AddMember("of_full_pw_dim", of_full_pw_dim, doc.GetAllocator()); - - - //add Third stage:parameter in electric_field_and_dipole_correction: - input_file.AddMember("efield_flag", efield_flag, doc.GetAllocator()); - input_file.AddMember("dip_cor_flag", dip_cor_flag, doc.GetAllocator()); - input_file.AddMember("efield_dir", efield_dir, doc.GetAllocator()); - input_file.AddMember("efield_pos_max", efield_pos_max, doc.GetAllocator()); - input_file.AddMember("efield_pos_dec", efield_pos_dec, doc.GetAllocator()); - input_file.AddMember("efield_amp", efield_amp, doc.GetAllocator()); - - //add Third stage:parameter in gate_field: - input_file.AddMember("gate_flag", gate_flag, doc.GetAllocator()); - input_file.AddMember("zgate", zgate, doc.GetAllocator()); - input_file.AddMember("block", block, doc.GetAllocator()); - input_file.AddMember("block_down", block_down, doc.GetAllocator()); - input_file.AddMember("block_up", block_up, doc.GetAllocator()); - input_file.AddMember("block_height", block_height, doc.GetAllocator()); - - //add Third stage:parameter in exact_exchange: - input_file.AddMember("exx_hybrid_alpha", exx_hybrid_alpha, doc.GetAllocator()); - input_file.AddMember("exx_hse_omega", exx_hse_omega, doc.GetAllocator()); - input_file.AddMember("exx_separate_loop", exx_separate_loop, doc.GetAllocator()); - input_file.AddMember("exx_hybrid_step", exx_hybrid_step, doc.GetAllocator()); - input_file.AddMember("exx_mixing_beta", exx_mixing_beta, doc.GetAllocator()); - input_file.AddMember("exx_lambda", exx_lambda, doc.GetAllocator()); - input_file.AddMember("exx_pca_threshold", exx_pca_threshold, doc.GetAllocator()); - input_file.AddMember("exx_c_threshold", exx_c_threshold, 
doc.GetAllocator()); - input_file.AddMember("exx_v_threshold", exx_v_threshold, doc.GetAllocator()); - input_file.AddMember("exx_dm_threshold", exx_dm_threshold, doc.GetAllocator()); - input_file.AddMember("exx_c_grad_threshold", exx_c_grad_threshold, doc.GetAllocator()); - input_file.AddMember("exx_v_grad_threshold", exx_v_grad_threshold, doc.GetAllocator()); - input_file.AddMember("exx_schwarz_threshold", exx_schwarz_threshold, doc.GetAllocator()); - input_file.AddMember("exx_cauchy_threshold", exx_cauchy_threshold, doc.GetAllocator()); - input_file.AddMember("exx_cauchy_force_threshold", exx_cauchy_force_threshold, doc.GetAllocator()); - input_file.AddMember("exx_cauchy_stress_threshold", exx_cauchy_stress_threshold, doc.GetAllocator()); - input_file.AddMember("exx_ccp_threshold", exx_ccp_threshold, doc.GetAllocator()); - input_file.AddMember("exx_ccp_rmesh_times", exx_ccp_rmesh_times, doc.GetAllocator()); - input_file.AddMember("exx_distribute_type", exx_distribute_type, doc.GetAllocator()); - input_file.AddMember("exx_opt_orb_lmax", exx_opt_orb_lmax, doc.GetAllocator()); - input_file.AddMember("exx_opt_orb_ecut", exx_opt_orb_ecut, doc.GetAllocator()); - input_file.AddMember("exx_opt_orb_tolerence", exx_opt_orb_tolerence, doc.GetAllocator()); - input_file.AddMember("exx_real_number", exx_real_number, doc.GetAllocator()); - - - //add Third stage:parameter in molecular_dynamics: - input_file.AddMember("md_type", md_type, doc.GetAllocator()); - input_file.AddMember("md_nstep", md_nstep, doc.GetAllocator()); - input_file.AddMember("md_dt", md_dt, doc.GetAllocator()); - input_file.AddMember("md_thermostat", md_thermostat, doc.GetAllocator()); - input_file.AddMember("md_tlast", md_tlast, doc.GetAllocator()); - input_file.AddMember("md_tfirst", md_tfirst, doc.GetAllocator()); - input_file.AddMember("md_restart", md_restart, doc.GetAllocator()); - input_file.AddMember("md_restartfreq", md_restartfreq, doc.GetAllocator()); - input_file.AddMember("md_dumpfreq", 
md_dumpfreq, doc.GetAllocator()); - input_file.AddMember("dump_force", dump_force, doc.GetAllocator()); - input_file.AddMember("dump_vel", dump_vel, doc.GetAllocator()); - input_file.AddMember("dump_virial", dump_virial, doc.GetAllocator()); - input_file.AddMember("md_seed", md_seed, doc.GetAllocator()); - input_file.AddMember("md_tfreq", md_tfreq, doc.GetAllocator()); - input_file.AddMember("md_tchain", md_tchain, doc.GetAllocator()); - input_file.AddMember("md_pmode", md_pmode, doc.GetAllocator()); - input_file.AddMember("md_prec_level", md_prec_level, doc.GetAllocator()); - input_file.AddMember("ref_cell_factor", ref_cell_factor, doc.GetAllocator()); - input_file.AddMember("md_pcouple", md_pcouple, doc.GetAllocator()); - input_file.AddMember("md_pfirst", md_pfirst, doc.GetAllocator()); - input_file.AddMember("md_plast", md_plast, doc.GetAllocator()); - input_file.AddMember("md_pfreq", md_pfreq, doc.GetAllocator()); - input_file.AddMember("md_pchain", md_pchain, doc.GetAllocator()); - input_file.AddMember("lj_rcut", lj_rcut, doc.GetAllocator()); - input_file.AddMember("lj_epsilon", lj_epsilon, doc.GetAllocator()); - input_file.AddMember("lj_sigma", lj_sigma, doc.GetAllocator()); - input_file.AddMember("pot_file", pot_file, doc.GetAllocator()); - input_file.AddMember("msst_direction", msst_direction, doc.GetAllocator()); - input_file.AddMember("msst_vel", msst_vel, doc.GetAllocator()); - input_file.AddMember("msst_vis", msst_vis, doc.GetAllocator()); - input_file.AddMember("msst_tscale", msst_tscale, doc.GetAllocator()); - input_file.AddMember("msst_qmass", msst_qmass, doc.GetAllocator()); - input_file.AddMember("md_damp", md_damp, doc.GetAllocator()); - input_file.AddMember("md_tolerance", md_tolerance, doc.GetAllocator()); - input_file.AddMember("md_nraise", md_nraise, doc.GetAllocator()); - input_file.AddMember("cal_syns", cal_syns, doc.GetAllocator()); - input_file.AddMember("dmax", dmax, doc.GetAllocator()); - - //add Third stage:parameter in dft_plus_u: - 
input_file.AddMember("orbital_corr", orbital_corr, doc.GetAllocator()); - input_file.AddMember("hubbard_u", hubbard_u, doc.GetAllocator()); - input_file.AddMember("yukawa_potential", yukawa_potential, doc.GetAllocator()); - input_file.AddMember("yukawa_lambda", yukawa_lambda, doc.GetAllocator()); - input_file.AddMember("omc", omc, doc.GetAllocator()); - - //add Third stage:parameter in vdw_correction: - input_file.AddMember("vdw_method", vdw_method, doc.GetAllocator()); - input_file.AddMember("vdw_s6", vdw_s6, doc.GetAllocator()); - input_file.AddMember("vdw_s8", vdw_s8, doc.GetAllocator()); - input_file.AddMember("vdw_a1", vdw_a1, doc.GetAllocator()); - input_file.AddMember("vdw_a2", vdw_a2, doc.GetAllocator()); - input_file.AddMember("vdw_d", vdw_d, doc.GetAllocator()); - input_file.AddMember("vdw_abc", vdw_abc, doc.GetAllocator()); - input_file.AddMember("vdw_C6_file", vdw_C6_file, doc.GetAllocator()); - input_file.AddMember("vdw_C6_unit", vdw_C6_unit, doc.GetAllocator()); - input_file.AddMember("vdw_R0_file", vdw_R0_file, doc.GetAllocator()); - input_file.AddMember("vdw_R0_unit", vdw_R0_unit, doc.GetAllocator()); - input_file.AddMember("vdw_cutoff_type", vdw_cutoff_type, doc.GetAllocator()); - input_file.AddMember("vdw_cutoff_radius", vdw_cutoff_radius, doc.GetAllocator()); - input_file.AddMember("vdw_radius_unit", vdw_radius_unit, doc.GetAllocator()); - input_file.AddMember("vdw_cutoff_period", vdw_cutoff_period, doc.GetAllocator()); - input_file.AddMember("vdw_cn_thr", vdw_cn_thr, doc.GetAllocator()); - input_file.AddMember("vdw_cn_thr_unit", vdw_cn_thr_unit, doc.GetAllocator()); - - //add Third stage:parameter in berry_phase_and_wannier90_interface: - input_file.AddMember("berry_phase", berry_phase, doc.GetAllocator()); - input_file.AddMember("gdir", gdir, doc.GetAllocator()); - input_file.AddMember("towannier90", towannier90, doc.GetAllocator()); - input_file.AddMember("nnkpfile", nnkpfile, doc.GetAllocator()); - input_file.AddMember("wannier_spin", 
wannier_spin, doc.GetAllocator()); - - //add Third stage:parameter in tddft: - input_file.AddMember("td_edm", td_edm, doc.GetAllocator()); - input_file.AddMember("td_print_eij", td_print_eij, doc.GetAllocator()); - input_file.AddMember("td_propagator", td_propagator, doc.GetAllocator()); - input_file.AddMember("td_vext", td_vext, doc.GetAllocator()); - input_file.AddMember("td_vext_dire", td_vext_dire, doc.GetAllocator()); - input_file.AddMember("td_stype", td_stype, doc.GetAllocator()); - input_file.AddMember("td_ttype", td_ttype, doc.GetAllocator()); - input_file.AddMember("td_tstart", td_tstart, doc.GetAllocator()); - input_file.AddMember("td_tend", td_tend, doc.GetAllocator()); - input_file.AddMember("td_lcut1", td_lcut1, doc.GetAllocator()); - input_file.AddMember("td_lcut2", td_lcut2, doc.GetAllocator()); - input_file.AddMember("td_gauss_freq", td_gauss_freq, doc.GetAllocator()); - input_file.AddMember("td_gauss_phase", td_gauss_phase, doc.GetAllocator()); - input_file.AddMember("td_gauss_sigma", td_gauss_sigma, doc.GetAllocator()); - input_file.AddMember("td_gauss_t0", td_gauss_t0, doc.GetAllocator()); - input_file.AddMember("td_gauss_amp", td_gauss_amp, doc.GetAllocator()); - input_file.AddMember("td_trape_freq", td_trape_freq, doc.GetAllocator()); - input_file.AddMember("td_trape_phase", td_trape_phase, doc.GetAllocator()); - input_file.AddMember("td_trape_t1", td_trape_t1, doc.GetAllocator()); - input_file.AddMember("td_trape_t2", td_trape_t2, doc.GetAllocator()); - input_file.AddMember("td_trape_t3", td_trape_t3, doc.GetAllocator()); - input_file.AddMember("td_trape_amp", td_trape_amp, doc.GetAllocator()); - input_file.AddMember("td_trigo_freq1", td_trigo_freq1, doc.GetAllocator()); - input_file.AddMember("td_trigo_freq2", td_trigo_freq2, doc.GetAllocator()); - input_file.AddMember("td_trigo_phase1", td_trigo_phase1, doc.GetAllocator()); - input_file.AddMember("td_trigo_phase2", td_trigo_phase2, doc.GetAllocator()); - input_file.AddMember("td_trigo_amp", 
td_trigo_amp, doc.GetAllocator()); - input_file.AddMember("td_heavi_t0", td_heavi_t0, doc.GetAllocator()); - input_file.AddMember("td_heavi_amp", td_heavi_amp, doc.GetAllocator()); - input_file.AddMember("td_out_dipole", td_out_dipole, doc.GetAllocator()); - input_file.AddMember("td_out_efield", td_out_efield, doc.GetAllocator()); - input_file.AddMember("ocp", ocp, doc.GetAllocator()); - input_file.AddMember("ocp_set", ocp_set, doc.GetAllocator()); - - //add Third stage:parameter in debuging_related: - input_file.AddMember("t_in_h", t_in_h, doc.GetAllocator()); - input_file.AddMember("vl_in_h", vl_in_h, doc.GetAllocator()); - input_file.AddMember("vnl_in_h", vnl_in_h, doc.GetAllocator()); - input_file.AddMember("vh_in_h", vh_in_h, doc.GetAllocator()); - input_file.AddMember("vion_in_h", vion_in_h, doc.GetAllocator()); - input_file.AddMember("test_force", test_force, doc.GetAllocator()); - input_file.AddMember("test_stress", test_stress, doc.GetAllocator()); - input_file.AddMember("colour", colour, doc.GetAllocator()); - input_file.AddMember("test_skip_ewald", test_skip_ewald, doc.GetAllocator()); - - //add Third stage:parameter in electronic_conductivities: - input_file.AddMember("cal_cond", cal_cond, doc.GetAllocator()); - input_file.AddMember("cond_nche", cond_nche, doc.GetAllocator()); - input_file.AddMember("cond_dw", cond_dw, doc.GetAllocator()); - input_file.AddMember("cond_wcut", cond_wcut, doc.GetAllocator()); - input_file.AddMember("cond_dt", cond_dt, doc.GetAllocator()); - input_file.AddMember("cond_dtbatch", cond_dtbatch, doc.GetAllocator()); - input_file.AddMember("cond_fwhm", cond_fwhm, doc.GetAllocator()); - input_file.AddMember("cond_nonlocal", cond_nonlocal, doc.GetAllocator()); - - //add Third stage:parameter in implicit_solvation_model: - input_file.AddMember("imp_sol", imp_sol, doc.GetAllocator()); - input_file.AddMember("eb_k", eb_k, doc.GetAllocator()); - input_file.AddMember("tau", tau, doc.GetAllocator()); - input_file.AddMember("sigma_k", 
sigma_k, doc.GetAllocator()); - input_file.AddMember("nc_k", nc_k, doc.GetAllocator()); - - - RemoveNullValues(input_file); - - - // after add child_node's node in readin_info, add child node - // add parameters in readin_info: - readin_info.AddMember("input_file", input_file, doc.GetAllocator()); - - } - - - void Finish_json_tree(){ - // Converts a json object to a string - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - doc.Accept(writer); - - // Output the json string to a file - std::string json_path; - json_path.append("abacus.json"); - - std::ofstream ofs(json_path); - ofs << buffer.GetString() << std::endl; - ofs.close(); - } - - - - -} - - -#endif \ No newline at end of file diff --git a/source/module_base/para_json.h b/source/module_base/para_json.h deleted file mode 100644 index 0e829dc80e..0000000000 --- a/source/module_base/para_json.h +++ /dev/null @@ -1,560 +0,0 @@ - -#include -#include -#include -#include -#include - -#ifdef __RAPIDJSON - -#include -#include -#include - - -/** - * @brief This namespace is used to store the json object of the - * abacus parameter and its handlers. 
Used to read the parameters - * at run time and finally organize them into json format files - * -*/ -namespace Para_Json -{ - - extern int test; - // @param doc: the output json file - extern rapidjson::Document doc; - extern rapidjson::Value abacus; - - // @param general_info : - extern rapidjson::Value general_info; - extern rapidjson::Value version; - extern rapidjson::Value commit; - extern rapidjson::Value begin_time; - extern rapidjson::Value begin_date; - extern rapidjson::Value device_g; - // @param general_info -- parallel: - extern rapidjson::Value parallel; - extern rapidjson::Value drank; - extern rapidjson::Value dsize; - extern rapidjson::Value dcolor ; - // @param general_info -- path - extern rapidjson::Value path; - extern rapidjson::Value global_out_dir; - extern rapidjson::Value global_in_card; - extern rapidjson::Value pseudo_dir_path ; - extern rapidjson::Value orbital_dir_path; - - - // @param reading_information: - extern rapidjson::Value readin_info; - // @param reading_information -- input_para: - - // @param reading_information -- input_para -- system_variables: - extern rapidjson::Value system_variables; - - extern rapidjson::Value input_file; - extern rapidjson::Value input_suffix; - extern rapidjson::Value ntype; - extern rapidjson::Value calculation; - extern rapidjson::Value esolver_type; - extern rapidjson::Value symmetry; - extern rapidjson::Value symmetry_precfield; - extern rapidjson::Value symmetry_autoclose; - extern rapidjson::Value kpar; - extern rapidjson::Value bndpar; - extern rapidjson::Value latname; - extern rapidjson::Value init_wfc; - extern rapidjson::Value init_chg; - extern rapidjson::Value init_vel; - extern rapidjson::Value nelec; - extern rapidjson::Value nupdown; - extern rapidjson::Value dft_functional; - extern rapidjson::Value xc_temperature; - extern rapidjson::Value pseudo_rcut; - extern rapidjson::Value pseudo_mesh; - extern rapidjson::Value mem_saver; - extern rapidjson::Value diago_proc; - extern 
rapidjson::Value nbspline; - extern rapidjson::Value kspacing; - extern rapidjson::Value min_dist_coef; - extern rapidjson::Value device; - // @param reading_information -- input_para -- files_related - - extern rapidjson::Value stru_file; - extern rapidjson::Value kpoint_file; - extern rapidjson::Value pseudo_dir; - extern rapidjson::Value orbital_dir; - extern rapidjson::Value read_file_dir; - extern rapidjson::Value wannier_card; - // @param reading_information -- input_para -- planewave_related - - extern rapidjson::Value ecutwfc; - extern rapidjson::Value nx; - extern rapidjson::Value ny; - extern rapidjson::Value nz; - extern rapidjson::Value pw_seed; - extern rapidjson::Value pw_diag_thr; - extern rapidjson::Value pw_diag_nmax; - extern rapidjson::Value pw_diag_ndim; - // @param reading_information -- input_para -- numerical_atomic_orbitals_related - - extern rapidjson::Value nb2d; - extern rapidjson::Value lmaxmax; - extern rapidjson::Value lcao_ecut; - extern rapidjson::Value lcao_dk; - extern rapidjson::Value lcao_dr; - extern rapidjson::Value lcao_rmax; - extern rapidjson::Value search_radius; - extern rapidjson::Value search_pbc; - extern rapidjson::Value bx; - extern rapidjson::Value by; - extern rapidjson::Value bz; - // @param reading_information -- input_para -- electronic_structure - - extern rapidjson::Value basis_type; - extern rapidjson::Value ks_solver; - extern rapidjson::Value nbands; - extern rapidjson::Value nbands_istate; - extern rapidjson::Value nspin; - extern rapidjson::Value smearing_method; - extern rapidjson::Value smearing_sigma; - extern rapidjson::Value smearing_sigma_temp; - extern rapidjson::Value mixing_type; - extern rapidjson::Value mixing_beta; - extern rapidjson::Value mixing_ndim; - extern rapidjson::Value mixing_gg0; - extern rapidjson::Value mixing_tau; - extern rapidjson::Value mixing_dftu; - extern rapidjson::Value gamma_only; - extern rapidjson::Value printe; - extern rapidjson::Value scf_nmax; - extern 
rapidjson::Value scf_thr; - extern rapidjson::Value scf_thr_type; - extern rapidjson::Value chg_extrap; - extern rapidjson::Value lspinorb; - extern rapidjson::Value noncolin; - extern rapidjson::Value soc_lambda; - // @param reading_information -- input_para -- electronic_structure_SDFT - - extern rapidjson::Value method_sto; - extern rapidjson::Value nbands_sto; - extern rapidjson::Value nche_sto; - extern rapidjson::Value emin_sto; - extern rapidjson::Value emax_sto; - extern rapidjson::Value seed_sto; - extern rapidjson::Value initsto_freq; - extern rapidjson::Value npart_sto; - // @param reading_information -- input_para -- geometry_relaxation - - extern rapidjson::Value relax_method; - extern rapidjson::Value relax_new; - extern rapidjson::Value relax_scale_force; - extern rapidjson::Value relax_nmax; - extern rapidjson::Value relax_cg_thr; - extern rapidjson::Value cal_force; - extern rapidjson::Value force_thr; - extern rapidjson::Value force_thr_ev; - extern rapidjson::Value force_thr_ev2; - extern rapidjson::Value relax_bfgs_w1; - extern rapidjson::Value relax_bfgs_w2; - extern rapidjson::Value relax_bfgs_rmax; - extern rapidjson::Value relax_bfgs_rmin; - extern rapidjson::Value relax_bfgs_init; - extern rapidjson::Value cal_stress; - extern rapidjson::Value stress_thr; - extern rapidjson::Value press1; - extern rapidjson::Value press2; - extern rapidjson::Value press3; - extern rapidjson::Value fixed_axes; - extern rapidjson::Value fixed_ibrav; - extern rapidjson::Value fixed_atoms; - extern rapidjson::Value cell_factor; - - // @param reading_information -- input_para -- output_information_related - - extern rapidjson::Value out_mul; - extern rapidjson::Value out_freq_elec; - extern rapidjson::Value out_freq_ion; - extern rapidjson::Value out_chg; - extern rapidjson::Value out_pot; - extern rapidjson::Value out_dm; - extern rapidjson::Value out_dm1; - extern rapidjson::Value out_wfc_pw; - extern rapidjson::Value out_wfc_r; - extern rapidjson::Value 
out_wfc_lcao; - extern rapidjson::Value out_dos; - extern rapidjson::Value out_band; - extern rapidjson::Value out_proj_band; - extern rapidjson::Value out_stru; - extern rapidjson::Value out_bandgap; - extern rapidjson::Value out_level; - extern rapidjson::Value out_alllog; - extern rapidjson::Value out_mat_hs; - extern rapidjson::Value out_mat_r; - extern rapidjson::Value out_mat_hs2; - extern rapidjson::Value out_mat_t; - extern rapidjson::Value out_mat_dh; - extern rapidjson::Value out_app_flag; - extern rapidjson::Value out_interval; - extern rapidjson::Value out_element_info; - extern rapidjson::Value restart_save; - extern rapidjson::Value restart_load; - extern rapidjson::Value rpa; - - // @param reading_information -- input_para -- density_of_states - - extern rapidjson::Value dos_edelta_ev; - extern rapidjson::Value dos_sigma; - extern rapidjson::Value dos_scale; - extern rapidjson::Value dos_emin_ev; - extern rapidjson::Value dos_emax_ev; - extern rapidjson::Value dos_nche; - // @param reading_information -- input_para -- naos - extern rapidjson::Value bessel_nao_ecut; - extern rapidjson::Value bessel_nao_tolerence; - extern rapidjson::Value bessel_nao_rcut; - extern rapidjson::Value bessel_nao_smooth; - extern rapidjson::Value bessel_nao_sigma; - // @param reading_information -- input_para -- deepks - - extern rapidjson::Value deepks_out_labels; - extern rapidjson::Value deepks_scf; - extern rapidjson::Value deepks_model; - extern rapidjson::Value bessel_descriptor_lmax; - extern rapidjson::Value bessel_descriptor_ecut; - extern rapidjson::Value bessel_descriptor_tolerence; - extern rapidjson::Value bessel_descriptor_rcut; - extern rapidjson::Value bessel_descriptor_smooth; - extern rapidjson::Value bessel_descriptor_sigma; - extern rapidjson::Value deepks_bandgap; - extern rapidjson::Value deepks_out_unittest; - // @param reading_information -- input_para -- ofdft - extern rapidjson::Value of_kinetic; - extern rapidjson::Value of_method; - extern 
rapidjson::Value of_conv; - extern rapidjson::Value of_tole; - extern rapidjson::Value of_tolp; - extern rapidjson::Value of_tf_weight; - extern rapidjson::Value of_vw_weight; - extern rapidjson::Value of_wt_alpha; - extern rapidjson::Value of_wt_beta; - extern rapidjson::Value of_wt_rho0; - extern rapidjson::Value of_hold_rho0; - extern rapidjson::Value of_lkt_a; - extern rapidjson::Value of_read_kernel; - extern rapidjson::Value of_kernel_file; - extern rapidjson::Value of_full_pw; - extern rapidjson::Value of_full_pw_dim; - - // @param reading_information -- input_para -- electric_field_and_dipole_correction - - extern rapidjson::Value efield_flag; - extern rapidjson::Value dip_cor_flag; - extern rapidjson::Value efield_dir; - extern rapidjson::Value efield_pos_max; - extern rapidjson::Value efield_pos_dec; - extern rapidjson::Value efield_amp; - // @param reading_information -- input_para -- gate_field - - extern rapidjson::Value gate_flag; - extern rapidjson::Value zgate; - extern rapidjson::Value block; - extern rapidjson::Value block_down; - extern rapidjson::Value block_up; - extern rapidjson::Value block_height; - // @param reading_information -- input_para -- exact_exchange - extern rapidjson::Value exx_hybrid_alpha; - extern rapidjson::Value exx_hse_omega; - extern rapidjson::Value exx_separate_loop; - extern rapidjson::Value exx_hybrid_step; - extern rapidjson::Value exx_mixing_beta; - extern rapidjson::Value exx_lambda; - extern rapidjson::Value exx_pca_threshold; - extern rapidjson::Value exx_c_threshold; - extern rapidjson::Value exx_v_threshold; - extern rapidjson::Value exx_dm_threshold; - extern rapidjson::Value exx_c_grad_threshold; - extern rapidjson::Value exx_v_grad_threshold; - extern rapidjson::Value exx_schwarz_threshold; - extern rapidjson::Value exx_cauchy_threshold; - extern rapidjson::Value exx_cauchy_force_threshold; - extern rapidjson::Value exx_cauchy_stress_threshold; - extern rapidjson::Value exx_ccp_threshold; - extern 
rapidjson::Value exx_ccp_rmesh_times; - extern rapidjson::Value exx_distribute_type; - extern rapidjson::Value exx_opt_orb_lmax; - extern rapidjson::Value exx_opt_orb_ecut; - extern rapidjson::Value exx_opt_orb_tolerence; - extern rapidjson::Value exx_real_number; - - // @param reading_information -- input_para -- molecular_dynamics - extern rapidjson::Value md_type; - extern rapidjson::Value md_nstep; - extern rapidjson::Value md_dt; - extern rapidjson::Value md_thermostat; - extern rapidjson::Value md_tlast; - extern rapidjson::Value md_tfirst; - extern rapidjson::Value md_restart; - extern rapidjson::Value md_restartfreq; - extern rapidjson::Value md_dumpfreq; - extern rapidjson::Value dump_force; - extern rapidjson::Value dump_vel; - extern rapidjson::Value dump_virial; - extern rapidjson::Value md_seed; - extern rapidjson::Value md_tfreq; - extern rapidjson::Value md_tchain; - extern rapidjson::Value md_pmode; - extern rapidjson::Value md_prec_level; - extern rapidjson::Value ref_cell_factor; - extern rapidjson::Value md_pcouple; - extern rapidjson::Value md_pfirst; - extern rapidjson::Value md_plast; - extern rapidjson::Value md_pfreq; - extern rapidjson::Value md_pchain; - extern rapidjson::Value lj_rcut; - extern rapidjson::Value lj_epsilon; - extern rapidjson::Value lj_sigma; - extern rapidjson::Value pot_file; - extern rapidjson::Value msst_direction; - extern rapidjson::Value msst_vel; - extern rapidjson::Value msst_vis; - extern rapidjson::Value msst_tscale; - extern rapidjson::Value msst_qmass; - extern rapidjson::Value md_damp; - extern rapidjson::Value md_tolerance; - extern rapidjson::Value md_nraise; - extern rapidjson::Value cal_syns; - extern rapidjson::Value dmax; - - // @param reading_information -- input_para -- dft_plus_u - extern rapidjson::Value orbital_corr; - extern rapidjson::Value hubbard_u; - extern rapidjson::Value yukawa_potential; - extern rapidjson::Value yukawa_lambda; - extern rapidjson::Value omc; - - // @param 
reading_information -- input_para -- vdw_correction - extern rapidjson::Value vdw_method; - extern rapidjson::Value vdw_s6; - extern rapidjson::Value vdw_s8; - extern rapidjson::Value vdw_a1; - extern rapidjson::Value vdw_a2; - extern rapidjson::Value vdw_d; - extern rapidjson::Value vdw_abc; - extern rapidjson::Value vdw_C6_file; - extern rapidjson::Value vdw_C6_unit; - extern rapidjson::Value vdw_R0_file; - extern rapidjson::Value vdw_R0_unit; - extern rapidjson::Value vdw_cutoff_type; - extern rapidjson::Value vdw_cutoff_radius; - extern rapidjson::Value vdw_radius_unit; - extern rapidjson::Value vdw_cutoff_period; - extern rapidjson::Value vdw_cn_thr; - extern rapidjson::Value vdw_cn_thr_unit; - - // @param reading_information -- input_para -- berry_phase_and_wannier90_interface - extern rapidjson::Value berry_phase; - extern rapidjson::Value gdir; - extern rapidjson::Value towannier90; - extern rapidjson::Value nnkpfile; - extern rapidjson::Value wannier_spin; - - // @param reading_information -- input_para -- tddft - extern rapidjson::Value td_edm; - extern rapidjson::Value td_print_eij; - extern rapidjson::Value td_propagator; - extern rapidjson::Value td_vext; - extern rapidjson::Value td_vext_dire; - extern rapidjson::Value td_stype; - extern rapidjson::Value td_ttype; - extern rapidjson::Value td_tstart; - extern rapidjson::Value td_tend; - extern rapidjson::Value td_lcut1; - extern rapidjson::Value td_lcut2; - extern rapidjson::Value td_gauss_freq; - extern rapidjson::Value td_gauss_phase; - extern rapidjson::Value td_gauss_sigma; - extern rapidjson::Value td_gauss_t0; - extern rapidjson::Value td_gauss_amp; - extern rapidjson::Value td_trape_freq; - extern rapidjson::Value td_trape_phase; - extern rapidjson::Value td_trape_t1; - extern rapidjson::Value td_trape_t2; - extern rapidjson::Value td_trape_t3; - extern rapidjson::Value td_trape_amp; - extern rapidjson::Value td_trigo_freq1; - extern rapidjson::Value td_trigo_freq2; - extern rapidjson::Value 
td_trigo_phase1; - extern rapidjson::Value td_trigo_phase2; - extern rapidjson::Value td_trigo_amp; - extern rapidjson::Value td_heavi_t0; - extern rapidjson::Value td_heavi_amp; - extern rapidjson::Value td_out_dipole; - extern rapidjson::Value td_out_efield; - extern rapidjson::Value ocp; - extern rapidjson::Value ocp_set; - - // @param reading_information -- input_para -- debuging_related - extern rapidjson::Value t_in_h; - extern rapidjson::Value vl_in_h; - extern rapidjson::Value vnl_in_h; - extern rapidjson::Value vh_in_h; - extern rapidjson::Value vion_in_h; - extern rapidjson::Value test_force; - extern rapidjson::Value test_stress; - extern rapidjson::Value colour; - extern rapidjson::Value test_skip_ewald; - - // @param reading_information -- input_para -- electronic_conductivities - extern rapidjson::Value cal_cond; - extern rapidjson::Value cond_nche; - extern rapidjson::Value cond_dw; - extern rapidjson::Value cond_wcut; - extern rapidjson::Value cond_dt; - extern rapidjson::Value cond_dtbatch; - extern rapidjson::Value cond_fwhm; - extern rapidjson::Value cond_nonlocal; - - // @param reading_information -- input_para -- implicit_solvation_model - extern rapidjson::Value imp_sol; - extern rapidjson::Value eb_k; - extern rapidjson::Value tau; - extern rapidjson::Value sigma_k; - extern rapidjson::Value nc_k; - - // @param reading_information -- stru_infos: - extern rapidjson::Value stru_infos; - // extern rapidjson::Value ATOMIC_SPECIES; - // extern rapidjson::Value NUMERICAL_ORBITAL; - // extern rapidjson::Value LATTICE_CONSTANT; - // extern rapidjson::Value ATOMIC_POSITIONS; - - // @param reading_information -- KPT_infos - extern rapidjson::Value KPT_infos; - // extern rapidjson::Value total_number; - // extern rapidjson::Value mode; - // extern rapidjson::Value vectors; - - // @param reading_information -- orb_infos - extern rapidjson::Value orb_infos; - - // @param reading_information -- pp - extern rapidjson::Value pp; - - // @param init - extern 
rapidjson::Value init; - // @param init -- general - // extern rapidjson::Value calculation; - // extern rapidjson::Value esolver_type; - // extern rapidjson::Value basis_type; - // extern rapidjson::Value gamma_only; - // extern rapidjson::Value ks_solver; - // extern rapidjson::Value ntype; - // extern rapidjson::Value nspin; - // extern rapidjson::Value ecutwfc; - // extern rapidjson::Value scf_thr; - // extern rapidjson::Value scf_nmax; - - // @param init -- symmetry - // extern rapidjson::Value symmetry; - // extern rapidjson::Value BRAVAIS_TYPE; - // extern rapidjson::Value BRAVAIS_LATTICE_NAME; - // extern rapidjson::Value IBRAV; - // extern rapidjson::Value LATTICE_CONSTANT_A; - // extern rapidjson::Value right_hand_lattice; - - // @param init -- Kpoints - extern rapidjson::Value kpoints; - extern rapidjson::Value nkstot; - extern rapidjson::Value nkstot_ibz; - extern rapidjson::Value coordinates; - extern rapidjson::Value weight; - - // @param init -- grid - extern rapidjson::Value grid; - extern rapidjson::Value energy_cutoff_for_wavefunc; - extern rapidjson::Value fft_grid_for_wave_functions; - extern rapidjson::Value number_of_plane_waves; - extern rapidjson::Value number_of_sticks; - - // @param init -- Smearing - // extern rapidjson::Value smearing_method; - // extern rapidjson::Value smearing_sigma; - - // @param init -- mixing - extern rapidjson::Value mixing; - - - // @param output - extern rapidjson::Value output; - - - - // @param final_stru - extern rapidjson::Value final_stru; - extern rapidjson::Value cell; - extern rapidjson::Value coordinate; - - - - - /** - * The functions below initialize the json output parameter - * tree to connect the nodes of the module - */ - - /** - * @brief add Top stage:parameter in Abacus: - */ - void Init_json_abacus(); - - - /** - * @brief add Second stage:parameter in Abacus - general_info: - */ - void Init_json_abacus_generalInfo(); - - - /** - * @brief add Second stage:parameter in Abacus - readin_info: - */ 
- void Init_json_abacus_readinInfo(); - - - /** - * @brief finish json tree build - */ - void Finish_json_tree(); - - - - /** - * @brief This function is used to populate the template type parameter - * values into rapidjson's Value object - */ - template - void set_json_value(rapidjson::Value &json_v,T *para){ - if(std::is_same::value) - { - json_v.SetInt(*reinterpret_cast(para)); - } - else if(std::is_same::value) - { - json_v.SetDouble(*reinterpret_cast(para)); - } - else if(std::is_same::value) - { - json_v.SetBool(*reinterpret_cast(para)); - } - else if(std::is_same::value) - { - // json_v.SetString(rapidjson::StringRef((*reinterpret_cast(para)).c_str())); - - json_v.SetString((*reinterpret_cast(para)).c_str(), std::strlen((*reinterpret_cast(para)).c_str()), doc.GetAllocator()); - //printf("exx_real_number = %s\n",(*reinterpret_cast(para)).c_str()); - } - } -} - -#endif \ No newline at end of file diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt index 008df422e5..666152b476 100644 --- a/source/module_base/test/CMakeLists.txt +++ b/source/module_base/test/CMakeLists.txt @@ -217,17 +217,3 @@ AddTest( SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp LIBS ${math_libs} formatter ) -if(ENABLE_GOOGLEBENCH) - AddTest( - TARGET perf_sphbes - LIBS formatter - SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp - ) -endif() - -if(ENABLE_RAPIDJSON) - AddTest( - TARGET base_para_json_test - SOURCES para_json_test.cpp ../para_json.cpp - ) -endif() diff --git a/source/module_base/test/complexmatrix_test.cpp b/source/module_base/test/complexmatrix_test.cpp index 0adc52363a..026aeb40de 100644 --- a/source/module_base/test/complexmatrix_test.cpp +++ b/source/module_base/test/complexmatrix_test.cpp @@ -23,8 +23,8 @@ * - set_as_identity_matrix() * - print():Output the elements of this complex matrix greater than 
threshold. * - checkreal() - * - * Tested relative functions + * + * Tested relative functions * - operator "+" "-" "*" between two ComplexMatrix * - operator "*" between a ComplexMatrix and double or complex, and reverse. * - trace() @@ -35,13 +35,13 @@ * - conj() * - scale_accumulate(): * - scaled_sum(): - * + * */ //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } inline void EXPECT_COMPLEX_EQ(const std::complex& a,const std::complex& b) @@ -104,8 +104,8 @@ TEST_F(ComplexMatrixTest,ConstructorCM) TEST_F(ComplexMatrixTest,ConstructorCMrvalue) { - ModuleBase::ComplexMatrix cm2(cm22); - ModuleBase::ComplexMatrix cm1(std::move(cm22)); + ModuleBase::ComplexMatrix cm2(cm22); + ModuleBase::ComplexMatrix cm1(std::move(cm22)); EXPECT_EQ(cm1.nr,cm2.nr); EXPECT_EQ(cm1.nc,cm2.nc); EXPECT_EQ(cm1.size,cm2.size); @@ -338,15 +338,15 @@ TEST_F(ComplexMatrixTest,OperatorMultMatrix) EXPECT_EQ(cm33.nr,3); EXPECT_EQ(cm33.nc,3); EXPECT_EQ(cm33.size,9); - EXPECT_COMPLEX_EQ(cm33(0,0),std::complex{-46.0,72.0 }); + EXPECT_COMPLEX_EQ(cm33(0,0),std::complex{-46.0,72.0 }); EXPECT_COMPLEX_EQ(cm33(0,1),std::complex{-46.0,118.0 }); EXPECT_COMPLEX_EQ(cm33(0,2),std::complex{-46.0,164.0 }); EXPECT_COMPLEX_EQ(cm33(1,0),std::complex{-54.0,84.0 }); EXPECT_COMPLEX_EQ(cm33(1,1),std::complex{-54.0,138.0 }); - EXPECT_COMPLEX_EQ(cm33(1,2),std::complex{-54.0,192.0 }); + EXPECT_COMPLEX_EQ(cm33(1,2),std::complex{-54.0,192.0 }); EXPECT_COMPLEX_EQ(cm33(2,0),std::complex{-62.0,96.0 }); EXPECT_COMPLEX_EQ(cm33(2,1),std::complex{-62.0,158.0 }); - EXPECT_COMPLEX_EQ(cm33(2,2),std::complex{-62.0,220.0 }); + EXPECT_COMPLEX_EQ(cm33(2,2),std::complex{-62.0,220.0 }); EXPECT_DEATH(cm22 * cm32,""); } @@ -525,7 +525,7 @@ TEST_F(ComplexMatrixTest,ScaleSumArray) cmout = new 
ModuleBase::ComplexMatrix*[2]; cmin1 = new ModuleBase::ComplexMatrix*[2]; cmin2 = new ModuleBase::ComplexMatrix*[2]; - + cmin1[0] = &cm1; cmin1[1] = &cm2; cmin2[0] = &cm3; @@ -563,7 +563,7 @@ TEST_F(ComplexMatrixTest,print) EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t")); ifs.close(); remove("printtest1.log"); -// The condition of std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag +// The condition of std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag ofs.open("printtest2.log"); cm22.print(ofs,1e-10,2); ofs.close(); diff --git a/source/module_base/test/inverse_matrix_test.cpp b/source/module_base/test/inverse_matrix_test.cpp index a871f906cd..df68f58a56 100644 --- a/source/module_base/test/inverse_matrix_test.cpp +++ b/source/module_base/test/inverse_matrix_test.cpp @@ -19,7 +19,7 @@ //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37. namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } TEST(InverseMatrixComplexTest, InverseMatrixComplex) diff --git a/source/module_base/test/math_sphbes_test.cpp b/source/module_base/test/math_sphbes_test.cpp index e72c6e289c..521d4dc2f4 100644 --- a/source/module_base/test/math_sphbes_test.cpp +++ b/source/module_base/test/math_sphbes_test.cpp @@ -352,27 +352,15 @@ TEST_F(Sphbes, Zeros) int lmax = 20; int nzeros = 500; - double* zeros = new double[nzeros*(lmax+1)]; + double* zeros = new double[nzeros]; for (int l = 0; l <= lmax; ++l) { - ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros, false); + ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros); for (int i = 0; i < nzeros; ++i) { EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-14); } } - - - ModuleBase::Sphbes::sphbes_zeros(lmax, nzeros, zeros, true); - for (int l = 0; l <= lmax; ++l) - { - for (int i = 0; i < nzeros; ++i) - { - 
EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[l*nzeros+i])), 1e-14); - } - } - - delete[] zeros; } TEST_F(Sphbes, ZerosOld) diff --git a/source/module_base/test/math_ylmreal_test.cpp b/source/module_base/test/math_ylmreal_test.cpp index 13d0bd2b69..d5e7a504ed 100644 --- a/source/module_base/test/math_ylmreal_test.cpp +++ b/source/module_base/test/math_ylmreal_test.cpp @@ -13,16 +13,16 @@ ***********************************************/ /** - * For lmax <5 cases, the reference values are calculated by the formula from + * For lmax <5 cases, the reference values are calculated by the formula from * https://formulasearchengine.com/wiki/Table_of_spherical_harmonics. Note, these - * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item + * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item * (-1)^m is multiplied. * For lmax >=5, the reference values are calculated by YlmReal::Ylm_Real. * * - Tested functions of class YlmReal * - Ylm_Real * - Ylm_Real2 - * - rlylm + * - rlylm * - YlmRealTemplate (double and float) * * - Tested functions of class Ylm @@ -30,9 +30,9 @@ * - sph_harm * - rl_sph_harm * - grad_rl_sph_harm - * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real (Cartesian input) + * - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and get_ylm_real (Cartesian input) * - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) and rlylm (Cartesian input) - * + * */ @@ -40,7 +40,7 @@ //mock functions of WARNING_QUIT and WARNING namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} void WARNING(const std::string &file,const std::string &description) {return ;} } @@ -58,7 +58,7 @@ class YlmRealTest : public 
testing::Test ModuleBase::Vector3 *g; //vectors of the 4 points double *ref; //reference of Ylm double *rly; //Ylm - double (*rlgy)[3]; //the gradient of Ylm + double (*rlgy)[3]; //the gradient of Ylm std::vector rlyvector; //Ylm std::vector> rlgyvector; //the gradient of Ylm @@ -91,101 +91,101 @@ class YlmRealTest : public testing::Test double y4m4(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(35./M_PI) * x*y*(x*x - y*y) / (r*r*r*r);} //the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm - //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz + //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz double rlgyref[4][64][3] = { - { { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, - { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 1.09255e+00, -0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.11953e+00}, { 1.37114e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 4.57046e-01, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.77013e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -1.77013e+00, 0.00000e+00}, { 1.26943e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.00714e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -9.46175e-01, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 2.50334e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 2.50334e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-2.26473e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.52947e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.39677e+00}, 
{-0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.44619e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.46771e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - {-1.90708e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, - { 2.76362e+00, 0.00000e+00, -0.00000e+00}, {-0.00000e+00, 9.21205e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.76362e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -2.01826e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 4.09910e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, { 3.16161e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 4.51658e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-3.28564e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.40813e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -3.11349e+00}, - {-0.00000e+00, -0.00000e+00, 0.00000e+00}, { 3.63241e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 2.59458e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-4.95014e+00, 0.00000e+00, -0.00000e+00}, + { { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, {-6.30783e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -1.09255e+00}, + { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 1.09255e+00, -0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -1.11953e+00}, { 1.37114e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 4.57046e-01, 0.00000e+00}, + { 
0.00000e+00, 0.00000e+00, 1.44531e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.77013e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, -1.77013e+00, 0.00000e+00}, { 1.26943e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.00714e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.89235e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -9.46175e-01, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 2.50334e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 2.50334e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-2.26473e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.52947e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00, 0.00000e+00}, + { 2.44619e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.46771e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, + {-1.90708e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, + { 2.76362e+00, 0.00000e+00, -0.00000e+00}, {-0.00000e+00, 9.21205e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.76362e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.02739e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, -2.01826e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00, 0.00000e+00}, { 4.09910e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 4.09910e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -2.38995e+00}, { 3.16161e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, 4.51658e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.31900e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.28564e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.40813e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, -3.11349e+00}, + {-0.00000e+00, -0.00000e+00, 0.00000e+00}, { 3.63241e+00, 
0.00000e+00, -0.00000e+00}, { 0.00000e+00, 2.59458e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-4.95014e+00, 0.00000e+00, -0.00000e+00}, { 0.00000e+00, -4.95014e+00, 0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.37114e+00, -0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.77013e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 1.77013e+00, 0.00000e+00}, { 0.00000e+00, 1.26943e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.00714e+00}, { 0.00000e+00, 1.89235e+00, -0.00000e+00}, {-9.46175e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.77013e+00}, { 0.00000e+00, 2.50334e+00, -0.00000e+00}, - {-2.50334e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-4.52947e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -2.26473e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.46771e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.44619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, - { 0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, - { 0.00000e+00, -2.76362e+00, 0.00000e+00}, { 9.21205e-01, 
0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00, 0.00000e+00}, { 2.01826e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00, 0.00000e+00}, - { 4.09910e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.40813e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 2.59458e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.63241e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, -2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.95014e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 4.95014e+00, -0.00000e+00} + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, -6.30783e-01, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 1.09255e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.37114e+00, -0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.77013e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 1.77013e+00, 0.00000e+00}, { 0.00000e+00, 1.26943e+00, -0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.00714e+00}, { 0.00000e+00, 1.89235e+00, -0.00000e+00}, {-9.46175e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 
0.00000e+00, 1.77013e+00}, { 0.00000e+00, 2.50334e+00, -0.00000e+00}, + {-2.50334e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.75425e+00}, {-4.52947e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -2.26473e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.39677e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.46771e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.44619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.07566e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-3.28191e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -3.28191e+00, 0.00000e+00}, + { 0.00000e+00, -1.90708e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, + { 0.00000e+00, -2.76362e+00, 0.00000e+00}, { 9.21205e-01, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00, 0.00000e+00}, { 2.01826e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00, 0.00000e+00}, + { 4.09910e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 1.40813e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 2.59458e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.63241e+00, -0.00000e+00}, + { 0.00000e+00, 0.00000e+00, -2.64596e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.95014e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 4.95014e+00, -0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, 
0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.22045e-16}, {-0.00000e+00, 0.00000e+00, -0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 2.23906e+00}, {-1.82818e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.82818e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.84324e-16, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 5.55112e-17, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -2.67619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.30756e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-5.52973e-16, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.66533e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.67801e+00}, {-3.62357e+00, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -3.62357e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-1.22267e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.68219e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.93038e-32, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -6.16298e-33, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -4.66097e+00, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 8.98664e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, 6.93334e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - { 1.77767e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.22209e-32, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 7.64784e+00}, {-5.78122e+00, 0.00000e+00, 0.00000e+00}, - 
{ 0.00000e+00, -5.78122e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-3.91011e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.17757e-15, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, - {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.67737e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -5.84671e-32, 0.00000e+00}, - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.13319e-47, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.26157e+00}, {-1.09255e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -1.09255e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.22045e-16}, {-0.00000e+00, 0.00000e+00, -0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 2.23906e+00}, {-1.82818e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.82818e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 8.81212e-16}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-1.84324e-16, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 5.55112e-17, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 3.38514e+00}, {-2.67619e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -2.67619e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 2.30756e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-5.52973e-16, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.66533e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.67801e+00}, {-3.62357e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -3.62357e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.87108e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-1.22267e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 3.68219e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.93038e-32, 0.00000e+00, 
0.00000e+00}, { 0.00000e+00, -6.16298e-33, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 6.10264e+00}, {-4.66097e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -4.66097e+00, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 8.98664e-15}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, {-2.30221e-15, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, 6.93334e-16, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + { 1.77767e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -2.22209e-32, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 7.64784e+00}, {-5.78122e+00, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -5.78122e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 1.51096e-14}, {-0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-3.91011e-15, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 1.17757e-15, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, + {-0.00000e+00, 0.00000e+00, 0.00000e+00}, { 4.67737e-31, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -5.84671e-32, 0.00000e+00}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 1.13319e-47, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, -1.41649e-48, 0.00000e+00} }, { - { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, - { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 3.64183e-01, 3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00, 6.30783e-01}, - {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01, 0.00000e+00}, - {-7.46353e-01, -7.46353e-01, 0.00000e+00}, { 0.00000e+00, 3.04697e-01, -1.21879e+00}, { 3.04697e-01, 0.00000e+00, -1.21879e+00}, - { 9.63537e-01, -9.63537e-01, 4.01253e-16}, { 9.63537e-01, 9.63537e-01, 9.63537e-01}, {-4.44089e-16, 1.18009e+00, -2.22045e-16}, - {-1.18009e+00, -1.11022e-16, 0.00000e+00}, { 4.88603e-01, 4.88603e-01, 1.30294e+00}, 
{-1.03006e+00, -7.72548e-01, 7.72548e-01}, - {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, - { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00, 1.92296e-16, 6.81324e-01}, { 9.63537e-01, 9.63537e-01, -1.44756e-16}, - {-9.63537e-01, 9.63537e-01, -5.55112e-17}, { 5.19779e-01, 5.19779e-01, -1.81923e+00}, { 1.40917e+00, 8.05238e-01, 8.05238e-01}, - { 8.05238e-01, 1.40917e+00, 8.05238e-01}, { 0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, - {-2.17439e-01, 1.73951e+00, 1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, - { 1.84503e+00, -1.84503e+00, 6.58625e-16}, { 1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, - {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, - { 7.09144e-01, -7.09144e-01, 1.87222e-16}, { 2.12743e+00, 2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, - { 1.02201e-15, -7.09144e-01, 2.12743e+00}, { 1.81260e+00, 1.81260e+00, 2.58943e+00}, {-2.07154e+00, 2.07154e+00, -1.66969e-15}, - {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00, 5.77778e-17}, - { 1.05183e+00, 1.05183e+00, 4.03986e-17}, { 1.27464e+00, 1.27464e+00, 1.69952e+00}, {-1.28472e+00, -1.20442e+00, 1.92707e+00}, - {-1.20442e+00, -1.28472e+00, 1.92707e+00}, {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, - {-1.11260e+00, -2.08612e+00, 9.27164e-01}, { 2.08612e+00, 1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, - { 1.23002e+00, -1.23002e+00, 2.28018e-15}, { 3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, - {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00, 
9.77875e-17}, + { 0.00000e+00, 0.00000e+00, 0.00000e+00}, { 0.00000e+00, 0.00000e+00, 4.88603e-01}, {-4.88603e-01, 0.00000e+00, 0.00000e+00}, + { 0.00000e+00, -4.88603e-01, 0.00000e+00}, { 3.64183e-01, 3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00, 6.30783e-01}, + {-0.00000e+00, 6.30783e-01, 6.30783e-01}, {-6.30783e-01, 6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01, 0.00000e+00}, + {-7.46353e-01, -7.46353e-01, 0.00000e+00}, { 0.00000e+00, 3.04697e-01, -1.21879e+00}, { 3.04697e-01, 0.00000e+00, -1.21879e+00}, + { 9.63537e-01, -9.63537e-01, 4.01253e-16}, { 9.63537e-01, 9.63537e-01, 9.63537e-01}, {-4.44089e-16, 1.18009e+00, -2.22045e-16}, + {-1.18009e+00, -1.11022e-16, 0.00000e+00}, { 4.88603e-01, 4.88603e-01, 1.30294e+00}, {-1.03006e+00, -7.72548e-01, 7.72548e-01}, + {-7.72548e-01, -1.03006e+00, 7.72548e-01}, {-7.28366e-01, 7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, + { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00, 1.92296e-16, 6.81324e-01}, { 9.63537e-01, 9.63537e-01, -1.44756e-16}, + {-9.63537e-01, 9.63537e-01, -5.55112e-17}, { 5.19779e-01, 5.19779e-01, -1.81923e+00}, { 1.40917e+00, 8.05238e-01, 8.05238e-01}, + { 8.05238e-01, 1.40917e+00, 8.05238e-01}, { 0.00000e+00, -4.44089e-16, 3.24739e-16}, {-1.06523e+00, -1.06523e+00, 2.13046e+00}, + {-2.17439e-01, 1.73951e+00, 1.73951e+00}, {-1.73951e+00, 2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, + { 1.84503e+00, -1.84503e+00, 6.58625e-16}, { 1.45863e+00, 1.11022e-15, 0.00000e+00}, {-8.88178e-16, 1.45863e+00, 0.00000e+00}, + {-1.46807e+00, -1.46807e+00, 5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, + { 7.09144e-01, -7.09144e-01, 1.87222e-16}, { 2.12743e+00, 2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, + { 1.02201e-15, -7.09144e-01, 2.12743e+00}, { 1.81260e+00, 1.81260e+00, 2.58943e+00}, {-2.07154e+00, 2.07154e+00, -1.66969e-15}, + {-3.03637e+00, 
-2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00, 5.77778e-17}, + { 1.05183e+00, 1.05183e+00, 4.03986e-17}, { 1.27464e+00, 1.27464e+00, 1.69952e+00}, {-1.28472e+00, -1.20442e+00, 1.92707e+00}, + {-1.20442e+00, -1.28472e+00, 1.92707e+00}, {-8.52285e-01, 8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, + {-1.11260e+00, -2.08612e+00, 9.27164e-01}, { 2.08612e+00, 1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, + { 1.23002e+00, -1.23002e+00, 2.28018e-15}, { 3.69007e+00, -1.53753e-01, 1.84503e+00}, {-1.53753e-01, 3.69007e+00, 1.84503e+00}, + {-2.35197e+00, 2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00, 9.77875e-17}, { 1.46671e+00, 1.14919e-15, 1.34475e-16} } }; @@ -206,71 +206,71 @@ class YlmRealTest : public testing::Test rlgy = new double[nylm][3]; rlgyvector.resize(nylm,std::vector(3)); ref = new double[64*4]{ - y00(g[0].x, g[0].y, g[0].z), y00(g[1].x, g[1].y, g[1].z), y00(g[2].x, g[2].y, g[2].z), y00(g[3].x, g[3].y, g[3].z), - y10(g[0].x, g[0].y, g[0].z), y10(g[1].x, g[1].y, g[1].z), y10(g[2].x, g[2].y, g[2].z), y10(g[3].x, g[3].y, g[3].z), - y11(g[0].x, g[0].y, g[0].z), y11(g[1].x, g[1].y, g[1].z), y11(g[2].x, g[2].y, g[2].z), y11(g[3].x, g[3].y, g[3].z), - y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), - y20(g[0].x, g[0].y, g[0].z), y20(g[1].x, g[1].y, g[1].z), y20(g[2].x, g[2].y, g[2].z), y20(g[3].x, g[3].y, g[3].z), - y21(g[0].x, g[0].y, g[0].z), y21(g[1].x, g[1].y, g[1].z), y21(g[2].x, g[2].y, g[2].z), y21(g[3].x, g[3].y, g[3].z), - y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z), - y22(g[0].x, g[0].y, g[0].z), y22(g[1].x, g[1].y, g[1].z), y22(g[2].x, g[2].y, g[2].z), y22(g[3].x, g[3].y, g[3].z), - y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, 
g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), - y30(g[0].x, g[0].y, g[0].z), y30(g[1].x, g[1].y, g[1].z), y30(g[2].x, g[2].y, g[2].z), y30(g[3].x, g[3].y, g[3].z), - y31(g[0].x, g[0].y, g[0].z), y31(g[1].x, g[1].y, g[1].z), y31(g[2].x, g[2].y, g[2].z), y31(g[3].x, g[3].y, g[3].z), - y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), - y32(g[0].x, g[0].y, g[0].z), y32(g[1].x, g[1].y, g[1].z), y32(g[2].x, g[2].y, g[2].z), y32(g[3].x, g[3].y, g[3].z), - y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), - y33(g[0].x, g[0].y, g[0].z), y33(g[1].x, g[1].y, g[1].z), y33(g[2].x, g[2].y, g[2].z), y33(g[3].x, g[3].y, g[3].z), - y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), - y40(g[0].x, g[0].y, g[0].z), y40(g[1].x, g[1].y, g[1].z), y40(g[2].x, g[2].y, g[2].z), y40(g[3].x, g[3].y, g[3].z), - y41(g[0].x, g[0].y, g[0].z), y41(g[1].x, g[1].y, g[1].z), y41(g[2].x, g[2].y, g[2].z), y41(g[3].x, g[3].y, g[3].z), - y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), - y42(g[0].x, g[0].y, g[0].z), y42(g[1].x, g[1].y, g[1].z), y42(g[2].x, g[2].y, g[2].z), y42(g[3].x, g[3].y, g[3].z), - y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), - y43(g[0].x, g[0].y, g[0].z), y43(g[1].x, g[1].y, g[1].z), y43(g[2].x, g[2].y, g[2].z), y43(g[3].x, g[3].y, g[3].z), - y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), - y44(g[0].x, g[0].y, g[0].z), y44(g[1].x, g[1].y, g[1].z), y44(g[2].x, g[2].y, g[2].z), y44(g[3].x, g[3].y, g[3].z), - y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), - 0.000000000000000, 
0.000000000000000, 0.935602579627389, 0.090028400200397, - -0.452946651195697, -0.000000000000000, -0.000000000000000, -0.348678494661834, - -0.000000000000000, -0.452946651195697, -0.000000000000000, -0.348678494661834, - -0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.000000000000000, -0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.489238299435250, 0.000000000000000, -0.000000000000000, -0.376615818502422, - 0.000000000000000, -0.489238299435250, -0.000000000000000, 0.376615818502422, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.532615198330370, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - -0.656382056840170, -0.000000000000000, -0.000000000000000, -0.168427714314628, - -0.000000000000000, -0.656382056840170, -0.000000000000000, -0.168427714314628, - -0.317846011338142, -0.317846011338142, 1.017107236282055, 0.226023830284901, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, - 0.460602629757462, -0.460602629757462, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.409424559784410, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.136474853261470, - -0.000000000000000, 0.000000000000000, -0.000000000000000, -0.136474853261470, - -0.504564900728724, -0.504564900728724, 0.000000000000000, -0.598002845308118, - -0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, - 0.683184105191914, -0.683184105191914, 0.000000000000000, -0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.202424920056864, - 0.000000000000000, 0.000000000000000, 1.092548430592079, -0.350435072502801, - 
0.451658037912587, 0.000000000000000, -0.000000000000000, 0.046358202625865, - 0.000000000000000, 0.451658037912587, -0.000000000000000, 0.046358202625865, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.492067081245654, - -0.469376801586882, -0.000000000000000, -0.000000000000000, 0.187354445356332, - -0.000000000000000, 0.469376801586882, -0.000000000000000, -0.187354445356332, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.355076798886913, - 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, - 0.518915578720260, 0.000000000000000, -0.000000000000000, -0.443845998608641, - 0.000000000000000, 0.518915578720260, -0.000000000000000, -0.443845998608641, - 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, - 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.452635881587108, - -0.707162732524596, 0.000000000000000, -0.000000000000000, 0.120972027847095, - -0.000000000000000, 0.707162732524596, -0.000000000000000, -0.120972027847095 - } ; + y00(g[0].x, g[0].y, g[0].z), y00(g[1].x, g[1].y, g[1].z), y00(g[2].x, g[2].y, g[2].z), y00(g[3].x, g[3].y, g[3].z), + y10(g[0].x, g[0].y, g[0].z), y10(g[1].x, g[1].y, g[1].z), y10(g[2].x, g[2].y, g[2].z), y10(g[3].x, g[3].y, g[3].z), + y11(g[0].x, g[0].y, g[0].z), y11(g[1].x, g[1].y, g[1].z), y11(g[2].x, g[2].y, g[2].z), y11(g[3].x, g[3].y, g[3].z), + y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), + y20(g[0].x, g[0].y, g[0].z), y20(g[1].x, g[1].y, g[1].z), y20(g[2].x, g[2].y, g[2].z), y20(g[3].x, g[3].y, g[3].z), + y21(g[0].x, g[0].y, g[0].z), y21(g[1].x, g[1].y, g[1].z), y21(g[2].x, g[2].y, g[2].z), y21(g[3].x, g[3].y, g[3].z), + y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z), + y22(g[0].x, g[0].y, g[0].z), y22(g[1].x, g[1].y, 
g[1].z), y22(g[2].x, g[2].y, g[2].z), y22(g[3].x, g[3].y, g[3].z), + y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), + y30(g[0].x, g[0].y, g[0].z), y30(g[1].x, g[1].y, g[1].z), y30(g[2].x, g[2].y, g[2].z), y30(g[3].x, g[3].y, g[3].z), + y31(g[0].x, g[0].y, g[0].z), y31(g[1].x, g[1].y, g[1].z), y31(g[2].x, g[2].y, g[2].z), y31(g[3].x, g[3].y, g[3].z), + y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), + y32(g[0].x, g[0].y, g[0].z), y32(g[1].x, g[1].y, g[1].z), y32(g[2].x, g[2].y, g[2].z), y32(g[3].x, g[3].y, g[3].z), + y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), + y33(g[0].x, g[0].y, g[0].z), y33(g[1].x, g[1].y, g[1].z), y33(g[2].x, g[2].y, g[2].z), y33(g[3].x, g[3].y, g[3].z), + y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), + y40(g[0].x, g[0].y, g[0].z), y40(g[1].x, g[1].y, g[1].z), y40(g[2].x, g[2].y, g[2].z), y40(g[3].x, g[3].y, g[3].z), + y41(g[0].x, g[0].y, g[0].z), y41(g[1].x, g[1].y, g[1].z), y41(g[2].x, g[2].y, g[2].z), y41(g[3].x, g[3].y, g[3].z), + y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), + y42(g[0].x, g[0].y, g[0].z), y42(g[1].x, g[1].y, g[1].z), y42(g[2].x, g[2].y, g[2].z), y42(g[3].x, g[3].y, g[3].z), + y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), + y43(g[0].x, g[0].y, g[0].z), y43(g[1].x, g[1].y, g[1].z), y43(g[2].x, g[2].y, g[2].z), y43(g[3].x, g[3].y, g[3].z), + y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), + y44(g[0].x, g[0].y, g[0].z), y44(g[1].x, g[1].y, g[1].z), y44(g[2].x, g[2].y, g[2].z), y44(g[3].x, g[3].y, g[3].z), + y4m4(g[0].x, 
g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), + 0.000000000000000, 0.000000000000000, 0.935602579627389, 0.090028400200397, + -0.452946651195697, -0.000000000000000, -0.000000000000000, -0.348678494661834, + -0.000000000000000, -0.452946651195697, -0.000000000000000, -0.348678494661834, + -0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + -0.000000000000000, -0.000000000000000, 0.000000000000000, -0.000000000000000, + 0.489238299435250, 0.000000000000000, -0.000000000000000, -0.376615818502422, + 0.000000000000000, -0.489238299435250, -0.000000000000000, 0.376615818502422, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.532615198330370, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + -0.656382056840170, -0.000000000000000, -0.000000000000000, -0.168427714314628, + -0.000000000000000, -0.656382056840170, -0.000000000000000, -0.168427714314628, + -0.317846011338142, -0.317846011338142, 1.017107236282055, 0.226023830284901, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.258942827786103, + 0.460602629757462, -0.460602629757462, 0.000000000000000, -0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.409424559784410, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.136474853261470, + -0.000000000000000, 0.000000000000000, -0.000000000000000, -0.136474853261470, + -0.504564900728724, -0.504564900728724, 0.000000000000000, -0.598002845308118, + -0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, + -0.000000000000000, -0.000000000000000, -0.000000000000000, 0.350610246256556, + 0.683184105191914, -0.683184105191914, 0.000000000000000, -0.000000000000000, + 0.000000000000000, 0.000000000000000, 
0.000000000000000, -0.202424920056864, + 0.000000000000000, 0.000000000000000, 1.092548430592079, -0.350435072502801, + 0.451658037912587, 0.000000000000000, -0.000000000000000, 0.046358202625865, + 0.000000000000000, 0.451658037912587, -0.000000000000000, 0.046358202625865, + 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.492067081245654, + -0.469376801586882, -0.000000000000000, -0.000000000000000, 0.187354445356332, + -0.000000000000000, 0.469376801586882, -0.000000000000000, -0.187354445356332, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.355076798886913, + 0.000000000000000, 0.000000000000000, 0.000000000000000, -0.000000000000000, + 0.518915578720260, 0.000000000000000, -0.000000000000000, -0.443845998608641, + 0.000000000000000, 0.518915578720260, -0.000000000000000, -0.443845998608641, + 0.000000000000000, -0.000000000000000, 0.000000000000000, 0.000000000000000, + 0.000000000000000, 0.000000000000000, 0.000000000000000, 0.452635881587108, + -0.707162732524596, 0.000000000000000, -0.000000000000000, 0.120972027847095, + -0.000000000000000, 0.707162732524596, -0.000000000000000, -0.120972027847095 + } ; } void TearDown() @@ -293,11 +293,11 @@ TEST_F(YlmRealTest,YlmReal) ModuleBase::YlmReal::Ylm_Real(nylm,ng,g,ylm); for(int i=0;i gplus = g[j]; ModuleBase::Vector3 gminus = g[j]; @@ -352,16 +352,16 @@ TEST_F(YlmRealTest,YlmReal2) ModuleBase::YlmReal::Ylm_Real2(nylm,ng,g,ylm); for(int i=0;i R (20.0, 0.0, 0.0); const double xdr = R.x/R.norm(); const double ydr = R.y/R.norm(); @@ -444,17 +444,17 @@ TEST_F(YlmRealTest, equality_value_test) const double rl = std::pow( R.norm(), L); //std::cout << " rl=" << rl << std::endl; ModuleBase::Ylm::set_coefficients(); - + int nu = 100; - + // Peize Lin change rlya 2016-08-26 std::vector rlya; double rlyb[400]; ModuleBase::Ylm::ZEROS( rlyb, 400); - + ModuleBase::Ylm::rl_sph_harm(L, xdr, ydr, zdr, rlya); 
ModuleBase::Ylm::get_ylm_real(L+1, R, rlyb); - + for (int i=0; i < nu; i++) { double diff = fabs(rlya[i]-rlyb[i]); @@ -467,21 +467,21 @@ TEST_F(YlmRealTest, equality_value_test) TEST_F(YlmRealTest, equality_gradient_test) { - + ModuleBase::Vector3 R (0.1,-0.2,0.5); ModuleBase::Ylm::set_coefficients(); - + //int nu = 100; std::vector rlya; double rlyb[400]; - + std::vector> grlya; double grlyb[400][3]; - + ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya); ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb); - + for (int i = 0; i < 100; i++) { double diffx = fabs(grlya[i][2]-grlyb[i][2]); diff --git a/source/module_base/test/para_json_test.cpp b/source/module_base/test/para_json_test.cpp deleted file mode 100644 index 3ce6ecce49..0000000000 --- a/source/module_base/test/para_json_test.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "module_base/para_json.h" -#include "gtest/gtest.h" -#ifdef __MPI -#include "mpi.h" -#endif - -#include -#include "rapidjson/document.h" -/************************************************ - * unit test of Input::ParaJson - ***********************************************/ - -/** - * - Tested Functions: - * - Init() - * - init json tree from input::Init and check if the json string is valid - */ - -class ParaJsonTest : public ::testing::Test -{ - protected: - std::string testString; -}; - -// check if a string is a valid JSON string -bool isValidJSON(const std::string& jsonString) -{ - rapidjson::Document document; - document.Parse(jsonString.c_str()); - - return !document.HasParseError(); -} - -TEST_F(ParaJsonTest, Init) -{ - //std::string input_file = "./support/INPUT"; - //Input input_tmp; - //EXPECT_NO_THROW(input_tmp.Init(input_file)); - - // int status = system("rm -r ./OUT.autotest/"); - // EXPECT_EQ(status,0); - // Para_Json::Init_json_abacus_readinInfo(); - Para_Json::Init_json_abacus_generalInfo(); - Para_Json::Init_json_abacus(); - Para_Json::Finish_json_tree(); - rapidjson::StringBuffer buffer; - 
rapidjson::Writer writer(buffer); - Para_Json::doc.Accept(writer); - std::string json = buffer.GetString(); - EXPECT_EQ(isValidJSON(json), true); -} - -int main(int argc, char** argv) -{ -#ifdef __MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); - MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); -#endif - testing::InitGoogleTest(&argc, argv); - int result; - result = RUN_ALL_TESTS(); -#ifdef __MPI - MPI_Finalize(); -#endif - return result; -} - diff --git a/source/module_base/test/perf_sphbes_test.cpp b/source/module_base/test/perf_sphbes_test.cpp deleted file mode 100644 index 4c574baa8e..0000000000 --- a/source/module_base/test/perf_sphbes_test.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include"../math_sphbes.h" -#include -#include -#include -#include -#include - -/************************************************ -* performace test of class Sphbes -***********************************************/ - -/** - * Tested function: - * - sphbesj - * - Spherical_Bessel - */ - -class PerfSphbes : public benchmark::Fixture { -public: - const double q = 1; - const int n = 1000; - double stop = 1000.0; - double dr = 0.0; - double* rc, *rinf, *jc, *jinf; - void SetUp(const benchmark::State& state){ - const double rcut = state.range(0) + 0.5; - rc = new double[n + 10]; - rinf = new double[n + 10]; - jc = new double[n + 10]; - jinf = new double[n + 10]; - - // generate data points in (0, rcut] in log scale - double rmin = 0.0001; - double log_rmin = std::log(rmin); - double log_rcut = std::log(rcut); - dr = (log_rcut - log_rmin) / (n-1); - memset(rc, 0, (n+10) * sizeof(double)); - for (int i = 0; i < n; i++) - rc[i] = std::exp(log_rmin + i * dr); - - // generate data points in [rcut, stop] in linear scale - memset(rinf, 0, (n+10) * sizeof(double)); - rinf[0] = rcut; - dr = (stop - rcut) / (n-1); - for (int i = 1; i < n; i++) - rinf[i] += rinf[i-1] + dr; - } - void TearDown(const benchmark::State& state){ - delete[] rc; - delete[] rinf; - delete[] jc; - 
delete[] jinf; - } -}; - -BENCHMARK_DEFINE_F(PerfSphbes, BM_Spherical_Bessel)(benchmark::State& state) { - for (auto _ : state) { - ModuleBase::Sphbes::Spherical_Bessel(n, rc, q, state.range(0), jc); - ModuleBase::Sphbes::Spherical_Bessel(n, rinf, q, state.range(0), jinf); - } -} - -BENCHMARK_DEFINE_F(PerfSphbes, BM_sphbesj)(benchmark::State& state) { - for (auto _ : state) { - ModuleBase::Sphbes::sphbesj(n, rc, q, state.range(0), jc); - ModuleBase::Sphbes::sphbesj(n, rinf, q, state.range(0), jinf); - } -} - -BENCHMARK_REGISTER_F(PerfSphbes, BM_sphbesj)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); -BENCHMARK_REGISTER_F(PerfSphbes, BM_Spherical_Bessel)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond); -BENCHMARK_MAIN(); \ No newline at end of file diff --git a/source/module_base/tool_quit.h b/source/module_base/tool_quit.h index f944696d5a..eafaf673cc 100644 --- a/source/module_base/tool_quit.h +++ b/source/module_base/tool_quit.h @@ -33,13 +33,13 @@ void WARNING(const std::string &file, const std::string &description); * @brief Close .log files and exit * */ -[[noreturn]] void QUIT(void); +void QUIT(void); /** * @brief Close .log files and exit * */ -[[noreturn]] void QUIT(int ret); +void QUIT(int ret); /** * @brief Combine the functions of WARNING and QUIT @@ -47,7 +47,7 @@ void WARNING(const std::string &file, const std::string &description); * @param file The file where warning happens * @param description The warning information */ -[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description); +void WARNING_QUIT(const std::string &file, const std::string &description); /** * @brief Combine the functions of WARNING and QUIT @@ -55,7 +55,7 @@ void WARNING(const std::string &file, const std::string &description); * @param file The file where warning happens * @param description The warning information */ -[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description, int ret); +void 
WARNING_QUIT(const std::string &file, const std::string &description, int ret); /** * @brief Check, if true, WARNING_QUIT diff --git a/source/module_cell/klist.cpp b/source/module_cell/klist.cpp index e11ab183f5..52bc42440d 100644 --- a/source/module_cell/klist.cpp +++ b/source/module_cell/klist.cpp @@ -362,10 +362,6 @@ bool K_Vectors::read_kpoints(const std::string &fn) //recalculate nkstot. nkstot = 0; - /* ISSUE#3482: to distinguish different kline segments */ - std::vector kpt_segids; - kl_segids.clear(); kl_segids.shrink_to_fit(); - int kpt_segid = 0; for(int iks=0; iks> ksx[iks]; @@ -375,9 +371,6 @@ bool K_Vectors::read_kpoints(const std::string &fn) //std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl; assert(nkl[iks] >= 0); nkstot += nkl[iks]; - /* ISSUE#3482: to distinguish different kline segments */ - if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++; - kpt_segids.push_back(kpt_segid); } assert( nkl[nks_special-1] == 1); @@ -396,7 +389,6 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_c[count].x = ksx[iks-1] + is*dx; kvec_c[count].y = ksy[iks-1] + is*dy; kvec_c[count].z = ksz[iks-1] + is*dz; - kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; } } @@ -405,14 +397,15 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_c[count].x = ksx[nks_special-1]; kvec_c[count].y = ksy[nks_special-1]; kvec_c[count].z = ksz[nks_special-1]; - kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; //std::cout << " count = " << count << std::endl; - assert(count == nkstot); - assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */ - - std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;}); + assert (count == nkstot ); + + for(int ik=0; ikkc_done = true; @@ -446,22 +439,15 @@ bool K_Vectors::read_kpoints(const std::string &fn) //recalculate nkstot. 
nkstot = 0; - /* ISSUE#3482: to distinguish different kline segments */ - std::vector kpt_segids; - kl_segids.clear(); kl_segids.shrink_to_fit(); - int kpt_segid = 0; for(int iks=0; iks> ksx[iks]; ifk >> ksy[iks]; ifk >> ksz[iks]; - ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); /* so ifk is ifstream for kpoint, then nkl is number of kpoints on line */ + ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); //std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl; assert(nkl[iks] >= 0); nkstot += nkl[iks]; - /* ISSUE#3482: to distinguish different kline segments */ - if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++; - kpt_segids.push_back(kpt_segid); } assert( nkl[nks_special-1] == 1); @@ -480,7 +466,6 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_d[count].x = ksx[iks-1] + is*dx; kvec_d[count].y = ksy[iks-1] + is*dy; kvec_d[count].z = ksz[iks-1] + is*dz; - kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; } } @@ -489,16 +474,18 @@ bool K_Vectors::read_kpoints(const std::string &fn) kvec_d[count].x = ksx[nks_special-1]; kvec_d[count].y = ksy[nks_special-1]; kvec_d[count].z = ksz[nks_special-1]; - kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */ ++count; //std::cout << " count = " << count << std::endl; - assert(count == nkstot ); - assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */ + assert (count == nkstot ); - std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;}); + for(int ik=0; ikkd_done = true; + } else @@ -1135,9 +1122,6 @@ void K_Vectors::mpi_k(void) Parallel_Common::bcast_int(nmp, 3); - kl_segids.resize(nkstot); - Parallel_Common::bcast_int(kl_segids.data(), nkstot); - Parallel_Common::bcast_double(koffset, 3); this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL]; @@ -1368,8 +1352,6 @@ void K_Vectors::mpi_k_after_vc(void) Parallel_Common::bcast_int(nspin); 
Parallel_Common::bcast_int(nkstot); Parallel_Common::bcast_int(nmp, 3); - kl_segids.resize(nkstot); - Parallel_Common::bcast_int(kl_segids.data(), nkstot); Parallel_Common::bcast_double(koffset, 3); this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL]; diff --git a/source/module_cell/klist.h b/source/module_cell/klist.h index aa92cf29fd..a9e06f8614 100644 --- a/source/module_cell/klist.h +++ b/source/module_cell/klist.h @@ -29,7 +29,6 @@ class K_Vectors int nkstot_full; /// number of k points in full k mesh int nmp[3]; // Number of Monhorst-Pack - std::vector kl_segids; // index of kline segment K_Vectors(); ~K_Vectors(); diff --git a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp index bb447bca4c..617674256a 100644 --- a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp +++ b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp @@ -223,7 +223,7 @@ TEST_F(SltkAtomInputTest, ConstructorNoExpand) GlobalV::test_grid = 1; // this is a bug if radius is too small // because the expand_flag will be false! 
- radius = 0; + radius = 1e-1000; Atom_input Atom_inp(ofs, *ucell, ucell->nat, ucell->ntype, pbc, radius, test_atom_in); EXPECT_FALSE(Atom_inp.getExpandFlag()); // call set_FAtom and Load_atom diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp index 4c6bf9c0eb..dc517bccd7 100644 --- a/source/module_cell/read_atoms.cpp +++ b/source/module_cell/read_atoms.cpp @@ -535,101 +535,100 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn ModuleBase::GlobalFunc::ZEROS(atoms[it].mag,na); for (int ia = 0;ia < na; ia++) { - // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 - ifpos >> v.x >> v.y >> v.z; - mv.x = true ; - mv.y = true ; - mv.z = true ; - atoms[it].vel[ia].set(0,0,0); - atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2 - atoms[it].angle1[ia]=0; - atoms[it].angle2[ia]=0; - atoms[it].m_loc_[ia].set(0,0,0); - - std::string tmpid; - tmpid = ifpos.get(); - - if( (int)tmpid[0] < 0 ) - { - std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl; - exit(1); - } - - bool input_vec_mag=false; - bool input_angle_mag=false; - // read if catch goodbit before "\n" and "#" - while ( (tmpid != "\n") && (ifpos.good()) && (tmpid !="#") ) - { - tmpid = ifpos.get() ; - // old method of reading frozen ions - char tmp = (char)tmpid[0]; - if ( tmp >= 48 && tmp <= 57 ) - { - mv.x = std::stoi(tmpid); - ifpos >> mv.y >> mv.z ; - } - // new method of reading frozen ions and velocities - if ( tmp >= 'a' && tmp <='z') - { - ifpos.putback(tmp); - ifpos >> tmpid; - } - if ( tmpid == "m" ) - { - ifpos >> mv.x >> mv.y >> mv.z ; - } - else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" ) - { - ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z; - } - else if ( tmpid == "mag" || tmpid == "magmom") - { - set_element_mag_zero = true; - double tmpamg=0; - ifpos >> tmpamg; - 
tmp=ifpos.get(); - while (tmp==' ') - { - tmp=ifpos.get(); - } - - if((tmp >= 48 && tmp <= 57) or tmp=='-') - { - ifpos.putback(tmp); - ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z; - atoms[it].m_loc_[ia].x=tmpamg; - atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2)); - input_vec_mag=true; - - } - else - { - ifpos.putback(tmp); - atoms[it].mag[ia]=tmpamg; - } - - // atoms[it].mag[ia]; - } - else if ( tmpid == "angle1") - { - ifpos >> atoms[it].angle1[ia]; - atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - else if ( tmpid == "angle2") - { - ifpos >> atoms[it].angle2[ia]; - atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI; - input_angle_mag=true; - set_element_mag_zero = true; - } - } - // move to next line - while ( (tmpid != "\n") && (ifpos.good()) ) - { - tmpid = ifpos.get(); - } + // modify the reading of frozen ions and velocities -- Yuanbo Li 2021/8/20 + ifpos >> v.x >> v.y >> v.z; + mv.x = true ; + mv.y = true ; + mv.z = true ; + atoms[it].vel[ia].set(0,0,0); + atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2 + atoms[it].angle1[ia]=0; + atoms[it].angle2[ia]=0; + atoms[it].m_loc_[ia].set(0,0,0); + + std::string tmpid; + tmpid = ifpos.get(); + + if( (int)tmpid[0] < 0 ) + { + std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl; + exit(1); + } + + bool input_vec_mag=false; + bool input_angle_mag=false; + while ( (tmpid != "\n") && (ifpos.eof()==false) && (tmpid !="#") ) + { + tmpid = ifpos.get() ; + // old method of reading frozen ions + char tmp = (char)tmpid[0]; + if ( tmp >= 48 && tmp <= 57 ) + { + mv.x = std::stoi(tmpid); + ifpos >> mv.y >> mv.z ; + } + // new method of reading frozen ions and velocities + if ( tmp >= 'a' && tmp <='z') + { + ifpos.putback(tmp); + ifpos >> tmpid; + } + if ( 
tmpid == "m" ) + { + ifpos >> mv.x >> mv.y >> mv.z ; + } + else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" ) + { + ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z; + } + else if ( tmpid == "mag" || tmpid == "magmom") + { + set_element_mag_zero = true; + double tmpamg=0; + ifpos >> tmpamg; + tmp=ifpos.get(); + while (tmp==' ') + { + tmp=ifpos.get(); + } + + if((tmp >= 48 && tmp <= 57) or tmp=='-') + { + ifpos.putback(tmp); + ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z; + atoms[it].m_loc_[ia].x=tmpamg; + atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2)); + input_vec_mag=true; + + } + else + { + ifpos.putback(tmp); + atoms[it].mag[ia]=tmpamg; + } + + // atoms[it].mag[ia]; + } + else if ( tmpid == "angle1") + { + ifpos >> atoms[it].angle1[ia]; + atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI; + input_angle_mag=true; + set_element_mag_zero = true; + } + else if ( tmpid == "angle2") + { + ifpos >> atoms[it].angle2[ia]; + atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI; + input_angle_mag=true; + set_element_mag_zero = true; + } + + } + while ( (tmpid != "\n") && (ifpos.eof()==false) ) + { + tmpid = ifpos.get(); + } std::string mags; //cout<<"mag"<( tetra(nt,i) ) + nk][ibnd]; + } + + itetra[0] = 0; + + ModuleBase::hpsort(4, etetra, itetra); + + //=============================================== + // ...sort in ascending order: e1 < e2 < e3 < e4 + //=============================================== + e1 = etetra [0]; + e2 = etetra [1]; + e3 = etetra [2]; + e4 = etetra [3]; + + //============================================================== + // kp1-kp4 are the irreducible k-points corresponding to e1-e4 + //============================================================== + + kp1 = static_cast( tetra(nt,itetra[0]) )+ nk; + kp2 = static_cast( tetra(nt,itetra[1]) )+ nk; + kp3 = static_cast( tetra(nt,itetra[2]) )+ nk; + kp4 = 
static_cast( tetra(nt,itetra[3]) )+ nk; + + //====================== + // calculate weights wg + //====================== + if (ef >= e4) + { + wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra; + wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra; + wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra; + wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra; + } + else if (ef < e4 && ef >= e3) + { + c4 = 0.250 / ntetra * pow(e4 - ef, 3) / (e4 - e1) / (e4 - e2) + / (e4 - e3); + dosef = 3.0 / ntetra * (e4 - ef) * (e4 - ef) / (e4 - e1) / (e4 - e2) + / (e4 - e3); + wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra - c4 * + (e4 - ef) / (e4 - e1) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; + wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra - c4 * + (e4 - ef) / (e4 - e2) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; + wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra - c4 * + (e4 - ef) / (e4 - e3) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; + wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra - c4 * + (4.0 - (e4 - ef) * (1.0 / (e4 - e1) + 1.0 / (e4 - e2) + + 1.0 / (e4 - e3))) + dosef * (e1 + e2 + e3 + e4 - 4.0 * + ekb[kp4][ibnd]) / 40.0; + } + + else if (ef < e3 && ef >= e2) + { + c1 = 0.250 / ntetra * (ef - e1) * (ef - e1) / (e4 - e1) / (e3 - e1); + c2 = 0.250 / ntetra * (ef - e1) * (ef - e2) * (e3 - ef) + / (e4 - e1) / (e3 - e2) / (e3 - e1); + c3 = 0.250 / ntetra * (ef - e2) * (ef - e2) * (e4 - ef) / (e4 - e2) + / (e3 - e2) / (e4 - e1); + dosef = 1.0 / ntetra / (e3 - e1) / (e4 - e1) * (3.0 * + (e2 - e1) + 6.0 * (ef - e2) - 3.0 * (e3 - e1 + e4 - e2) + * (ef - e2) * (ef - e2) / (e3 - e2) / (e4 - e2)); + wg(kp1, ibnd) = wg(kp1, ibnd) + c1 + (c1 + c2) * (e3 - ef) + / (e3 - e1) + (c1 + c2 + c3) * (e4 - ef) / (e4 - e1) + dosef * + (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; + wg(kp2, ibnd) = wg(kp2, ibnd) + c1 + c2 + c3 + (c2 + c3) + * (e3 - ef) / (e3 - e2) + c3 * (e4 - ef) / (e4 - e2) + dosef * + (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; + 
wg(kp3, ibnd) = wg(kp3, ibnd) + (c1 + c2) * (ef - e1) + / (e3 - e1) + (c2 + c3) * (ef - e2) / (e3 - e2) + dosef * + (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; + wg(kp4, ibnd) = wg(kp4, ibnd) + (c1 + c2 + c3) * (ef - e1) + / (e4 - e1) + c3 * (ef - e2) / (e4 - e2) + dosef * (e1 + e2 + + e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0; + } + else if (ef < e2 && ef >= e1) + { + c4 = 0.250 / ntetra * (ef - e1) * (ef - e1) * (ef - e1) / (e2 - e1) / + (e3 - e1) / (e4 - e1); + dosef = 3.0 / ntetra * (ef - e1) * (ef - e1) / (e2 - e1) / (e3 - e1) + / (e4 - e1); + wg(kp1, ibnd) = wg(kp1, ibnd) + c4 * (4.0 - (ef - e1) + * (1.0 / (e2 - e1) + 1.0 / (e3 - e1) + 1.0 / (e4 - e1))) + + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0; + wg(kp2, ibnd) = wg(kp2, ibnd) + c4 * (ef - e1) / (e2 - e1) + + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0; + wg(kp3, ibnd) = wg(kp3, ibnd) + c4 * (ef - e1) / (e3 - e1) + + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0; + wg(kp4, ibnd) = wg(kp4, ibnd) + c4 * (ef - e1) / (e4 - e1) + + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0; + } // endif + } // enddo + } // enddo + } // enddo + + //===================================================================== + // add correct spin normalization : 2 for LDA, 1 for LSDA calculations + //===================================================================== + for (ik = 0;ik < nks;ik++) + { + for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++) + { + wg(ik, ibnd) = wg(ik, ibnd) * 2.0 / nspin; + } + } + return; +} // end subroutine tweights +*/ + +/* +double Occupy::wsweight(const ModuleBase::Vector3 &r, ModuleBase::Vector3 *rws,const int nrws) +{ + //============================================================ + // integer ir, nreq, nrws + // real(kind=dp) r(3), rrt, ck, eps, rws(0:3,nrws), wsweight + // parameter (eps=1.0e-6) + //============================================================ + const double eps = 1.0e-6; + + int nreq = 1; + + for (int ir = 0;ir < nrws;ir++) 
+ { + const double rrt = r * rws[ir]; + const double ck = rrt - rws[ir].x; + // rrt = r[1]*rws(1,ir) + r[2]*rws(2,ir) + r[3]*rws(3,ir); + // ck = rrt-rws(0,ir); + + if (ck > eps) + { + break; + } + + if (std::abs(ck) < eps) + { + nreq++; + } + } // end do + + const double wswe = 1.0 / nreq; + + return wswe; +} // end function wsweight +*/ + +/* +void Occupy::efermit(double** ekb,const int nband,const int nks,const double &nelec,const int nspin, + const int ntetra,const ModuleBase::matrix &tetra, double &ef) +{ + //======================================================= + // Finds the Fermi energy - tetrahedron method (Bloechl) + // the transformation Ry to eV + //======================================================= + + // parameter : + const int maxiter = 300; + const double eps = 1.0e-10; + + double efbetter; + + //=================================== + // nlw : the minimum energy band + // elw : the lower limit of the fermi ener + // eup : the upper limit of the fermi ener + // external sumkt + // find bounds for the Fermi energy. 
+ //=================================== + const int nlw = max( 1, static_cast( (nelec / 2.0 - 5.0) ) ); + double elw = ekb[nlw][0]; + double eup = ekb[0][GlobalV::NBANDS-1]; + + for (int ik = 1;ik < nks;ik++)// do ik = 2, nks + { + elw = min(elw, ekb[ik][nlw]); + eup = max(eup, ekb[ik][GlobalV::NBANDS-1]); + } + for (int ik = 1;ik < nks;ik++)// do ik = 2, nks + { + elw = min(elw, ekb[ik][nlw]); + eup = max(eup, ekb[ik][GlobalV::NBANDS-1]); + } + + //=============================== + // Bisection method + // the number of states with eup + // the number of states with elw + //=============================== + const double sumkup = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, eup); + const double sumklw = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, elw); + + GlobalV::ofs_running << "\n sumkup = " << sumkup; + GlobalV::ofs_running << "\n sumklw = " << sumklw << std::endl; + + if ((sumkup - nelec) < - eps || (sumklw - nelec) > eps) + { + ModuleBase::WARNING("efermit","unexpected error."); + } + + double better = 1.0e+10; + + bool converge = false; + + double sumkmid = 0.0; + for (int iter = 0;iter < maxiter;iter++) + { + // the number of states with ef + ef = (eup + elw) / 2.0; + sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef); + + if (std::abs(sumkmid - nelec) < better) + { + better = std::abs(sumkmid - nelec); + efbetter = ef; + } + + // converged + if (std::abs(sumkmid - nelec) < eps) + { + converge = true; + break; + } + else if ((sumkmid - nelec) < - eps) + { + elw = ef; + } + else + { + eup = ef; + } + } + if (!converge) + { + // unconverged exit: + // the best available ef is used . 
Needed in some difficult cases + ef = efbetter; + sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef); + } + + //============================================================== + // Check if Fermi level is above any of the highest eigenvalues + //============================================================== + for (int ik = 0;ik < nks;ik++) + { + if (ef > ekb[ik][GlobalV::NBANDS-1] + 1.e-4) + { + std::cout << "\n ef = " << ef; + } + } + return; +} // end subroutine efermit +*/ + +/* +double Occupy::sumkt(double** ekb,const int nband,const int nks,const int nspin,const int ntetra, + const ModuleBase::matrix &tetra,const double &e) +{ + double etetra[4]; + double sum = 0.0; + + int nk = 0 ; + for (int ns = 0; ns < nspin;ns++) + { + //================================================================== + // nk is used to select k-points with up (ns=1) or down (ns=2) spin + //================================================================== + if (ns == 1) + { + nk = 0; + } + else + { + nk = nks / 2; + } + + for (int nt = 0; nt < ntetra; nt++) + { + for (int ibnd = 0; ibnd < GlobalV::NBANDS; ibnd++) + { + //====================================================== + // etetra are the energies at the vertexes of the nt-th + // tetrahedron + //====================================================== + for (int i = 0; i < 4; i++) + { + etetra [i] = ekb[ static_cast( (tetra(i, nt) + nk) )][ ibnd ]; + } + + piksort(4, etetra); + //=========================================== + //sort in ascending order: e1 < e2 < e3 < e4 + //=========================================== + const double e1 = etetra [0]; + const double e2 = etetra [1]; + const double e3 = etetra [2]; + const double e4 = etetra [3]; + + //=============================================== + // calculate sum over k of the integrated charge + //=============================================== + if (e >= e4) + { + sum += 1.0 / ntetra; + } + else if (e < e4 && e >= e3) + { + sum += 1.0 / ntetra * (1.0 - pow((e4 - 
e), 3) / (e4 - e1) + / (e4 - e2) / (e4 - e3)); + } + else if (e < e3 && e >= e2) + { + sum += 1.0 / ntetra / (e3 - e1) / (e4 - e1) * + ((e2 - e1) * (e2 - e1) + 3.0 * (e2 - e1) * (e - e2) + + 3.0 * (e - e2) * (e - e2) - (e3 - e1 + e4 - e2) / + (e3 - e2) / (e4 - e2) * pow((e - e2), 3)); + } + else if (e < e2 && e >= e1) + { + sum += 1.0 / ntetra * pow((e - e1), 3) / + (e2 - e1) / (e3 - e1) / (e4 - e1); + } + }//ibnd + }//nt + }//ns + +// add correct spin normalization : 2 for LDA, 1 for LSDA calculations + sum *= 2.0 / nspin; + return sum; +} // end function sumkt +*/ + +/* +void Occupy::piksort(const int n, double *a) +{ + int i; + bool b = true; + for (int j = 1;j < n;j++) // do j = 2, n + { + const double temp = a [j]; + for (i = j - 1;i >= 0;i--) // do i = j - 1, 1, - 1 + { + if (a [i] <= temp) + { + b = false; + break; + } + a [i + 1] = a [i]; + } + if (b) + { + i = 0; + } + a [i + 1] = temp; + } + return; +} //end subroutine piksort +*/ diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 0b2608e5ea..520dcfd176 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -409,7 +409,7 @@ namespace ModuleESolver } } - this->conv_elec = (drho < this->scf_thr && iter!=GlobalV::MIXING_RESTART); + this->conv_elec = (drho < this->scf_thr); // If drho < hsolver_error in the first iter or drho < scf_thr, we do not change rho. 
if (drho < hsolver_error || this->conv_elec) @@ -435,16 +435,8 @@ namespace ModuleESolver // } // p_chgmix->auto_set(bandgap_for_autoset, GlobalC::ucell); // } - // mixing will restart after GlobalV::MIXING_RESTART steps - // So, GlobalV::MIXING_RESTART=1 means mix from scratch - if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1) - { - // do not mix charge density - } - else - { - p_chgmix->mix_rho(pelec->charge); // update chr->rho by mixing - } + + p_chgmix->mix_rho(pelec->charge); if (GlobalV::SCF_THR_TYPE == 2) pelec->charge->renormalize_rho(); // renormalize rho in R-space would induce a error in K-space //----------charge mixing done----------- } @@ -475,11 +467,6 @@ namespace ModuleESolver bool stop = this->do_after_converge(iter); if(stop) {std::cout << "break\n"; break;} } - // notice for restart - if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1) - { - std::cout<<"SCF restart after this step!"<classname, "Run"); diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 4c4d6c342f..8fdc9217fd 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -309,7 +309,7 @@ namespace ModuleESolver GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; - if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0 || INPUT.out_proj_band != 0) + if (INPUT.out_dos != 0 || INPUT.out_band != 0 || INPUT.out_proj_band != 0) { GlobalV::ofs_running << "\n\n\n\n"; GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -331,7 +331,7 @@ namespace ModuleESolver int nspin0 = (GlobalV::NSPIN == 2) ? 
2 : 1; - if (INPUT.out_band[0]) // pengfei 2014-10-13 + if (INPUT.out_band) // pengfei 2014-10-13 { int nks = 0; if (nspin0 == 1) @@ -348,15 +348,7 @@ namespace ModuleESolver std::stringstream ss2; ss2 << GlobalV::global_out_dir << "BANDS_" << is + 1 << ".dat"; GlobalV::ofs_running << "\n Output bands in file: " << ss2.str() << std::endl; - ModuleIO::nscf_band(is, - ss2.str(), - nks, - GlobalV::NBANDS, - 0.0, - INPUT.out_band[1], - this->pelec->ekb, - this->kv, - &(GlobalC::Pkpoints)); + ModuleIO::nscf_band(is, ss2.str(), nks, GlobalV::NBANDS, 0.0, this->pelec->ekb, this->kv, &(GlobalC::Pkpoints)); } } // out_band @@ -491,19 +483,8 @@ namespace ModuleESolver template void ESolver_KS_LCAO::eachiterinit(const int istep, const int iter) { - if (iter == 1 || iter == GlobalV::MIXING_RESTART) - { - if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart - { - this->p_chgmix->set_mixing(GlobalV::MIXING_MODE, - GlobalV::MIXING_BETA, - GlobalV::MIXING_NDIM, - GlobalV::MIXING_GG0, - GlobalV::MIXING_TAU, - GlobalV::MIXING_BETA_MAG); - } + if (iter == 1) this->p_chgmix->mix_reset(); - } // mohan update 2012-06-05 this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(); diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp index 07779f62ec..294f1636f7 100644 --- a/source/module_esolver/esolver_ks_pw.cpp +++ b/source/module_esolver/esolver_ks_pw.cpp @@ -492,19 +492,9 @@ void ESolver_KS_PW::othercalculation(const int istep) template void ESolver_KS_PW::eachiterinit(const int istep, const int iter) { - if (iter == 1 || iter == GlobalV::MIXING_RESTART) - { - if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart - { - this->p_chgmix->set_mixing(GlobalV::MIXING_MODE, - GlobalV::MIXING_BETA, - GlobalV::MIXING_NDIM, - GlobalV::MIXING_GG0, - GlobalV::MIXING_TAU, - GlobalV::MIXING_BETA_MAG); - } + if (iter == 1) this->p_chgmix->mix_reset(); - } + // mohan move harris functional 
to here, 2012-06-05 // use 'rho(in)' and 'v_h and v_xc'(in) this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband(); @@ -904,58 +894,6 @@ void ESolver_KS_PW::afterscf(const int istep) this->kspw_psi[0].get_pointer() - this->kspw_psi[0].get_psi_bias(), this->psi[0].size()); } - - if(INPUT.band_print_num > 0) - { - std::complex * wfcr = new std::complex[this->pw_rho->nxyz]; - double * rho_band = new double [this->pw_rho->nxyz]; - for(int i = 0; i < this->pw_rho->nxyz; i++) - { - rho_band[i] = 0.0; - } - - for(int i = 0; i < INPUT.band_print_num; i++) - { - int ib = INPUT.bands_to_print[i]; - for(int ik = 0; ik < this->kv.nks; ik++) - { - this->psi->fix_k(ik); - this->pw_wfc->recip_to_real(this->ctx,&psi[0](ib,0),wfcr,ik); - - double w1 = static_cast(this->kv.wk[ik] / GlobalC::ucell.omega); - - for(int i = 0; i < this->pw_rho->nxyz; i++) - { - rho_band[i] += std::norm(wfcr[i]) * w1; - } - } - - std::stringstream ssc; - ssc << GlobalV::global_out_dir << "band" << ib << ".cube"; - - ModuleIO::write_rho - ( -#ifdef __MPI - this->pw_big->bz, - this->pw_big->nbz, - this->pw_big->nplane, - this->pw_big->startz_current, -#endif - rho_band, - 0, - GlobalV::NSPIN, - 0, - ssc.str(), - this->pw_rho->nx, - this->pw_rho->ny, - this->pw_rho->nz, - 0.0, - &(GlobalC::ucell), - 11); - } - delete[] wfcr; - delete[] rho_band; - } } template @@ -1021,7 +959,7 @@ void ESolver_KS_PW::postprocess() GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl; GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl; - if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0) + if (INPUT.out_dos != 0 || INPUT.out_band != 0) { GlobalV::ofs_running << "\n\n\n\n"; GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -1063,7 +1001,7 @@ void ESolver_KS_PW::postprocess() } } - if (INPUT.out_band[0]) // pengfei 2014-10-13 + if (INPUT.out_band) // 
pengfei 2014-10-13 { int nks = 0; if (nspin0 == 1) @@ -1084,7 +1022,6 @@ void ESolver_KS_PW::postprocess() nks, GlobalV::NBANDS, 0.0, - INPUT.out_band[1], this->pelec->ekb, this->kv, &(GlobalC::Pkpoints)); diff --git a/source/module_hamilt_general/module_xc/test/test_xc.cpp b/source/module_hamilt_general/module_xc/test/test_xc.cpp index a770a88458..558556b66b 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } namespace GlobalV @@ -95,7 +95,7 @@ class XCTest_PBEsol : public testing::Test e_gga.push_back(e); v1_gga.push_back(v1); v2_gga.push_back(v2); - } + } } }; diff --git a/source/module_hamilt_general/module_xc/test/test_xc1.cpp b/source/module_hamilt_general/module_xc/test/test_xc1.cpp index bc5c439630..8e7a451e71 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc1.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc1.cpp @@ -12,7 +12,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } namespace GlobalV diff --git a/source/module_hamilt_general/module_xc/test/test_xc2.cpp b/source/module_hamilt_general/module_xc/test/test_xc2.cpp index 5bf75a3c68..4b1b7e888e 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc2.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc2.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } namespace GlobalV @@ -202,7 +202,7 @@ class XCTest_PZ_SPN : public testing::Test 
e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -238,7 +238,7 @@ class XCTest_SLATER1_SPN : public testing::Test e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -273,7 +273,7 @@ class XCTest_SLATER_RXC_SPN : public testing::Test e_lda.push_back(e); v1_lda.push_back(v1); v2_lda.push_back(v2); - } + } } }; @@ -310,7 +310,7 @@ class XCTest_P86_SPN : public testing::Test v1_gga.push_back(v1); v2_gga.push_back(v2); v3_gga.push_back(v3); - } + } } }; diff --git a/source/module_hamilt_general/module_xc/test/test_xc4.cpp b/source/module_hamilt_general/module_xc/test/test_xc4.cpp index b4c8b70093..114c817b0f 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc4.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc4.cpp @@ -11,7 +11,7 @@ namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);} + void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} } namespace GlobalV @@ -49,7 +49,7 @@ class XCTest_SCAN : public testing::Test v2_.push_back(v2); v3_.push_back(v3); } - } + } }; TEST_F(XCTest_SCAN, set_xc_type) diff --git a/source/module_hamilt_general/module_xc/test/test_xc5.cpp b/source/module_hamilt_general/module_xc/test/test_xc5.cpp index d9dfed1b20..bd2f87da8a 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc5.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc5.cpp @@ -25,6 +25,9 @@ class XCTest_VXC : public testing::Test double et2 = 0, vt2 = 0; ModuleBase::matrix v2; + double et4 = 0, vt4 = 0; + ModuleBase::matrix v4; + void SetUp() { ModulePW::PW_Basis rhopw; @@ -84,6 +87,13 @@ class XCTest_VXC : public testing::Test vt2 = std::get<1>(etxc_vtxc_v); v2 = std::get<2>(etxc_vtxc_v); + GlobalV::NSPIN = 4; + GlobalV::DOMAG = true; + etxc_vtxc_v + = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell); + et4 = std::get<0>(etxc_vtxc_v); + vt4 = std::get<1>(etxc_vtxc_v); + v4 = 
std::get<2>(etxc_vtxc_v); } }; @@ -111,6 +121,29 @@ TEST_F(XCTest_VXC, set_xc_type) EXPECT_NEAR(v2(1,3),-1.97506482,1.0e-8); EXPECT_NEAR(v2(1,4),-2.160374198,1.0e-8); + EXPECT_NEAR(et4,-27.40098253,1.0e-8); + EXPECT_NEAR(vt4,-35.81948838,1.0e-8); + EXPECT_NEAR(v4(0,0),0,1.0e-8); + EXPECT_NEAR(v4(0,1),-1.559604078,1.0e-8); + EXPECT_NEAR(v4(0,2),-1.920028447,1.0e-8); + EXPECT_NEAR(v4(0,3),-2.168396069,1.0e-8); + EXPECT_NEAR(v4(0,4),-2.36419592,1.0e-8); + EXPECT_NEAR(v4(1,0),0,1.0e-8); + EXPECT_NEAR(v4(1,1),-0.09308179605,1.0e-8); + EXPECT_NEAR(v4(1,2),-0.123132664,1.0e-8); + EXPECT_NEAR(v4(1,3),-0.144332804,1.0e-8); + EXPECT_NEAR(v4(1,4),-0.16127282,1.0e-8); + EXPECT_NEAR(v4(2,0),0,1.0e-8); + EXPECT_NEAR(v4(2,1),-0.9308179605,1.0e-8); + EXPECT_NEAR(v4(2,2),-1.23132664,1.0e-8); + EXPECT_NEAR(v4(2,3),-1.44332804,1.0e-8); + EXPECT_NEAR(v4(2,4),-1.6127282,1.0e-8); + EXPECT_NEAR(v4(3,0),0,1.0e-8); + EXPECT_NEAR(v4(3,1),-0.09308179605,1.0e-8); + EXPECT_NEAR(v4(3,2),-0.123132664,1.0e-8); + EXPECT_NEAR(v4(3,3),-0.144332804,1.0e-8); + EXPECT_NEAR(v4(3,4),-0.16127282,1.0e-8); + } class XCTest_VXC_Libxc : public testing::Test @@ -123,6 +156,9 @@ class XCTest_VXC_Libxc : public testing::Test double et2 = 0, vt2 = 0; ModuleBase::matrix v2; + double et4 = 0, vt4 = 0; + ModuleBase::matrix v4; + void SetUp() { ModulePW::PW_Basis rhopw; @@ -182,6 +218,13 @@ class XCTest_VXC_Libxc : public testing::Test vt2 = std::get<1>(etxc_vtxc_v); v2 = std::get<2>(etxc_vtxc_v); + GlobalV::NSPIN = 4; + GlobalV::DOMAG = true; + etxc_vtxc_v + = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell); + et4 = std::get<0>(etxc_vtxc_v); + vt4 = std::get<1>(etxc_vtxc_v); + v4 = std::get<2>(etxc_vtxc_v); } }; @@ -209,6 +252,28 @@ TEST_F(XCTest_VXC_Libxc, set_xc_type) EXPECT_NEAR(v2(1,3),-1.975058937,1.0e-8); EXPECT_NEAR(v2(1,4),-2.160368003,1.0e-8); + EXPECT_NEAR(et4,-27.28201062,1.0e-8); + EXPECT_NEAR(vt4,-35.98253991,1.0e-8); + EXPECT_NEAR(v4(0,0),0,1.0e-8); + EXPECT_NEAR(v4(0,1),-1.268278149,1.0e-8); + 
EXPECT_NEAR(v4(0,2),-1.598108222,1.0e-8); + EXPECT_NEAR(v4(0,3),-1.828079634,1.0e-8); + EXPECT_NEAR(v4(0,4),-2.010634115,1.0e-8); + EXPECT_NEAR(v4(1,0),0,1.0e-8); + EXPECT_NEAR(v4(1,1),-0.1255782493,1.0e-8); + EXPECT_NEAR(v4(1,2),-0.1582362929,1.0e-8); + EXPECT_NEAR(v4(1,3),-0.1810068558,1.0e-8); + EXPECT_NEAR(v4(1,4),-0.1990824429,1.0e-8); + EXPECT_NEAR(v4(2,0),0,1.0e-8); + EXPECT_NEAR(v4(2,1),-1.255782493,1.0e-8); + EXPECT_NEAR(v4(2,2),-1.582362929,1.0e-8); + EXPECT_NEAR(v4(2,3),-1.810068558,1.0e-8); + EXPECT_NEAR(v4(2,4),-1.990824429,1.0e-8); + EXPECT_NEAR(v4(3,0),0,1.0e-8); + EXPECT_NEAR(v4(3,1),-0.1255782493,1.0e-8); + EXPECT_NEAR(v4(3,2),-0.1582362929,1.0e-8); + EXPECT_NEAR(v4(3,3),-0.1810068558,1.0e-8); + EXPECT_NEAR(v4(3,4),-0.1990824429,1.0e-8); } class XCTest_VXC_meta : public testing::Test diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h index da7f1e6f08..628937adfe 100644 --- a/source/module_hamilt_general/module_xc/test/xc3_mock.h +++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h @@ -75,7 +75,7 @@ namespace ModulePW return x; } - + template void PW_Basis_K::real_to_recip(const Device* ctx, const std::complex* in, @@ -115,7 +115,7 @@ namespace ModulePW const int ik, const bool add, const double factor) const; -#if __CUDA || __ROCM +#if __CUDA || __ROCM template void PW_Basis_K::real_to_recip(const psi::DEVICE_GPU* ctx, const std::complex* in, std::complex* out, @@ -129,7 +129,7 @@ namespace ModulePW const int ik, const bool add, const double factor) const; -#endif +#endif FFT::FFT(){}; FFT::~FFT(){}; @@ -144,13 +144,9 @@ namespace ModulePW namespace ModuleBase { - void WARNING_QUIT(const std::string &file,const std::string &description) - { - std::cout << " " << file <<" warning : "<< description<(float& object); template void reduce_pool(float* object, const int n); template void reduce_pool(double* object, const int n); -} +} \ No newline at end of file diff --git 
a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp index 7ee0394d4d..3d85150b22 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp @@ -33,7 +33,7 @@ ModuleBase::matrix SpinConstrain, psi::DEVICE_CPU>::cal_MW_ const char N_char = 'N'; const int one_int = 1; const std::complex one_float = {1.0, 0.0}, zero_float = {0.0, 0.0}; - pzgemm_(&N_char, + pzgemm_(&T_char, &T_char, &nw, &nw, diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp index bd0ad4ce3a..0c0595a9ce 100644 --- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp @@ -22,7 +22,7 @@ std::vector>> SpinConstrain AorbMulP[is][iat].resize(nw_it, 0.0); for (int iw = 0; iw < nw_it; iw++) { - AorbMulP[is][iat][iw] = std::abs(orbMulP(is, num))< 1e-10 ? 0.0 : orbMulP(is, num); + AorbMulP[is][iat][iw] = orbMulP(is, num); num++; } } @@ -92,10 +92,16 @@ void SpinConstrain, psi::DEVICE_CPU>::calculate_MW( } else if (this->nspin_ == 4) { - this->Mi_[iat].x = (std::abs(total_charge_soc[1]) < this->sc_thr_)? 0.0 : total_charge_soc[1]; - this->Mi_[iat].y = (std::abs(total_charge_soc[2]) < this->sc_thr_)? 0.0 : total_charge_soc[2]; - this->Mi_[iat].z = (std::abs(total_charge_soc[3]) < this->sc_thr_)? 
0.0 : total_charge_soc[3]; + this->Mi_[iat].x = total_charge_soc[1]; + this->Mi_[iat].y = total_charge_soc[2]; + this->Mi_[iat].z = total_charge_soc[3]; } + if (std::abs(this->Mi_[iat].x) < 1e-12) + this->Mi_[iat].x = 0.0; + if (std::abs(this->Mi_[iat].y) < 1e-12) + this->Mi_[iat].y = 0.0; + if (std::abs(this->Mi_[iat].z) < 1e-12) + this->Mi_[iat].z = 0.0; } } } diff --git a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp index 845db88062..db64d5490d 100644 --- a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp @@ -45,7 +45,6 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o } else { - where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda); add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_); this->cal_mw_from_lambda(i_step); new_spin = this->Mi_; @@ -88,7 +87,6 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o add_scalar_multiply_2d(dnu, search, alpha_trial, dnu); delta_lambda = dnu; - where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda); add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_); this->cal_mw_from_lambda(i_step); @@ -117,4 +115,4 @@ void SpinConstrain, psi::DEVICE_CPU>::run_lambda_loop(int o } alpha_trial = alpha_trial * pow(g, 0.7); } -} +} \ No newline at end of file diff --git a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp index a55ad59681..fedb46a976 100644 --- a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp @@ -28,8 +28,7 @@ void MPIInit() npcol = 1; Cblacs_pinfo(&myrank, &mysize); Cblacs_get(-1, 0, &ictxt); - char order[] = "Row"; - Cblacs_gridinit(&ictxt, order, nprow, npcol); + Cblacs_gridinit(&ictxt, "Row", nprow, npcol); 
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myprow, &mypcol); } diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 8e4ee5b15b..fbaf7b1806 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,13 +1,14 @@ #include #ifdef __PEXSI -#include "c_pexsi_interface.h" #include "diago_pexsi.h" + +#include "c_pexsi_interface.h" #include "module_base/global_variable.h" #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "module_pexsi/pexsi_solver.h" +#include "pexsi/pexsi_solver.h" typedef hamilt::MatrixBlock matd; typedef hamilt::MatrixBlock> matcd; @@ -15,7 +16,7 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { -template <> +template<> void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); @@ -24,31 +25,30 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->DM, - this->EDM, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); this->ps->solve(); - this->EDM = this->ps->get_EDM(); - this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]? - this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); - this->totalEnergyH = this->ps->get_totalEnergyH(); - this->totalEnergyS = this->ps->get_totalEnergyS(); + this->EDM = this->ps->EDM; + this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]? 
+ this->totalFreeEnergy = this->ps->totalFreeEnergy; + this->totalEnergyH = this->ps->totalEnergyH; + this->totalEnergyS = this->ps->totalEnergyS; } -template <> -void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, - psi::Psi>& psi, - double* eigenvalue_in) +template<> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, psi::Psi>& psi, double* eigenvalue_in) { ModuleBase::TITLE("DiagoPEXSI", "diag"); ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); + } } // namespace hsolver diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index c212d7795a..018397a33d 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -3,7 +3,7 @@ #include "diagh.h" #include "module_basis/module_ao/parallel_orbitals.h" -#include "module_pexsi/pexsi_solver.h" +#include "pexsi/pexsi_solver.h" namespace hsolver { diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index ae784d2009..16fa5f335b 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -624,31 +624,17 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, psi::P hm->ops->hPsi(info); ModuleBase::timer::tick("DiagoCG_New", "hpsi_func"); }; - auto spsi_func = [this, hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) { + auto spsi_func = [hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) { ModuleBase::timer::tick("DiagoCG_New", "spsi_func"); // psi_in should be a 2D tensor: // psi_in.shape() = [nbands, nbasis] const auto ndim = psi_in.shape().ndim(); REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2"); - - if (GlobalV::use_uspp) - { - // Convert a Tensor object to a psi::Psi object - hm->sPsi(psi_in.data(), spsi_out.data(), + // Convert a Tensor object to a psi::Psi object + hm->sPsi(psi_in.data(), spsi_out.data(), ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), ndim == 1 ? 
psi_in.NumElements() : psi_in.shape().dim_size(1), ndim == 1 ? 1 : psi_in.shape().dim_size(0)); - } else - { - psi::memory::synchronize_memory_op()( - this->ctx, - this->ctx, - spsi_out.data(), - psi_in.data(), - static_cast((ndim == 1 ? 1 : psi_in.shape().dim_size(0)) - * (ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1)))); - } - ModuleBase::timer::tick("DiagoCG_New", "spsi_func"); }; auto psi_tensor = ct::TensorMap( @@ -790,4 +776,4 @@ template class HSolverPW, psi::DEVICE_GPU>; template class HSolverPW, psi::DEVICE_GPU>; #endif -} // namespace hsolver +} // namespace hsolver \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt index 87d16ff557..8faab8b4b4 100644 --- a/source/module_hsolver/module_pexsi/CMakeLists.txt +++ b/source/module_hsolver/module_pexsi/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp) +add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp) if(ENABLE_COVERAGE) add_coverage(pexsi) diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp index e498b83a2e..cf815bd4ae 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -1,4 +1,3 @@ -#ifdef __PEXSI #include "dist_bcd_matrix.h" #include @@ -111,5 +110,4 @@ int DistBCDMatrix::pnum(const int prow, const int pcol) { return this->prowpcol2pnum[prow * this->npcols + pcol]; } -} // namespace pexsi -#endif \ No newline at end of file +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h index 98b8512893..7dbddbad7c 100644 --- 
a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -2,8 +2,6 @@ #define DISTBCDMATRIX_H #include - -#include "module_hsolver/module_pexsi/dist_matrix_transformer.h" // a Block Cyclic Data Distribution matrix // http://www.netlib.org/utk/papers/factor/node3.html // local matrix elements is stored in column major @@ -29,27 +27,6 @@ class DistBCDMatrix int pnum(const int prow, const int pcol); //~DistBCDMatrix(); - const MPI_Comm get_comm() const - { - return comm; - }; - const MPI_Group get_group() const - { - return group; - }; - const int get_nrow() const - { - return nrow; - }; - const int get_ncol() const - { - return ncol; - }; - const char get_LAYOUT() const - { - return LAYOUT; - }; - private: // MPI communicator MPI_Comm comm; diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp index ddd02aaa9a..365622d249 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -1,4 +1,3 @@ -#ifdef __PEXSI #include "dist_ccs_matrix.h" #include @@ -115,5 +114,4 @@ DistCCSMatrix::~DistCCSMatrix() delete[] colptrLocal; delete[] rowindLocal; } -} // namespace pexsi -#endif \ No newline at end of file +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h index a63a0dc16c..aa5e67b6ab 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -19,44 +19,6 @@ class DistCCSMatrix int globalCol(int localCol); int localCol(int globalCol, int& mypcol); void setnnz(int nnzLocal); - - const MPI_Comm get_comm() const - { - return comm; - }; - const MPI_Group get_group() const - { - return group; - }; - const MPI_Group get_group_data() const - { - return group_data; - }; - const int get_size() 
const - { - return size; - }; - const int get_nnz() const - { - return nnz; - }; - const int get_nnzlocal() const - { - return nnzLocal; - }; - const int get_numcol_local() const - { - return numColLocal; - }; - int* get_colptr_local() const - { - return colptrLocal; - }; - int* get_rowind_local() const - { - return rowindLocal; - }; - ~DistCCSMatrix(); private: @@ -88,8 +50,6 @@ class DistCCSMatrix // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal int* colptrLocal; int* rowindLocal; - - // friend class DistMatrixTransformer; }; } // namespace pexsi #endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index ef6c6fec72..01b96f42cc 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -1,6 +1,3 @@ -#ifdef __PEXSI -#include "dist_matrix_transformer.h" - #include #include @@ -31,11 +28,11 @@ namespace pexsi // wether this function is called for the first time for a index array; nprocs: total number of processes size_process: // the number of indices in each process displacement_process: the start position in each process index: the array // contains the indices -inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, - const int nprocs, - int* size_process, - int* displacement_process, - const int* index) +inline int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) { // usually the minimum index is continuous, so it will be a good idea to // check the one next to the previous index first. 
@@ -107,16 +104,16 @@ inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, } } -inline void DistMatrixTransformer::buildCCSParameter(const int size, - const int nprocs, - std::vector size_process, - std::vector displacement_process, - const int* position_index, - DistCCSMatrix& DST_Matrix, - int* buffer2ccsIndex) +inline void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) { // find the minimum one from left buffer index - if (DST_Matrix.get_nnzlocal() <= 0) + if (DST_Matrix.nnzLocal <= 0) return; int pre_col = -1; @@ -126,34 +123,31 @@ inline void DistMatrixTransformer::buildCCSParameter(const int size, while (p_mini >= 0) { int index_mini = position_index[p_mini]; - int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol; - int row_mini = index_mini % DST_Matrix.get_size(); + int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.size; if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array { pre_col = col_mini; - DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1; + DST_Matrix.colptrLocal[col_mini] = nnz_now + 1; } - DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based + DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based // copy data from buffer to M, be careful M is a 0-based array buffer2ccsIndex[nnz_now] = p_mini; ++nnz_now; p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); } // The last element of colptrLocal is nnzLocal+1 - DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1; + DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1; } -inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal, - int* 
buffer2ccsIndex, - double* buffer, - double* nzvalLocal) +inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal) { for (int i = 0; i < nnzLocal; ++i) { nzvalLocal[i] = buffer[buffer2ccsIndex[i]]; } } -inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map& P) +inline void countMatrixDistribution(int N, double* A, std::map& P) { for (int i = 0; i < N; ++i) { @@ -167,15 +161,15 @@ inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std } // find out the index of non-zero elements -inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, - const int nrow, - const int ncol, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - int& nnz, - std::vector& rowidx, - std::vector& colidx) +inline int getNonZeroIndex(char LAYOUT, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) { #ifdef _DEBUG char f_log[80]; @@ -281,21 +275,21 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, return 0; } -int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - const int NPROC_TRANS, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS, - const int nnz, - std::vector& rowidx, - std::vector& colidx, - int& sender_size, - std::vector& sender_size_process, - std::vector& sender_displacement_process, - int& receiver_size, - std::vector& receiver_size_process, - std::vector& receiver_displacement_process, - std::vector& buffer2ccsIndex) +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + 
std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) { // debug int myproc; @@ -328,12 +322,12 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, std::vector proc_map_data_trans; if (myproc == 0) { - MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); + MPI_Group_size(DST_Matrix.group_data, &nproc_data); MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); proc_map_data_trans.resize(nproc_data, 0); for (int i = 0; i < nproc_data; ++i) { - MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); } MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); } @@ -435,7 +429,7 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, int dst_col = DST_Matrix.localCol(g_col, dst_process); int l_row = rowidx[i]; int dst_row = SRC_Matrix.globalRow(l_row); - sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row; + sender_index[i] = dst_col * DST_Matrix.size + dst_row; } // debug #ifdef _DEBUG @@ -484,10 +478,10 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, return 0; } -int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS) +int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS) { // debug #ifdef _DEBUG @@ -505,7 +499,7 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, #endif // build transfortram communicator which contains both processes of BCD processors and // CCS processors with nonzero elements - MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS); + MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS); MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, 
&COMM_TRANS); // debug #ifdef _DEBUG @@ -563,7 +557,7 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, return 0; } -int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) +int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) { MPI_Group_free(&GROUP_TRANS); if (COMM_TRANS != MPI_COMM_NULL) @@ -577,13 +571,13 @@ int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm // two destination matrices share the same non-zero elements positions // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, // even if one of them is acturely zero All matrices must have same MPI communicator -int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - DistCCSMatrix& DST_Matrix, - double*& H_ccs, - double*& S_ccs) +int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs) { // debug #ifdef _DEBUG @@ -620,9 +614,9 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, { log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow << " ; mypcol: " << SRC_Matrix.mypcol << std::endl; - log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol() + log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol << std::endl; - log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl; + log << "layout:" << SRC_Matrix.LAYOUT << std::endl; log << "ZERO = " << ZERO_Limit << std::endl; log << "DST_Matrix parameters:" << std::endl; log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl; @@ -639,11 +633,11 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if 
(myproc < 100) log << "start counting nnz..." << std::endl; #endif - if (SRC_Matrix.get_comm() != MPI_COMM_NULL) + if (SRC_Matrix.comm != MPI_COMM_NULL) { - getNonZeroIndex(SRC_Matrix.get_LAYOUT(), - SRC_Matrix.get_nrow(), - SRC_Matrix.get_ncol(), + getNonZeroIndex(SRC_Matrix.LAYOUT, + SRC_Matrix.nrow, + SRC_Matrix.ncol, H_2d, S_2d, ZERO_Limit, @@ -660,11 +654,11 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if(SRC_Matrix.comm != MPI_COMM_NULL) { log<<"NonZeroIndex :"< sender_buffer(sender_size); std::vector receiver_buffer(receiver_size); // put H to sender buffer - if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') + if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; } } #ifdef _DEBUG @@ -755,18 +749,18 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, #endif // put S to sender buffer - if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') + if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r') { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]]; } } else { for (int i = 0; i < sender_size; ++i) { - sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]]; } } #ifdef _DEBUG @@ -810,12 +804,12 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution // 
two source matrices share the same non-zero elements positions -int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, - double* DMnzvalLocal, - double* EDMnzvalLocal, - DistBCDMatrix& DST_Matrix, - double* DM, - double* EDM) +int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM, + double* EDM) { // debug #ifdef _DEBUG @@ -846,7 +840,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, if (COMM_TRANS != MPI_COMM_NULL) { // init DM and EDM with 0 - for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i) + for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i) { DM[i] = 0; EDM[i] = 0; @@ -883,12 +877,12 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, MPI_Comm_rank(COMM_TRANS, &myproc_trans); if (myproc_trans == 0) { - MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); + MPI_Group_size(DST_Matrix.group, &nproc_bcd); MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); proc_map_bcd_trans.resize(nproc_bcd, 0); for (int i = 0; i < nproc_bcd; ++i) { - MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); } MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); } @@ -939,7 +933,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, log << "display all columns and rows of nonzeros values:\n"; int log_nnz = 0; #endif - for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol) { int g_col = SRC_Matrix.globalCol(icol); int recv_pcol_bcd; @@ -948,9 +942,9 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, // log<DST_Matrix.get_nrow()) + else if(receiver_index[i*2]>DST_Matrix.nrow) { - log<<"ERROR! 
receiver_index(BCD)["<<2*i<<"] = "< "< "<DST_Matrix.get_ncol()) + else if(receiver_index[i*2+1]>DST_Matrix.ncol) { - log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "< "< DST_Matrix.get_nrow()) + else if (receiver_index[i * 2] > DST_Matrix.nrow) { log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > " - << DST_Matrix.get_nrow() << std::endl; + << DST_Matrix.nrow << std::endl; log.flush(); } if (receiver_index[i * 2 + 1] < 0) @@ -1394,10 +1388,10 @@ MPI_Barrier(COMM_TRANS); << std::endl; log.flush(); } - else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol()) + else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol) { log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > " - << DST_Matrix.get_ncol() << std::endl; + << DST_Matrix.ncol << std::endl; log.flush(); } } @@ -1434,14 +1428,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM"); #endif // transform receiver_buffer to DM - if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') + if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') { - int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.get_ncol() + iy; + int idx = ix * DST_Matrix.ncol + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1450,7 +1444,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.get_ncol() << std::endl; + << " ncol = " << DST_Matrix.ncol << std::endl; log.flush(); } } @@ -1460,12 +1454,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + int 
DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.get_nrow() + ix; + int idx = iy * DST_Matrix.nrow + ix; #ifdef _DEBUG if (myproc < 100) { @@ -1474,7 +1468,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.get_nrow() << std::endl; + << " nrow = " << DST_Matrix.nrow << std::endl; log.flush(); } } @@ -1518,14 +1512,14 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM"); #endif // transform receiver_buffer to EDM - if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') + if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r') { - int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = ix * DST_Matrix.get_ncol() + iy; + int idx = ix * DST_Matrix.ncol + iy; #ifdef _DEBUG if (myproc < 100) { @@ -1534,7 +1528,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.get_ncol() << std::endl; + << " ncol = " << DST_Matrix.ncol << std::endl; log.flush(); } } @@ -1544,12 +1538,12 @@ MPI_Barrier(COMM_TRANS); } else { - int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol; for (int i = 0; i < receiver_size; ++i) { int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; - int idx = iy * DST_Matrix.get_nrow() + ix; + int idx = iy * DST_Matrix.nrow + ix; 
#ifdef _DEBUG if (myproc < 100) { @@ -1558,7 +1552,7 @@ MPI_Barrier(COMM_TRANS); log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem << std::endl; log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.get_nrow() << std::endl; + << " nrow = " << DST_Matrix.nrow << std::endl; log.flush(); } } @@ -1602,4 +1596,3 @@ MPI_Barrier(COMM_TRANS); } } // namespace pexsi -#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index e3e27e995a..1d28866c96 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -1,9 +1,8 @@ #ifndef DISTMATRIXTRANSFORMER_H #define DISTMATRIXTRANSFORMER_H -#include - -#include +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution // they should have same MPI communicator // The local matrix of BCD is column-major order @@ -16,62 +15,6 @@ // even if one of them is acturely zero All matrices must have same MPI communicator namespace pexsi { -class DistBCDMatrix; -class DistCCSMatrix; - -namespace DistMatrixTransformer -{ -int MinimumIndexPosition(const bool isFirst, - const int nprocs, - int* size_process, - int* displacement_process, - const int* index); - -void buildCCSParameter(const int size, - const int nprocs, - std::vector size_process, - std::vector displacement_process, - const int* position_index, - DistCCSMatrix& DST_Matrix, - int* buffer2ccsIndex); - -void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal); - -void countMatrixDistribution(int N, double* A, std::map& P); - -int getNonZeroIndex(char LAYOUT, - const int nrow, - const int ncol, - double* H_2d, - double* S_2d, - const double ZERO_Limit, - 
int& nnz, - std::vector& rowidx, - std::vector& colidx); - -int buildTransformParameter(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - const int NPROC_TRANS, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS, - const int nnz, - std::vector& rowidx, - std::vector& colidx, - int& sender_size, - std::vector& sender_size_process, - std::vector& sender_displacement_process, - int& receiver_size, - std::vector& receiver_size_process, - std::vector& receiver_displacement_process, - std::vector& buffer2ccsIndex); - -int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, - DistCCSMatrix& DST_Matrix, - MPI_Group& GROUP_TRANS, - MPI_Comm& COMM_TRANS); - -int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS); - int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double* H_2d, double* S_2d, @@ -88,7 +31,6 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* ENDnzvalLocal, DistBCDMatrix& DST_Matrix, double* DM_2d, - double* ED_2d); -}; // namespace DistMatrixTransformer + double* END_2d); } // namespace pexsi #endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 1be66abf59..90d16ae993 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -1,4 +1,3 @@ -#ifdef __PEXSI #include "pexsi_solver.h" #include @@ -6,11 +5,6 @@ #include #include "module_base/global_variable.h" -#include "simple_pexsi.h" - -extern MPI_Comm DIAG_WORLD; -extern MPI_Comm GRID_WORLD; -extern MPI_Group GRID_GROUP; namespace pexsi { @@ -43,7 +37,9 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, int PEXSI_Solver::solve() { - + extern MPI_Comm DIAG_WORLD; + extern MPI_Comm GRID_WORLD; + extern MPI_Group GRID_GROUP; simplePEXSI(DIAG_WORLD, GRID_WORLD, GRID_GROUP, @@ -65,12 +61,12 @@ int PEXSI_Solver::solve() return 0; } -double* PEXSI_Solver::get_DM() const +const double* 
PEXSI_Solver::get_DM() const { return DM; } -double* PEXSI_Solver::get_EDM() const +const double* PEXSI_Solver::get_EDM() const { return EDM; } @@ -79,16 +75,4 @@ const double PEXSI_Solver::get_totalFreeEnergy() const { return totalFreeEnergy; } - -const double PEXSI_Solver::get_totalEnergyH() const -{ - return totalEnergyH; -} - -const double PEXSI_Solver::get_totalEnergyS() const -{ - return totalEnergyS; -} - -} // namespace pexsi -#endif \ No newline at end of file +} // namespace pexsi \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index b3d7aed152..0c3164e5f0 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -18,11 +18,9 @@ class PEXSI_Solver double& totalEnergyS, double& totalFreeEnergy); int solve(); - double* get_DM() const; - double* get_EDM() const; + const double* get_DM() const; + const double* get_EDM() const; const double get_totalFreeEnergy() const; - const double get_totalEnergyH() const; - const double get_totalEnergyS() const; private: int blacs_text; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index df72a061c5..845beef18c 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -2,7 +2,6 @@ // the H and S matrices are given by 2D block cyclic distribution // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution // #include "mpi.h" -#ifdef __PEXSI #include #include @@ -19,7 +18,6 @@ #include "module_base/lapack_connector.h" #include "module_base/timer.h" #include "module_base/tool_quit.h" -#include "module_base/global_variable.h" namespace pexsi { @@ -104,252 +102,220 @@ int loadPEXSIOption(MPI_Comm comm, // 10: numElectronPEXSITolerance // 11: ZERO_Limit double double_para[12]; + 
int myid; + MPI_Comm_rank(comm, &myid); + if (myid == 0) + { + std::ifstream ifs(PexsiOptionFile.c_str()); + if (!ifs) + { + return 1; + } + setDefaultOption(int_para, double_para); - // read in PEXSI options from GlobalV - int_para[0] = GlobalV::pexsi_npole; - int_para[1] = GlobalV::pexsi_inertia; - int_para[2] = GlobalV::pexsi_nmax; - int_para[3] = 0; - int_para[4] = 1; // GlobalV::pexsi_symbolic; - int_para[5] = GlobalV::pexsi_comm; - int_para[6] = 0; - int_para[7] = GlobalV::pexsi_storage; - int_para[8] = GlobalV::pexsi_ordering; - int_para[9] = GlobalV::pexsi_row_ordering; - int_para[10] = GlobalV::pexsi_nproc; - int_para[11] = GlobalV::pexsi_symm; - int_para[12] = GlobalV::pexsi_trans; - int_para[13] = GlobalV::pexsi_method; - int_para[14] = 2; - int_para[15] = 0; - int_para[16] = GlobalV::pexsi_nproc_pole; - - double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin; - double_para[1] = GlobalV::pexsi_temp; - double_para[2] = GlobalV::pexsi_gap; - double_para[3] = GlobalV::pexsi_delta_e; - double_para[4] = GlobalV::pexsi_mu_lower; - double_para[5] = GlobalV::pexsi_mu_upper; - double_para[6] = GlobalV::pexsi_mu; - double_para[7] = GlobalV::pexsi_mu_thr; - double_para[8] = GlobalV::pexsi_mu_expand; - double_para[9] = GlobalV::pexsi_mu_guard; - double_para[10] = GlobalV::pexsi_elec_thr; - double_para[11] = GlobalV::pexsi_zero_thr; - // int myid; - // MPI_Comm_rank(comm, &myid); - // if (myid == 0) - // { - // std::ifstream ifs(PexsiOptionFile.c_str()); - // if (!ifs) - // { - // return 1; - // } - // setDefaultOption(int_para, double_para); - - // ifs.clear(); - // ifs.seekg(0); + ifs.clear(); + ifs.seekg(0); - // char key[128]; - // char lowercase_key[128]; - // const int LINE_LINGTH = 1024; - // char unused_string[LINE_LINGTH]; + char key[128]; + char lowercase_key[128]; + const int LINE_LINGTH = 1024; + char unused_string[LINE_LINGTH]; - // while (ifs.good()) - // { - // ifs >> key; - // //~ cout<<"readin word is: "<>options.spin; - // ifs >> double_para[0]; - 
// //~ cout<<"double_para[0]: "<>options.temperature; - // ifs >> double_para[1]; - // //~ cout<<"double_para[1]: "<>options.gap; - // ifs >> double_para[2]; - // //~ cout<<"double_para[2]: "<>options.deltaE; - // ifs >> double_para[3]; - // //~ cout<<"double_para[3]: "<>options.numPole; - // ifs >> int_para[0]; - // //~ cout<<"int_para[0]: "<>options.isInertiaCount; - // ifs >> int_para[1]; - // //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; - // ifs >> int_para[2]; - // //~ cout<<"int_para[2]: "<>options.muMin0; - // ifs >> double_para[4]; - // //~ cout<<"double_para[4]: "<>options.muMax0; - // ifs >> double_para[5]; - // //~ cout<<"double_para[5]: "<>options.mu0; - // ifs >> double_para[6]; - // //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; - // ifs >> double_para[7]; - // //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; - // ifs >> double_para[8]; - // //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; - // ifs >> double_para[9]; - // //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; - // ifs >> double_para[10]; - // //~ cout<<"double_para[10]: "<> double_para[11]; - // } - // else if (strcmp("matrixtype", lowercase_key) == 0) - // { - // //~ ifs>>options.matrixType; - // ifs >> int_para[3]; - // //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; - // ifs >> int_para[4]; - // //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; - // ifs >> int_para[5]; - // //~ cout<<"int_para[5]: "<>options.solver; - // ifs >> int_para[6]; - // //~ cout<<"int_para[6]: "<>options.symmetricStorage; - // ifs >> int_para[7]; - // //~ cout<<"int_para[7]: "<>options.ordering; - // ifs >> int_para[8]; - // //~ cout<<"int_para[8]: "<>options.rowOrdering; - // ifs >> int_para[9]; - // //~ cout<<"int_para[9]: "<>options.npSymbFact; - // ifs >> int_para[10]; - // //~ cout<<"int_para[10]: "<>options.symmetric; - // ifs >> int_para[11]; - // //~ cout<<"int_para[11]: "<>options.transpose; - // ifs >> int_para[12]; - // //~ 
cout<<"int_para[12]: "<>options.method; - // ifs >> int_para[13]; - // //~ cout<<"int_para[13]: "<>options.nPoints; - // ifs >> int_para[14]; - // //~ cout<<"int_para[14]: "<>options.verbosity; - // ifs >> int_para[15]; - // //~ cout<<"int_para[15]: "<>options.verbosity; - // ifs >> int_para[16]; - // //~ cout<<"int_para[16]: "<> key; + //~ cout<<"readin word is: "<>options.spin; + ifs >> double_para[0]; + //~ cout<<"double_para[0]: "<>options.temperature; + ifs >> double_para[1]; + //~ cout<<"double_para[1]: "<>options.gap; + ifs >> double_para[2]; + //~ cout<<"double_para[2]: "<>options.deltaE; + ifs >> double_para[3]; + //~ cout<<"double_para[3]: "<>options.numPole; + ifs >> int_para[0]; + //~ cout<<"int_para[0]: "<>options.isInertiaCount; + ifs >> int_para[1]; + //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; + ifs >> int_para[2]; + //~ cout<<"int_para[2]: "<>options.muMin0; + ifs >> double_para[4]; + //~ cout<<"double_para[4]: "<>options.muMax0; + ifs >> double_para[5]; + //~ cout<<"double_para[5]: "<>options.mu0; + ifs >> double_para[6]; + //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; + ifs >> double_para[7]; + //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; + ifs >> double_para[8]; + //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; + ifs >> double_para[9]; + //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; + ifs >> double_para[10]; + //~ cout<<"double_para[10]: "<> double_para[11]; + } + else if (strcmp("matrixtype", lowercase_key) == 0) + { + //~ ifs>>options.matrixType; + ifs >> int_para[3]; + //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; + ifs >> int_para[4]; + //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; + ifs >> int_para[5]; + //~ cout<<"int_para[5]: "<>options.solver; + ifs >> int_para[6]; + //~ cout<<"int_para[6]: "<>options.symmetricStorage; + ifs >> int_para[7]; + //~ cout<<"int_para[7]: "<>options.ordering; + ifs >> int_para[8]; + //~ cout<<"int_para[8]: "<>options.rowOrdering; + 
ifs >> int_para[9]; + //~ cout<<"int_para[9]: "<>options.npSymbFact; + ifs >> int_para[10]; + //~ cout<<"int_para[10]: "<>options.symmetric; + ifs >> int_para[11]; + //~ cout<<"int_para[11]: "<>options.transpose; + ifs >> int_para[12]; + //~ cout<<"int_para[12]: "<>options.method; + ifs >> int_para[13]; + //~ cout<<"int_para[13]: "<>options.nPoints; + ifs >> int_para[14]; + //~ cout<<"int_para[14]: "<>options.verbosity; + ifs >> int_para[15]; + //~ cout<<"int_para[15]: "<>options.verbosity; + ifs >> int_para[16]; + //~ cout<<"int_para[16]: "< 0) - { - bands_to_print.resize(band_print_num); - ifs.clear(); - ifs.seekg(0); // move to the beginning of the file - ifs.rdstate(); - while (ifs.good()) - { - ifs >> word1; - if (ifs.eof() != 0) - break; - strtolower(word1, word); // convert uppercase std::string to lower case; word1 --> word - - if (strcmp("bands_to_print", word) == 0) - { - for(int i = 0; i < band_print_num; i ++) - { - ifs >> bands_to_print[i]; - } - } - } - } - //---------------------------------------------------------- // DFT+U Xin Qu added on 2020-10-29 //---------------------------------------------------------- @@ -2996,7 +2826,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04 this->relax_nmax = 1; out_stru = 0; out_dos = 0; - out_band[0] = 0; + out_band = 0; out_proj_band = 0; cal_force = 0; init_wfc = "file"; @@ -3013,7 +2843,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04 this->relax_nmax = 1; out_stru = 0; out_dos = 0; - out_band[0] = 0; + out_band = 0; out_proj_band = 0; cal_force = 0; init_wfc = "file"; @@ -3462,7 +3292,6 @@ void Input::Bcast() Parallel_Common::bcast_string(mixing_mode); Parallel_Common::bcast_double(mixing_beta); Parallel_Common::bcast_int(mixing_ndim); - Parallel_Common::bcast_int(mixing_restart); Parallel_Common::bcast_double(mixing_gg0); // mohan add 2014-09-27 Parallel_Common::bcast_double(mixing_beta_mag); Parallel_Common::bcast_double(mixing_gg0_mag); @@ -3496,8 +3325,7 @@ void Input::Bcast() 
Parallel_Common::bcast_int(out_wfc_pw); Parallel_Common::bcast_bool(out_wfc_r); Parallel_Common::bcast_int(out_dos); - if(GlobalV::MY_RANK != 0) out_band.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */ - Parallel_Common::bcast_int(out_band.data(), 2); + Parallel_Common::bcast_bool(out_band); Parallel_Common::bcast_bool(out_proj_band); if(GlobalV::MY_RANK != 0) out_mat_hs.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */ Parallel_Common::bcast_int(out_mat_hs.data(), 2); @@ -3695,17 +3523,6 @@ void Input::Bcast() Parallel_Common::bcast_bool(restart_save); // Peize Lin add 2020.04.04 Parallel_Common::bcast_bool(restart_load); // Peize Lin add 2020.04.04 - Parallel_Common::bcast_int(band_print_num); - if(GlobalV::MY_RANK != 0) - { - bands_to_print.resize(band_print_num); - } - - for(int i = 0; i < band_print_num; i++) - { - Parallel_Common::bcast_int(bands_to_print[i]); - } - //----------------------------------------------------------------------------------- // DFT+U (added by Quxin 2020-10-29) //----------------------------------------------------------------------------------- @@ -3808,34 +3625,6 @@ void Input::Bcast() Parallel_Common::bcast_bool(qo_switch); Parallel_Common::bcast_string(qo_basis); Parallel_Common::bcast_double(qo_thr); - //========================================================== - // PEXSI - //========================================================== - Parallel_Common::bcast_int(pexsi_npole); - Parallel_Common::bcast_int(pexsi_inertia); - Parallel_Common::bcast_int(pexsi_nmax); - // Parallel_Common::bcast_int(pexsi_symbolic); - Parallel_Common::bcast_int(pexsi_comm); - Parallel_Common::bcast_int(pexsi_storage); - Parallel_Common::bcast_int(pexsi_ordering); - Parallel_Common::bcast_int(pexsi_row_ordering); - Parallel_Common::bcast_int(pexsi_nproc); - Parallel_Common::bcast_int(pexsi_symm); - Parallel_Common::bcast_int(pexsi_trans); - 
Parallel_Common::bcast_int(pexsi_method); - Parallel_Common::bcast_int(pexsi_nproc_pole); - // Parallel_Common::bcast_double(pexsi_spin); - Parallel_Common::bcast_double(pexsi_temp); - Parallel_Common::bcast_double(pexsi_gap); - Parallel_Common::bcast_double(pexsi_delta_e); - Parallel_Common::bcast_double(pexsi_mu_lower); - Parallel_Common::bcast_double(pexsi_mu_upper); - Parallel_Common::bcast_double(pexsi_mu); - Parallel_Common::bcast_double(pexsi_mu_thr); - Parallel_Common::bcast_double(pexsi_mu_expand); - Parallel_Common::bcast_double(pexsi_mu_guard); - Parallel_Common::bcast_double(pexsi_elec_thr); - Parallel_Common::bcast_double(pexsi_zero_thr); /* broadcasting std::vector is sometime a annorying task... */ if (ntype != 0) /* ntype has been broadcasted before */ { @@ -4133,11 +3922,10 @@ void Input::Check(void) } else if (ks_solver == "pexsi") { -#ifdef __PEXSI - GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; +#ifndef __MPI + ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); #else - ModuleBase::WARNING_QUIT("Input", - "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx."); + GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; #endif diff --git a/source/module_io/input.h b/source/module_io/input.h index 1d29c6311a..b4e983abad 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -232,7 +232,6 @@ class Input std::string mixing_mode; // "plain","broyden",... double mixing_beta; // 0 : no_mixing int mixing_ndim; // used in Broyden method - int mixing_restart; double mixing_gg0; // used in kerker method. mohan add 2014-09-27 double mixing_beta_mag; double mixing_gg0_mag; @@ -260,13 +259,11 @@ class Input bool out_chg; // output charge density. 0: no; 1: yes bool out_dm; // output density matrix. 
bool out_dm1; - int band_print_num; - std::vector bands_to_print; int out_pot; // yes or no int out_wfc_pw; // 0: no; 1: txt; 2: dat bool out_wfc_r; // 0: no; 1: yes int out_dos; // dos calculation. mohan add 20090909 - std::vector out_band; // band calculation pengfei 2014-10-13 + bool out_band; // band calculation pengfei 2014-10-13 bool out_proj_band; // projected band structure calculation jiyy add 2022-05-11 std::vector out_mat_hs; // output H matrix and S matrix in local basis. bool out_mat_xc; // output exchange-correlation matrix in KS-orbital representation. @@ -602,34 +599,6 @@ class Input double qo_thr = 1e-6; std::vector qo_strategy = {}; std::vector qo_screening_coeff = {}; - //========================================================== - // variables for PEXSI - //========================================================== - int pexsi_npole = 54; - int pexsi_inertia = 1; - int pexsi_nmax = 80; - // int pexsi_symbolic = 1; - int pexsi_comm = 1; - int pexsi_storage = 1; - int pexsi_ordering = 0; - int pexsi_row_ordering = 1; - int pexsi_nproc = 1; - int pexsi_symm = 1; - int pexsi_trans = 0; - int pexsi_method = 1; - int pexsi_nproc_pole = 1; - // double pexsi_spin = 2; - double pexsi_temp = 0.0001; - double pexsi_gap = 0; - double pexsi_delta_e = 20.0; - double pexsi_mu_lower = -10; - double pexsi_mu_upper = 10; - double pexsi_mu = 0.0; - double pexsi_mu_thr = 0.05; - double pexsi_mu_expand = 0.3; - double pexsi_mu_guard = 0.2; - double pexsi_elec_thr = 0.001; - double pexsi_zero_thr = 1e-10; private: //========================================================== @@ -698,15 +667,7 @@ class Input template typename std::enable_if::value, T>::type cast_string(const std::string& str) { return std::stod(str); } template - typename std::enable_if::value, T>::type cast_string(const std::string& str) - { - if (str == "true" || str == "1") - return 1; - else if (str == "false" || str == "0") - return 0; - else - return std::stoi(str); - } + typename 
std::enable_if::value, T>::type cast_string(const std::string& str) { return std::stoi(str); } template typename std::enable_if::value, T>::type cast_string(const std::string& str) { return (str == "true" || str == "1"); } template diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index d6e3371111..a52245d05c 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -750,7 +750,6 @@ void Input_Conv::Convert(void) GlobalV::MIXING_MODE = INPUT.mixing_mode; GlobalV::MIXING_BETA = INPUT.mixing_beta; GlobalV::MIXING_NDIM = INPUT.mixing_ndim; - GlobalV::MIXING_RESTART = INPUT.mixing_restart; GlobalV::MIXING_GG0 = INPUT.mixing_gg0; GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag; GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag; @@ -766,35 +765,6 @@ void Input_Conv::Convert(void) GlobalV::qo_strategy = INPUT.qo_strategy; GlobalV::qo_thr = INPUT.qo_thr; GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff; - - //----------------------------------------------- - // PEXSI related parameters - //----------------------------------------------- - GlobalV::pexsi_npole = INPUT.pexsi_npole; - GlobalV::pexsi_inertia = INPUT.pexsi_inertia; - GlobalV::pexsi_nmax = INPUT.pexsi_nmax; - // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic; - GlobalV::pexsi_comm = INPUT.pexsi_comm; - GlobalV::pexsi_storage = INPUT.pexsi_storage; - GlobalV::pexsi_ordering = INPUT.pexsi_ordering; - GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering; - GlobalV::pexsi_nproc = INPUT.pexsi_nproc; - GlobalV::pexsi_symm = INPUT.pexsi_symm; - GlobalV::pexsi_trans = INPUT.pexsi_trans; - GlobalV::pexsi_method = INPUT.pexsi_method; - GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; - // GlobalV::pexsi_spin = INPUT.pexsi_spin; - GlobalV::pexsi_temp = INPUT.pexsi_temp; - GlobalV::pexsi_gap = INPUT.pexsi_gap; - GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e; - GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower; - GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper; - 
GlobalV::pexsi_mu = INPUT.pexsi_mu; - GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr; - GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand; - GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard; - GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr; - GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr; ModuleBase::timer::tick("Input_Conv", "Convert"); return; } diff --git a/source/module_io/mulliken_charge.cpp b/source/module_io/mulliken_charge.cpp index bdcdb5a035..393da5fda4 100644 --- a/source/module_io/mulliken_charge.cpp +++ b/source/module_io/mulliken_charge.cpp @@ -44,7 +44,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector> const char N_char = 'N'; const int one_int = 1; const double one_float = 1.0, zero_float = 0.0; - pdgemm_(&N_char, + pdgemm_(&T_char, &T_char, &GlobalV::NLOCAL, &GlobalV::NLOCAL, @@ -156,7 +156,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector one_float = {1.0, 0.0}, zero_float = {0.0, 0.0}; - pzgemm_(&N_char, + pzgemm_(&T_char, &T_char, &GlobalV::NLOCAL, &GlobalV::NLOCAL, diff --git a/source/module_io/nscf_band.cpp b/source/module_io/nscf_band.cpp index 290dc58bd3..d8b7b05ca6 100644 --- a/source/module_io/nscf_band.cpp +++ b/source/module_io/nscf_band.cpp @@ -3,7 +3,6 @@ #include "module_base/global_variable.h" #include "module_base/timer.h" #include "module_base/tool_title.h" -#include "module_base/formatter_physfmt.h" void ModuleIO::nscf_band( const int &is, @@ -11,7 +10,6 @@ void ModuleIO::nscf_band( const int &nks, const int &nband, const double &fermie, - const int &precision, const ModuleBase::matrix& ekb, const K_Vectors& kv, const Parallel_Kpoints* Pkpoints) @@ -35,28 +33,23 @@ void ModuleIO::nscf_band( if (ik>0) { auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1]; - klength[ik] = klength[ik-1]; - klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? 
delta.norm() : 0.0; + klength[ik] = klength[ik-1] + delta.norm(); } - /* first find if present kpoint in present pool */ if ( GlobalV::MY_POOL == Pkpoints->whichpool[ik] ) { - /* then get the local kpoint index, which starts definitly from 0 */ const int ik_now = ik - Pkpoints->startk_pool[GlobalV::MY_POOL]; - /* if present kpoint corresponds the spin of the present one */ if( kv.isk[ik_now+is*nks] == is ) { if ( GlobalV::RANK_IN_POOL == 0) { - formatter::PhysicalFmt physfmt; // create a physical formatter temporarily - std::ofstream ofs(out_band_dir.c_str(), std::ios::app); - physfmt.adjust_formatter_flexible(4, 0, false); // for integer - ofs << physfmt.get_p_formatter()->format(ik+1); - physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal - ofs << physfmt.get_p_formatter()->format(klength[ik]); + std::ofstream ofs(out_band_dir.c_str(),std::ios::app); + ofs << std::setprecision(8); + //start from 1 + ofs << ik+1; + ofs << " " << klength[ik] << " "; for(int ib = 0; ib < nband; ib++) { - ofs << physfmt.get_p_formatter()->format((ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV); + ofs << " " << (ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV; } ofs << std::endl; ofs.close(); @@ -90,30 +83,18 @@ void ModuleIO::nscf_band( #else // std::cout<<"\n nband = "< klength; - klength.resize(nks); - klength[0] = 0.0; + std::ofstream ofs(out_band_dir.c_str()); for(int ik=0;ik0) - { - auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1]; - klength[ik] = klength[ik-1]; - klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? 
delta.norm() : 0.0; - } if( kv.isk[ik] == is) { - physfmt.adjust_formatter_flexible(4, 0, false); // for integer - ofs << physfmt.get_p_formatter()->format(ik+1); - physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal - ofs << physfmt.get_p_formatter()->format(klength[ik]); // add klength, in accordance with the MPI version + ofs<format((ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV); + ofs <& default_parametes_type) +bool default_parametes_reader(const std::string& fn, std::map& default_parametes_type) { std::ifstream inputFile(fn.c_str()); if (inputFile.is_open()) @@ -120,24 +122,28 @@ void default_parametes_reader(const std::string& fn, std::map& input) +bool input_parameters_get(const std::string& fn, std::map& input) { + // The module title information is displayed ModuleBase::TITLE("Input", "Read"); + // If it is not the primary node, return false if (GlobalV::MY_RANK != 0) - return; + return false; // Open the input parameter file std::ifstream ifs(fn.c_str(), std::ios::in); // "in_datas/input_parameters" + // If the opening fails, an error message is printed and false is returned if (!ifs) { - ModuleBase::WARNING_QUIT("Input", "Can't find the INPUT file at " + fn); + std::cout << " Can't find the INPUT file." 
<< std::endl; + return false; } ifs.clear(); ifs.seekg(0); @@ -160,7 +166,8 @@ void input_parameters_get(const std::string& fn, std::map input_parameters) +bool input_parameters_set(std::map input_parameters) { if (input_parameters.count("nupdown") != 0) { @@ -818,10 +831,6 @@ void input_parameters_set(std::map input_parameters { INPUT.mixing_ndim = *static_cast(input_parameters["mixing_ndim"].get()); } - else if (input_parameters.count("mixing_restart") != 0) - { - INPUT.mixing_restart = *static_cast(input_parameters["mixing_restart"].get()); - } else if (input_parameters.count("mixing_gg0") != 0) { INPUT.mixing_gg0 = *static_cast(input_parameters["mixing_gg0"].get()); @@ -908,7 +917,7 @@ void input_parameters_set(std::map input_parameters } else if (input_parameters.count("out_band") != 0) { - INPUT.out_band = *static_cast*>(input_parameters["out_band"].get()); + INPUT.out_band = *static_cast(input_parameters["out_band"].get()); } else if (input_parameters.count("out_proj_band") != 0) { diff --git a/source/module_io/parameter_pool.h b/source/module_io/parameter_pool.h index bd4ae575dd..83baedd036 100644 --- a/source/module_io/parameter_pool.h +++ b/source/module_io/parameter_pool.h @@ -241,12 +241,12 @@ class InputParameter } } }; -void Init(const std::string& default_type_path, +bool Init(const std::string& default_type_path, const std::string& default_value_path, const std::string& input_value_path); -void default_parametes_reader(const std::string& fn, std::map& default_parametes_type); -void input_parameters_get(const std::string& fn, std::map& input); -void input_parameters_set(std::map input_parameters); +bool default_parametes_reader(const std::string& fn, std::map& default_parametes_type); +bool input_parameters_get(const std::string& fn, std::map& input); +bool input_parameters_set(std::map input_parameters); extern std::map input_parameters; extern std::map default_parametes_type; diff --git a/source/module_io/test/input_conv_test.cpp 
b/source/module_io/test/input_conv_test.cpp index a566827792..f0d7e43f68 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -183,7 +183,6 @@ TEST_F(InputConvTest, Conv) EXPECT_EQ(GlobalV::sc_mag_switch,0); EXPECT_TRUE(GlobalV::decay_grad_switch); EXPECT_EQ(GlobalV::sc_file, "sc.json"); - EXPECT_EQ(GlobalV::MIXING_RESTART,0); } TEST_F(InputConvTest, ConvRelax) diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp index 11bce873ab..02a5a19e10 100644 --- a/source/module_io/test/input_test.cpp +++ b/source/module_io/test/input_test.cpp @@ -176,11 +176,9 @@ TEST_F(InputTest, Default) EXPECT_EQ(INPUT.out_wfc_pw,0); EXPECT_EQ(INPUT.out_wfc_r,0); EXPECT_EQ(INPUT.out_dos,0); - EXPECT_EQ(INPUT.out_band[0],0); - EXPECT_EQ(INPUT.out_band[1],8); + EXPECT_EQ(INPUT.out_band,0); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.out_mat_hs[0],0); - EXPECT_EQ(INPUT.out_mat_hs[1],8); EXPECT_EQ(INPUT.out_mat_hs2,0); EXPECT_EQ(INPUT.out_mat_xc, 0); EXPECT_EQ(INPUT.out_interval,1); @@ -541,11 +539,9 @@ TEST_F(InputTest, Read) EXPECT_EQ(INPUT.out_wfc_pw,0); EXPECT_EQ(INPUT.out_wfc_r,0); EXPECT_EQ(INPUT.out_dos,0); - EXPECT_EQ(INPUT.out_band[0],0); - EXPECT_EQ(INPUT.out_band[1],8); + EXPECT_EQ(INPUT.out_band,0); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.out_mat_hs[0],0); - EXPECT_EQ(INPUT.out_mat_hs[1],8); EXPECT_EQ(INPUT.out_mat_hs2,0); EXPECT_EQ(INPUT.out_mat_xc, 0); EXPECT_EQ(INPUT.out_interval,1); @@ -925,8 +921,7 @@ TEST_F(InputTest, Default_2) EXPECT_EQ(INPUT.relax_nmax, 1); EXPECT_EQ(INPUT.out_stru, 0); EXPECT_EQ(INPUT.symmetry, "0"); - EXPECT_EQ(INPUT.out_band[0],0); - EXPECT_EQ(INPUT.out_band[1],8); + EXPECT_EQ(INPUT.out_band,0); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.cal_force,0); EXPECT_EQ(INPUT.init_wfc,"file"); @@ -948,8 +943,7 @@ TEST_F(InputTest, Default_2) EXPECT_EQ(INPUT.relax_nmax, 1); EXPECT_EQ(INPUT.symmetry, "0"); EXPECT_EQ(INPUT.out_stru, 0); - 
EXPECT_EQ(INPUT.out_band[0],0); - EXPECT_EQ(INPUT.out_band[1],8); + EXPECT_EQ(INPUT.out_band,0); EXPECT_EQ(INPUT.out_proj_band,0); EXPECT_EQ(INPUT.cal_force,0); EXPECT_EQ(INPUT.init_wfc,"file"); diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index d005fdfccc..58e04eb32d 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -26,13 +26,12 @@ class InputParaTest : public ::testing::Test #ifdef __MPI TEST_F(InputParaTest, Bcast) { - INPUT.Default(); if (GlobalV::MY_RANK == 0) { - INPUT.suffix = "BcastTest"; + INPUT.Default(); /* hmmm... why there is not Default_2 here? and, seems Default is execute directly on each processor? */ } INPUT.Bcast(); - EXPECT_EQ(INPUT.suffix, "BcastTest"); + EXPECT_EQ(INPUT.suffix, "ABACUS"); EXPECT_EQ(INPUT.stru_file, ""); EXPECT_EQ(INPUT.kpoint_file, ""); EXPECT_EQ(INPUT.pseudo_dir, ""); @@ -181,8 +180,7 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.out_wfc_pw, 0); EXPECT_EQ(INPUT.out_wfc_r, 0); EXPECT_EQ(INPUT.out_dos, 0); - EXPECT_EQ(INPUT.out_band[0], 0); - EXPECT_EQ(INPUT.out_band[1], 8); + EXPECT_EQ(INPUT.out_band, 0); EXPECT_EQ(INPUT.out_proj_band, 0); EXPECT_EQ(INPUT.out_mat_hs[0], 0); EXPECT_EQ(INPUT.out_mat_hs[1], 8); @@ -381,7 +379,6 @@ TEST_F(InputParaTest, Bcast) EXPECT_TRUE(INPUT.mdp.dump_virial); EXPECT_FALSE(INPUT.mixing_tau); EXPECT_FALSE(INPUT.mixing_dftu); - EXPECT_EQ(INPUT.mixing_restart,0); EXPECT_EQ(INPUT.out_bandgap, 0); EXPECT_EQ(INPUT.out_mat_t, 0); diff --git a/source/module_io/test/support/INPUT b/source/module_io/test/support/INPUT index 4fbde867db..469dff2ff4 100644 --- a/source/module_io/test/support/INPUT +++ b/source/module_io/test/support/INPUT @@ -59,7 +59,7 @@ out_pot 2 #output realspace potential out_wfc_pw 0 #output wave functions out_wfc_r 0 #output wave functions in realspace out_dos 0 #output energy and dos -out_band 0 #output energy and band structure +out_band false #output energy and band 
structure out_proj_band FaLse #output projected band structure restart_save f #print to disk every step for restart restart_load F #restart from disk diff --git a/source/module_io/test/support/witestfile b/source/module_io/test/support/witestfile index 4db819d53f..4043773876 100644 --- a/source/module_io/test/support/witestfile +++ b/source/module_io/test/support/witestfile @@ -55,7 +55,7 @@ out_pot 2 #output realspace potential out_wfc_pw 0 #output wave functions out_wfc_r 0 #output wave functions in realspace out_dos 0 #output energy and dos -out_band 0 #output energy and band structure +out_band false #output energy and band structure out_proj_band FaLse #output projected band structure restart_save f #print to disk every step for restart restart_load F #restart from disk diff --git a/source/module_io/test/to_qo_test.cpp b/source/module_io/test/to_qo_test.cpp index 9477b2eb54..93692f858e 100644 --- a/source/module_io/test/to_qo_test.cpp +++ b/source/module_io/test/to_qo_test.cpp @@ -543,39 +543,7 @@ TEST_F(toQOTest, CalculateSelfOvlpRFull) //tqo.write_ovlp(tqo.ovlp_R()[0], "QO_self_ovlp.dat"); } -/* Si_dojo_soc.upf is special: two p orbitals, one s orbital */ - -TEST_F(toQOTest, BuildPswfcPartial1) -{ - define_fcc_cell(ucell); - toQO tqo("pswfc", {"s", "s"}); - tqo.unwrap_unitcell(&ucell); - tqo.build_ao(ucell.ntype, ucell.pseudo_fn); - EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals - EXPECT_EQ(tqo.nchi(), 2); -} - -TEST_F(toQOTest, BuildPswfcPartial2) -{ - define_fcc_cell(ucell); - toQO tqo("pswfc", {"ps", "s"}); - tqo.unwrap_unitcell(&ucell); - tqo.build_ao(ucell.ntype, ucell.pseudo_fn); - EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals - EXPECT_EQ(tqo.nchi(), 8); // the first element is Si, it has two p orbitals, so 3+3+1+1 -} - -TEST_F(toQOTest, BuildPswfcPartial3) -{ - define_fcc_cell(ucell); - toQO tqo("pswfc", {"all", "p"}); - tqo.unwrap_unitcell(&ucell); - tqo.build_ao(ucell.ntype, 
ucell.pseudo_fn); - EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals - EXPECT_EQ(tqo.nchi(), 10); -} - -TEST_F(toQOTest, BuildPswfcAll) +TEST_F(toQOTest, BuildPswfc) { define_fcc_cell(ucell); toQO tqo("pswfc", {"all", "all"}); diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index 8dccb5627a..d61133715d 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -384,16 +384,13 @@ TEST_F(write_input, Mixing7) std::string output((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); EXPECT_THAT(output, testing::HasSubstr("#Parameters (7.Charge Mixing)")); EXPECT_THAT(output, testing::HasSubstr("mixing_type broyden #plain; pulay; broyden")); - EXPECT_THAT(output, testing::HasSubstr("mixing_beta 0.7 #mixing parameter: 0 means no new charge")); + EXPECT_THAT(output, + testing::HasSubstr("mixing_beta 0.7 #mixing parameter: 0 means no new charge")); EXPECT_THAT(output, testing::HasSubstr("mixing_ndim 8 #mixing dimension in pulay or broyden")); EXPECT_THAT(output, testing::HasSubstr("mixing_gg0 0 #mixing parameter in kerker")); - EXPECT_THAT(output, testing::HasSubstr("mixing_beta_mag -10 #mixing parameter for magnetic density")); - EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_mag 0 #mixing parameter in kerker")); - EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_min 0.1 #the minimum kerker coefficient")); - EXPECT_THAT(output, testing::HasSubstr("mixing_angle -10 #angle mixing parameter for non-colinear calculations")); EXPECT_THAT(output, testing::HasSubstr("mixing_tau 0 #whether to mix tau in mGGA calculation")); - EXPECT_THAT(output, testing::HasSubstr("mixing_dftu 0 #whether to mix locale in DFT+U calculation")); - EXPECT_THAT(output, testing::HasSubstr("mixing_restart 0 #which step to restart mixing during SCF")); + EXPECT_THAT(output, + testing::HasSubstr("mixing_dftu 0 #whether to mix locale in DFT+U calculation")); 
EXPECT_THAT(output, testing::HasSubstr("")); ifs.close(); remove("write_input_test.log"); diff --git a/source/module_io/test_serial/nscf_band_test.cpp b/source/module_io/test_serial/nscf_band_test.cpp index db9bf752fb..4483bf37cd 100644 --- a/source/module_io/test_serial/nscf_band_test.cpp +++ b/source/module_io/test_serial/nscf_band_test.cpp @@ -54,16 +54,9 @@ class BandTest : public ::testing::Test ekb(1,1) = 2.0; ekb(1,2) = 3.0; kv = new K_Vectors; - // specify the kpoints - kv->kvec_c.resize(nks); - kv->kvec_c[0] = ModuleBase::Vector3(0.0, 0.0, 0.0); - kv->kvec_c[1] = ModuleBase::Vector3(1.0, 0.0, 0.0); kv->isk.resize(nks); kv->isk[0] = 0; kv->isk[1] = 1; - kv->kl_segids.resize(nks); - kv->kl_segids[0] = 0; - kv->kl_segids[1] = 0; Pkpoints = new Parallel_Kpoints; } @@ -88,12 +81,12 @@ class BandTest : public ::testing::Test TEST_F(BandTest, nscf_band) { // Call the function to be tested - ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, 8, ekb, *kv, Pkpoints); + ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, ekb, *kv, Pkpoints); // Check the output file std::ifstream ifs(out_band_dir); std::string str((std::istreambuf_iterator(ifs)),std::istreambuf_iterator()); ASSERT_TRUE(ifs.is_open()); - EXPECT_THAT(str, testing::HasSubstr("1 0.00000000 -27.21139600 -13.60569800 0.00000000")); + EXPECT_THAT(str, testing::HasSubstr("1 -27.2114 -13.6057 0")); ifs.close(); } diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index cb26bc2283..6003a4fdb0 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -85,7 +85,6 @@ void Input::Print(const std::string &fn) const ModuleBase::GlobalFunc::OUTP(ofs, "cal_force", cal_force, "if calculate the force at the end of the electronic iteration"); ModuleBase::GlobalFunc::OUTP(ofs, "out_freq_ion", out_freq_ion, "the frequency ( >= 0 ) of ionic step to output charge density and wavefunction. 
0: output only when ion steps are finished"); ModuleBase::GlobalFunc::OUTP(ofs, "device", device, "the computing device for ABACUS"); - ModuleBase::GlobalFunc::OUTP(ofs, "precision", precision, "the computing precision for ABACUS"); ofs << "\n#Parameters (2.PW)" << std::endl; ModuleBase::GlobalFunc::OUTP(ofs, "ecutwfc", ecutwfc, "#energy cutoff for wave functions"); @@ -123,7 +122,7 @@ void Input::Print(const std::string &fn) const ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_pw", out_wfc_pw, "output wave functions"); ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_r", out_wfc_r, "output wave functions in realspace"); ModuleBase::GlobalFunc::OUTP(ofs, "out_dos", out_dos, "output energy and dos"); - ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band[0], "output energy and band structure (with precision "+std::to_string(out_band[1])+")"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band, "output energy and band structure"); ModuleBase::GlobalFunc::OUTP(ofs, "out_proj_band", out_proj_band, "output projected band structure"); ModuleBase::GlobalFunc::OUTP(ofs, "restart_save", restart_save, "print to disk every step for restart"); ModuleBase::GlobalFunc::OUTP(ofs, "restart_load", restart_load, "restart from disk"); @@ -223,7 +222,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dk", lcao_dk, "delta k for 1D integration in LCAO"); ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dr", lcao_dr, "delta r for 1D integration in LCAO"); ModuleBase::GlobalFunc::OUTP(ofs, "lcao_rmax", lcao_rmax, "max R for 1D two-center integration table"); - ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix (with precision "+std::to_string(out_mat_hs[1])+")"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs2", out_mat_hs2, "output H(R) and S(R) matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_dh", 
out_mat_dh, "output of derivative of H(R) matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_xc", out_mat_xc, "output exchange-correlation matrix in KS-orbital representation"); @@ -248,7 +247,6 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "mixing_type", mixing_mode, "plain; pulay; broyden"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta", mixing_beta, "mixing parameter: 0 means no new charge"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_ndim", mixing_ndim, "mixing dimension in pulay or broyden"); - ModuleBase::GlobalFunc::OUTP(ofs, "mixing_restart", mixing_restart, "which step to restart mixing during SCF"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0", mixing_gg0, "mixing parameter in kerker"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density"); ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker"); @@ -495,32 +493,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function"); - - ofs << "\n#Parameters (24.PEXSI)" << std::endl; - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv 
communication pattern"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in 
the initial interval, the interval is expanded by muInertiaExpansion"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"); - + ofs.close(); return; } \ No newline at end of file diff --git a/source/module_ri/Exx_LRI.hpp b/source/module_ri/Exx_LRI.hpp index c9b3b69601..ace9097bb0 100644 --- a/source/module_ri/Exx_LRI.hpp +++ b/source/module_ri/Exx_LRI.hpp @@ -12,6 +12,7 @@ #include "module_ri/exx_abfs-construct_orbs.h" #include "module_ri/exx_abfs-io.h" #include "module_ri/conv_coulomb_pot_k.h" +#include "module_ri/conv_coulomb_pot_k-template.h" #include "module_base/tool_title.h" #include "module_base/timer.h" #include "module_ri/serialization_cereal.h" @@ -70,19 +71,14 @@ void Exx_LRI::init(const MPI_Comm &mpi_comm_in, const K_Vectors &kv_in) case Conv_Coulomb_Pot_K::Ccp_Type::Ccp: return {}; case Conv_Coulomb_Pot_K::Ccp_Type::Hf: - { - // 4/3 * pi * Rcut^3 = V_{supercell} = V_{unitcell} * Nk - const int nspin0 = (GlobalV::NSPIN==2) ? 
2 : 1; - const double hf_Rcut = std::pow(0.75 * this->p_kv->nkstot_full/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0); - return {{"hf_Rcut", hf_Rcut}}; - } + return {}; case Conv_Coulomb_Pot_K::Ccp_Type::Hse: return {{"hse_omega", this->info.hse_omega}}; default: throw std::domain_error(std::string(__FILE__)+" line "+std::to_string(__LINE__)); break; } }; - this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times); + this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times, this->p_kv->nkstot_full); for( size_t T=0; T!=this->abfs.size(); ++T ) diff --git a/source/module_ri/LRI_CV_Tools.hpp b/source/module_ri/LRI_CV_Tools.hpp index 8ad95c3715..532e7104fb 100644 --- a/source/module_ri/LRI_CV_Tools.hpp +++ b/source/module_ri/LRI_CV_Tools.hpp @@ -250,10 +250,11 @@ LRI_CV_Tools::cal_latvec_range(const double &rcut_times) const ModuleBase::Vector3 proj = ModuleBase::Mathzone::latvec_projection( std::array,3>{GlobalC::ucell.a1, GlobalC::ucell.a2, GlobalC::ucell.a3}); const ModuleBase::Vector3 latvec_times = Rcut_max * rcut_times / (proj * GlobalC::ucell.lat0); - const ModuleBase::Vector3 latvec_times_ceil = {static_cast(std::ceil(latvec_times.x)), - static_cast(std::ceil(latvec_times.y)), - static_cast(std::ceil(latvec_times.z))}; - const ModuleBase::Vector3 period = 2 * latvec_times_ceil + ModuleBase::Vector3{1,1,1}; + const ModuleBase::Vector3 latvec_times_ceil = + {std::ceil(latvec_times.x), + std::ceil(latvec_times.y), + std::ceil(latvec_times.z)}; + const ModuleBase::Vector3 period = 2 * latvec_times_ceil + ModuleBase::Vector3{1,1,1}; return std::array{period.x, period.y, period.z}; } @@ -307,7 +308,7 @@ LRI_CV_Tools::get_dCVws( const Abfs::Vector3_Order R_delta = -tau0+tau1+(RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec); dCVws[it0][it1][R_delta][ix] = dCVs_B.second; } - } + } } return dCVws; } 
diff --git a/source/module_ri/conv_coulomb_pot_k-template.h b/source/module_ri/conv_coulomb_pot_k-template.h new file mode 100644 index 0000000000..9a3d245286 --- /dev/null +++ b/source/module_ri/conv_coulomb_pot_k-template.h @@ -0,0 +1,51 @@ +#ifndef CONV_COULOMB_POT_K_TEMPLATE_H +#define CONV_COULOMB_POT_K_TEMPLATE_H + +#include "conv_coulomb_pot_k.h" +#include +#include + +#include "../module_ri/test_code/exx_abfs-construct_orbs-test.h" + + +template< typename T > +T Conv_Coulomb_Pot_K::cal_orbs_ccp( + const T & orbs, + const Ccp_Type &ccp_type, + const std::map ¶meter, + const double rmesh_times, + const int& nks) +{ + T orbs_ccp(orbs.size()); + for( size_t i=0; i!=orbs.size(); ++i ) + orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times, nks ); + return orbs_ccp; +} + +extern template +Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp( + const Numerical_Orbital_Lm & orbs, + const Ccp_Type &ccp_type, + const std::map ¶meter, + const double rmesh_times, + const int& nks); + + + +template< typename T > +double Conv_Coulomb_Pot_K::get_rmesh_proportion( + const T & orbs, + const double psi_threshold) +{ + double rmesh_proportion=0; + for( const auto &orb : orbs ) + rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold)); + return rmesh_proportion; +} + +extern template +double Conv_Coulomb_Pot_K::get_rmesh_proportion( + const Numerical_Orbital_Lm & orbs, + const double psi_threshold); + +#endif \ No newline at end of file diff --git a/source/module_ri/conv_coulomb_pot_k.cpp b/source/module_ri/conv_coulomb_pot_k.cpp index 62dd582a44..9f573509ee 100644 --- a/source/module_ri/conv_coulomb_pot_k.cpp +++ b/source/module_ri/conv_coulomb_pot_k.cpp @@ -2,109 +2,104 @@ #include "../module_base/constants.h" #include "../module_basis/module_ao/ORB_atomic_lm.h" #include "../module_hamilt_pw/hamilt_pwdft/global.h" - -namespace Conv_Coulomb_Pot_K +std::vector Conv_Coulomb_Pot_K::cal_psi_ccp( const std::vector & psif ) { + 
std::vector psik2_ccp(psif.size()); + for( size_t ik=0; ik cal_psi_ccp( - const std::vector & psif) - { - std::vector psik2_ccp(psif.size()); - for( size_t ik=0; ik cal_psi_hf( - const std::vector &psif, - const std::vector &k_radial, - const double hf_Rcut) - { - std::vector psik2_ccp(psif.size()); - for (size_t ik = 0; ik < psif.size(); ++ik) - psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * hf_Rcut)); - return psik2_ccp; - } +// rongshi add 2022-07-27 +// Sphere truction -- Spencer +std::vector Conv_Coulomb_Pot_K::cal_psi_hf(const int& nks, const std::vector &psif, + const std::vector &k_radial, + const double omega = 0) +{ + const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1; + const double Rc = std::pow(0.75 * nks/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0); + std::vector psik2_ccp(psif.size()); + for (size_t ik = 0; ik < psif.size(); ++ik) + psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * Rc)); + return psik2_ccp; +} - std::vector cal_psi_hse( - const std::vector & psif, - const std::vector & k_radial, - const double hse_omega) - { - std::vector psik2_ccp(psif.size()); - for( size_t ik=0; ik Conv_Coulomb_Pot_K::cal_psi_hse( + const std::vector & psif, + const std::vector & k_radial, + const double omega) +{ + std::vector psik2_ccp(psif.size()); + for( size_t ik=0; ik - Numerical_Orbital_Lm cal_orbs_ccp( - const Numerical_Orbital_Lm &orbs, - const Ccp_Type &ccp_type, - const std::map ¶meter, - const double rmesh_times) +template<> +Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp( + const Numerical_Orbital_Lm &orbs, + const Ccp_Type &ccp_type, + const std::map ¶meter, + const double rmesh_times, + const int& nks) +{ + std::vector psik2_ccp; + switch(ccp_type) { - std::vector psik2_ccp; - switch(ccp_type) - { - case Ccp_Type::Ccp: - psik2_ccp = cal_psi_ccp( orbs.get_psif() ); break; - case Ccp_Type::Hf: - psik2_ccp = cal_psi_hf( orbs.get_psif(), orbs.get_k_radial(), 
parameter.at("hf_Rcut")); break; - case Ccp_Type::Hse: - psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") ); break; - default: - throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) ); break; - } - - const double dr = orbs.get_rab().back(); - const int Nr = (static_cast(orbs.getNr()*rmesh_times)) | 1; - std::vector rab(Nr); - for( size_t ir=0; ir r_radial(Nr); - for( size_t ir=0; ir - double get_rmesh_proportion( - const Numerical_Orbital_Lm &orbs, - const double psi_threshold) + const double dr = orbs.get_rab().back(); + const int Nr = (static_cast(orbs.getNr()*rmesh_times)) | 1; + std::vector rab(Nr); + for( size_t ir=0; ir r_radial(Nr); + for( size_t ir=0; ir +double Conv_Coulomb_Pot_K::get_rmesh_proportion( + const Numerical_Orbital_Lm &orbs, + const double psi_threshold) +{ + for(int ir=orbs.getNr()-1; ir>=0; --ir) { - for(int ir=orbs.getNr()-1; ir>=0; --ir) - { - if(std::abs(orbs.getPsi(ir))>=psi_threshold) - return static_cast(ir)/orbs.getNr(); - } - return 0.0; + if(std::abs(orbs.getPsi(ir))>=psi_threshold) + return static_cast(ir)/orbs.getNr(); } - + return 0.0; } diff --git a/source/module_ri/conv_coulomb_pot_k.h b/source/module_ri/conv_coulomb_pot_k.h index d464a53f91..9adec9d915 100644 --- a/source/module_ri/conv_coulomb_pot_k.h +++ b/source/module_ri/conv_coulomb_pot_k.h @@ -5,37 +5,40 @@ #include #include -namespace Conv_Coulomb_Pot_K +class Conv_Coulomb_Pot_K { - enum class Ccp_Type{ // parameter: - Ccp, // - Hf, // "hf_Rcut" - Hse}; // "hse_omega" +public: - template T cal_orbs_ccp( + enum class Ccp_Type{ // parameter: + Ccp, // + Hf, // + Hse}; // "hse_omega" + + template static T cal_orbs_ccp( const T &orbs, const Ccp_Type &ccp_type, const std::map ¶meter, - const double rmesh_times); - - //private: - template< typename T > double get_rmesh_proportion( + const double rmesh_times, + const int& nks); + +private: + + template< typename T > static double 
get_rmesh_proportion( const T &orbs, const double psi_threshold); + +private: - //private: - std::vector cal_psi_ccp( - const std::vector & psif); - std::vector cal_psi_hf( - const std::vector &psif, - const std::vector &k_radial, - const double hf_Rcut); - std::vector cal_psi_hse( + static std::vector cal_psi_ccp( const std::vector & psif ); + + static std::vector cal_psi_hf(const int& nks, const std::vector &psif, + const std::vector &k_radial, + const double omega); + + static std::vector cal_psi_hse( const std::vector & psif, const std::vector & k_radial, - const double hse_omega); -} - -#include "conv_coulomb_pot_k.hpp" + const double omega); +}; #endif \ No newline at end of file diff --git a/source/module_ri/conv_coulomb_pot_k.hpp b/source/module_ri/conv_coulomb_pot_k.hpp deleted file mode 100644 index 5ca3abe5c8..0000000000 --- a/source/module_ri/conv_coulomb_pot_k.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CONV_COULOMB_POT_K_HPP -#define CONV_COULOMB_POT_K_HPP - -#include "conv_coulomb_pot_k.h" -#include -#include - -namespace Conv_Coulomb_Pot_K -{ - - template< typename T > - std::vector cal_orbs_ccp( - const std::vector & orbs, - const Ccp_Type &ccp_type, - const std::map ¶meter, - const double rmesh_times) - { - std::vector orbs_ccp(orbs.size()); - for( size_t i=0; i!=orbs.size(); ++i ) - orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times); - return orbs_ccp; - } - - template< typename T > - double get_rmesh_proportion( - const std::vector & orbs, - const double psi_threshold) - { - double rmesh_proportion=0; - for( const auto &orb : orbs ) - rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold)); - return rmesh_proportion; - } - -} - -#endif \ No newline at end of file diff --git a/source/module_ri/exx_lip.cpp b/source/module_ri/exx_lip.cpp index 0c4211d890..2f685be5f1 100644 --- a/source/module_ri/exx_lip.cpp +++ b/source/module_ri/exx_lip.cpp @@ -481,7 +481,7 @@ void Exx_Lip::b_cal( int ik, int iq, 
int ib) } std::complex * const porter = new std::complex [rho_basis->nrxx]; - + for(size_t iw=0; iw< GlobalV::NLOCAL; ++iw) { const std::complex * const phi_w = phi[iw]; @@ -495,7 +495,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib) if( Conv_Coulomb_Pot_K::Ccp_Type::Ccp==info.ccp_type || Conv_Coulomb_Pot_K::Ccp_Type::Hf==info.ccp_type ) if((iq==iq_vecik) && (gzero_rank_in_pool==GlobalV::RANK_IN_POOL)) /// need to check while use k_point parallel b0[iw] = b_w[rho_basis->ig_gge0]; - + for( size_t ig=0; ignpw; ++ig) b_w[ig] *= recip_qkg2[ig]; } @@ -634,14 +634,12 @@ void Exx_Lip::write_q_pack() const if(!GlobalV::RANK_IN_POOL) { const std::string exx_q_pack = "exx_q_pack/"; - int return_value=0; + const std::string command_mkdir = "test -d " + GlobalV::global_out_dir + exx_q_pack + " || mkdir " + GlobalV::global_out_dir + exx_q_pack; - return_value = system(command_mkdir.c_str()); - assert(return_value == 0); + system( command_mkdir.c_str() ); // Need to check - const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card; - return_value = system(command_kpoint.c_str()); - assert(return_value==0); + const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card; + system( command_kpoint.c_str() ); // Need to check std::stringstream ss_wf_wg; ss_wf_wg << GlobalV::global_out_dir << exx_q_pack << "wf_wg_" << GlobalV::MY_POOL; diff --git a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat index 1aa4b94ac1..af0ad58c0d 100644 --- a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat +++ b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat @@ -1,6 +1,6 @@ - 1 0.00000000 -3.38705933 
-0.79801307 5.06488210 5.06488210 7.84114355 9.60594903 - 2 0.17320508 -3.86201936 -0.07517988 5.13653145 5.13653145 7.91810064 9.68495616 - 3 0.34641016 -4.63079629 1.43419304 5.35284854 5.35284854 8.15540136 9.81498032 - 4 0.51961524 -5.25812925 3.25094996 5.69541211 5.69541211 8.51864422 9.65342396 - 5 0.69282032 -5.65198054 5.13706981 6.08466055 6.08466055 8.86784679 9.13706533 - 6 0.86602540 -5.78587376 6.28876728 6.28876728 6.28876728 8.83643550 8.83643550 +1 0 -3.3870593 -0.79801307 5.0648821 5.0648821 7.8411435 9.605949 +2 0.17320508 -3.8620194 -0.075179882 5.1365314 5.1365314 7.9181006 9.6849562 +3 0.34641016 -4.6307963 1.434193 5.3528485 5.3528485 8.1554014 9.8149803 +4 0.51961524 -5.2581292 3.25095 5.6954121 5.6954121 8.5186442 9.653424 +5 0.69282032 -5.6519805 5.1370698 6.0846605 6.0846605 8.8678468 9.1370653 +6 0.8660254 -5.7858738 6.2887673 6.2887673 6.2887673 8.8364355 8.8364355 diff --git a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat index 0a1088ceb5..ced64a27bf 100644 --- a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat +++ b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat @@ -1,6 +1,6 @@ - 1 0.00000000 -3.38704889 -0.79801666 5.06489026 5.06490253 7.84115900 9.60595365 - 2 0.17320508 -3.86198803 -0.07518005 5.13653307 5.13653200 7.91811409 9.68496369 - 3 0.34641016 -4.63079483 1.43421001 5.35290770 5.35285262 8.15538060 9.81497711 - 4 0.51961524 -5.25812025 3.25095973 5.69541240 5.69542353 8.51883375 9.65336577 - 5 0.69282032 -5.65197852 5.13708720 6.08467575 6.08469139 8.86796173 9.13705931 - 6 0.86602540 -5.78586742 6.28881704 6.28877545 6.28878152 8.83653074 8.83650706 +1 0 -3.3870489 -0.79801666 5.0648903 5.0649025 7.841159 9.6059536 +2 0.17320508 -3.861988 -0.075180055 5.1365331 5.136532 7.9181141 9.6849637 +3 0.34641016 -4.6307948 1.43421 5.3529077 5.3528526 8.1553806 9.8149771 +4 0.51961524 -5.2581202 3.2509597 5.6954124 5.6954235 8.5188338 9.6533658 +5 0.69282032 
-5.6519785 5.1370872 6.0846758 6.0846914 8.8679617 9.1370593 +6 0.8660254 -5.7858674 6.288817 6.2887755 6.2887815 8.8365307 8.8365071 diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref index bffad6b08a..168c5723a3 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref @@ -3,92 +3,92 @@ CALCULATE THE MULLIkEN ANALYSIS FOR EACH ATOM Total charge: 32 Decomposed Mulliken populations 0 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.317 0.06196 -0.2625 -0.07949 - sum over m 1.317 0.06196 -0.2625 -0.07949 -s 1 1.726 -0.01809 0.09886 -0.01413 - sum over m 1.726 -0.01809 0.09886 -0.01413 -s 2 0.03246 -0.04153 0.2209 -0.02228 - sum over m 0.03246 -0.04153 0.2209 -0.02228 -s 3 -0.02921 0.005609 -0.025 -0.005114 - sum over m -0.02921 0.005609 -0.025 -0.005114 - sum over m+zeta 3.046 0.007945 0.0323 -0.121 -pz 0 2.034 -0.001186 0.005932 -3.981e-06 -px 0 2.033 -0.001283 0.006419 -3.989e-06 -py 0 2.033 -0.001188 0.005944 -3.979e-06 - sum over m 6.1 -0.003658 0.0183 -1.195e-05 -pz 1 -0.02621 0.0005578 -0.002789 0 -px 1 -0.02639 0.0006107 -0.003054 0 -py 1 -0.02603 0.0005536 -0.002768 0 - sum over m -0.07863 0.001722 -0.008611 0 - sum over m+zeta 6.021 -0.001936 0.009684 -1.277e-05 -dz^2 0 1.964 0.0008269 -0.004128 -1.088e-05 -dxz 0 1.044 0.156 -0.7849 -0.0003055 -dyz 0 0.9592 0.1564 -0.7869 -0.0003096 -dx^2-y^2 0 1.967 0.000752 -0.003754 -1.059e-05 -dxy 0 1.055 0.1558 -0.7835 -0.0003047 - sum over m 6.988 0.4698 -2.363 -0.0009413 -dz^2 1 0.03863 -0.0008716 0.004365 -1.357e-05 -dxz 1 -0.03708 -0.004148 0.02101 1.956e-05 -dyz 1 -0.03373 -0.004494 0.02274 1.968e-05 -dx^2-y^2 1 0.03943 -0.0009117 0.004566 -1.471e-05 -dxy 1 -0.03733 -0.004056 0.02055 1.945e-05 - sum over m -0.03008 -0.01448 0.07324 3.041e-05 - sum over m+zeta 6.958 0.4553 -2.29 -0.0009109 -fz^3 0 -0.007044 0.0007552 -0.003776 -1.406e-06 -fxz^2 0 -0.002046 0.0002628 
-0.001314 0 -fyz^2 0 -0.00273 0.00029 -0.00145 0 -fzx^2-zy^2 0 5.811e-05 0 3.451e-06 0 -fxyz 0 1.14e-05 1.249e-06 -6.306e-06 0 -fx^3-3*xy^2 0 -0.003379 0.0004381 -0.00219 0 -f3yx^2-y^3 0 -0.00407 0.0004626 -0.002313 0 - sum over m -0.0192 0.002209 -0.01105 -4.307e-06 - sum over m+zeta -0.0192 0.002209 -0.01105 -4.307e-06 -Total Charge on atom: Fe 16.01 -Total Magnetism on atom: Fe (0.4635, -2.259, -0.1219) +s 0 1.317 0.05552 0.2843 0.02903 + sum over m 1.317 0.05552 0.2843 0.02903 +s 1 1.726 -0.01923 -0.09498 0.005159 + sum over m 1.726 -0.01923 -0.09498 0.005159 +s 2 0.03246 -0.04333 -0.2148 0.008137 + sum over m 0.03246 -0.04333 -0.2148 0.008137 +s 3 -0.02921 0.005194 0.02641 0.001867 + sum over m -0.02921 0.005194 0.02641 0.001867 + sum over m+zeta 3.046 -0.001842 0.0009368 0.04419 +pz 0 2.034 -0.001185 -0.005932 1.545e-06 +px 0 2.033 -0.001283 -0.006419 1.538e-06 +py 0 2.033 -0.001188 -0.005944 1.543e-06 + sum over m 6.1 -0.003656 -0.01829 4.626e-06 +pz 1 -0.02622 0.0005602 0.002791 0 +px 1 -0.02639 0.0006145 0.003054 0 +py 1 -0.02603 0.0005563 0.00277 0 + sum over m -0.07864 0.001731 0.008615 0 + sum over m+zeta 6.021 -0.001925 -0.00968 5.611e-06 +dz^2 0 1.964 0.0008273 0.004131 4.077e-06 +dxz 0 1.044 0.1755 0.7507 0.002258 +dyz 0 0.9544 0.1768 0.7532 0.002329 +dx^2-y^2 0 1.967 0.0007523 0.003756 3.978e-06 +dxy 0 1.055 0.1751 0.7495 0.002251 + sum over m 6.984 0.529 2.261 0.006846 +dz^2 1 0.03863 -0.0008699 -0.004363 5.197e-06 +dxz 1 -0.03759 -0.005346 -0.01936 -0.0001322 +dyz 1 -0.03407 -0.005734 -0.02118 -0.0001342 +dx^2-y^2 1 0.03943 -0.0009093 -0.004564 5.691e-06 +dxy 1 -0.03787 -0.005246 -0.0189 -0.0001314 + sum over m -0.03146 -0.01811 -0.06836 -0.000387 + sum over m+zeta 6.952 0.5109 2.193 0.006459 +fz^3 0 -0.007049 0.0007578 0.003775 0 +fxz^2 0 -0.002045 0.0002638 0.001312 0 +fyz^2 0 -0.002729 0.0002912 0.001448 0 +fzx^2-zy^2 0 6.273e-05 0 -6.642e-06 0 +fxyz 0 1.153e-05 1.446e-06 5.675e-06 0 +fx^3-3*xy^2 0 -0.00338 0.00044 0.002189 0 +f3yx^2-y^3 0 
-0.00407 0.0004646 0.002311 0 + sum over m -0.0192 0.002219 0.01103 2.581e-06 + sum over m+zeta -0.0192 0.002219 0.01103 2.581e-06 +Total Charge on atom: Fe 16 +Total Magnetism on atom: Fe (0.5093, 2.195, 0.05066) 1 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.275 0.04699 -0.2823 0.07949 - sum over m 1.275 0.04699 -0.2823 0.07949 -s 1 1.755 -0.01866 0.08491 0.01412 - sum over m 1.755 -0.01866 0.08491 0.01412 -s 2 -0.02899 -0.04221 0.1978 0.02226 - sum over m -0.02899 -0.04221 0.1978 0.02226 -s 3 -0.04712 0.00595 -0.03281 0.005133 - sum over m -0.04712 0.00595 -0.03281 0.005133 - sum over m+zeta 2.954 -0.007928 -0.03239 0.121 -pz 0 2.032 -0.001371 0.00685 3.967e-06 -px 0 2.025 -0.0009218 0.004606 3.958e-06 -py 0 2.032 -0.001333 0.006664 3.965e-06 - sum over m 6.089 -0.003626 0.01812 1.189e-05 -pz 1 -0.02529 0.0005803 -0.002904 0 -px 1 -0.01606 0.0001295 -0.0006492 0 -py 1 -0.02466 0.0005625 -0.002815 0 - sum over m -0.06602 0.001272 -0.006367 0 - sum over m+zeta 6.023 -0.002353 0.01175 1.25e-05 -dz^2 0 1.957 0.001154 -0.005778 1.149e-05 -dxz 0 1.091 0.1517 -0.7637 -8.462e-05 -dyz 0 0.9556 0.1553 -0.7815 -8.443e-05 -dx^2-y^2 0 1.947 0.001648 -0.008249 1.233e-05 -dxy 0 1.106 0.1508 -0.7591 -8.432e-05 - sum over m 7.056 0.4606 -2.318 -0.0002295 -dz^2 1 0.03925 -0.001067 0.005328 1.289e-05 -dxz 1 -0.03558 -0.002824 0.01439 2.5e-06 -dyz 1 -0.03117 -0.003962 0.0201 2.798e-06 -dx^2-y^2 1 0.04266 -0.001401 0.006997 1.29e-05 -dxy 1 -0.03637 -0.002747 0.01401 2.475e-06 - sum over m -0.02122 -0.012 0.06082 3.356e-05 - sum over m+zeta 7.035 0.4486 -2.257 -0.000196 -fz^3 0 -0.006615 0.0007206 -0.003605 1.352e-06 -fxz^2 0 -0.001955 0.0002554 -0.001278 0 -fyz^2 0 -0.002684 0.0002735 -0.001368 0 -fzx^2-zy^2 0 9.383e-05 1.68e-05 -8.473e-05 0 -fxyz 0 2.053e-05 3.66e-06 -1.839e-05 0 -fx^3-3*xy^2 0 -0.003204 0.0004266 -0.002134 0 -f3yx^2-y^3 0 -0.003695 0.0004558 -0.002281 0 - sum over m -0.01804 0.002152 -0.01077 4.022e-06 - sum over m+zeta -0.01804 0.002152 -0.01077 4.022e-06 -Total 
Charge on atom: Fe 15.99 -Total Magnetism on atom: Fe (0.4405, -2.289, 0.1208) +s 0 1.275 0.05341 0.2605 -0.02903 + sum over m 1.275 0.05341 0.2605 -0.02903 +s 1 1.755 -0.01752 -0.08879 -0.005156 + sum over m 1.755 -0.01752 -0.08879 -0.005156 +s 2 -0.02898 -0.0404 -0.2039 -0.00813 + sum over m -0.02898 -0.0404 -0.2039 -0.00813 +s 3 -0.04711 0.006367 0.03139 -0.001874 + sum over m -0.04711 0.006367 0.03139 -0.001874 + sum over m+zeta 2.954 0.001862 -0.0008532 -0.04419 +pz 0 2.032 -0.001369 -0.006852 -1.367e-06 +px 0 2.025 -0.0009208 -0.004608 -1.387e-06 +py 0 2.032 -0.001332 -0.006666 -1.366e-06 + sum over m 6.089 -0.003622 -0.01813 -4.119e-06 +pz 1 -0.02528 0.0005889 0.002889 0 +px 1 -0.01606 0.0001369 0.0006408 0 +py 1 -0.02466 0.000571 0.002802 0 + sum over m -0.066 0.001297 0.006331 2.367e-06 + sum over m+zeta 6.023 -0.002325 -0.01179 -1.753e-06 +dz^2 0 1.957 0.001158 0.005774 -3.913e-06 +dxz 0 1.097 0.1724 0.7275 0.002311 +dyz 0 0.9509 0.1759 0.7475 0.002269 +dx^2-y^2 0 1.947 0.001654 0.008245 -4.075e-06 +dxy 0 1.113 0.1714 0.7227 0.002304 + sum over m 7.065 0.5225 2.212 0.006876 +dz^2 1 0.03925 -0.001062 -0.005333 -4.383e-06 +dxz 1 -0.0366 -0.003947 -0.01263 -0.0001213 +dyz 1 -0.03157 -0.005197 -0.01856 -0.0001267 +dx^2-y^2 1 0.04266 -0.001394 -0.007002 -4.206e-06 +dxy 1 -0.03743 -0.003854 -0.01222 -0.0001203 + sum over m -0.02369 -0.01545 -0.05575 -0.0003768 + sum over m+zeta 7.041 0.5071 2.156 0.006499 +fz^3 0 -0.006614 0.0007261 0.003596 0 +fxz^2 0 -0.001954 0.0002565 0.001276 0 +fyz^2 0 -0.002684 0.0002742 0.001366 0 +fzx^2-zy^2 0 9.09e-05 1.99e-05 8.018e-05 0 +fxyz 0 2.062e-05 4.102e-06 1.816e-05 0 +fx^3-3*xy^2 0 -0.003203 0.0004291 0.00213 0 +f3yx^2-y^3 0 -0.003698 0.0004635 0.002271 0 + sum over m -0.01804 0.002174 0.01074 0 + sum over m+zeta -0.01804 0.002174 0.01074 0 +Total Charge on atom: Fe 16 +Total Magnetism on atom: Fe (0.5088, 2.154, -0.03769) diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref 
b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref index 8a17a1fada..649ae1ef31 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref @@ -1,4 +1,4 @@ -etotref -6844.326716364628 -etotperatomref -3422.1633581823 +etotref -6844.685232776227 +etotperatomref -3422.3426163881 Compare_mulliken_pass 0 -totaltimeref 36.59 +totaltimeref 21.55 diff --git a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat index 87a35be1b0..c3cd3a0b6d 100644 --- a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat +++ b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat @@ -1,6 +1,6 @@ - 1 0.00000000 -3.20074324 -0.55268317 5.34115051 5.34115051 8.29516160 10.29249200 - 2 0.17320508 -3.66389133 0.16302859 5.41224692 5.41224692 8.38313120 10.35082356 - 3 0.34641016 -4.42333349 1.67151268 5.62380682 5.62380682 8.64342675 10.39966330 - 4 0.51961524 -5.04480873 3.49944170 5.95924219 5.95924219 9.02968708 10.13225743 - 5 0.69282032 -5.43307067 5.39954292 6.34143853 6.34143853 9.35699627 9.58079775 - 6 0.86602540 -5.56433513 6.53981221 6.53981221 6.53981221 9.28022796 9.28022796 +1 0 -3.2007432 -0.55268317 5.3411505 5.3411505 8.2951616 10.292492 +2 0.17320508 -3.6638913 0.16302859 5.4122469 5.4122469 8.3831312 10.350824 +3 0.34641016 -4.4233335 1.6715127 5.6238068 5.6238068 8.6434268 10.399663 +4 0.51961524 -5.0448087 3.4994417 5.9592422 5.9592422 9.0296871 10.132257 +5 0.69282032 -5.4330707 5.3995429 6.3414385 6.3414385 9.3569963 9.5807977 +6 0.8660254 -5.5643351 6.5398122 6.5398122 6.5398122 9.280228 9.280228 diff --git a/tests/integrate/Autotest.sh b/tests/integrate/Autotest.sh index 37908d1769..5310998097 100755 --- a/tests/integrate/Autotest.sh +++ b/tests/integrate/Autotest.sh @@ -74,11 +74,6 @@ check_out(){ # check every 'key' word #------------------------------------------------------ for key in $properties; do - - if [ $key == "totaltimeref" ]; then - # echo "time=$cal ref=$ref" - break - fi 
#-------------------------------------------------- # calculated value @@ -96,6 +91,11 @@ check_out(){ #-------------------------------------------------- deviation=`awk 'BEGIN {x='$ref';y='$cal';printf "%.'$ca'f\n",x-y}'` + if [ $key == "totaltimeref" ]; then + # echo "time=$cal ref=$ref" + break + fi + #-------------------------------------------------- # If deviation < threshold, then the test passes, diff --git a/toolchain/README.md b/toolchain/README.md index a8ce2f711e..3e747006b7 100644 --- a/toolchain/README.md +++ b/toolchain/README.md @@ -91,7 +91,6 @@ The needed dependencies version default: - `LibXC` 6.2.2 - `ELPA` 2023.05.001 - `CEREAL` 1.3.2 -- `RapidJSON` 1.1.0 And Intel-oneAPI need user or server manager to manually install from Intel. [Intel-oneAPI](https://www.intel.cn/content/www/cn/zh/developer/tools/oneapi/toolkits.html) @@ -103,12 +102,12 @@ Dependencies below are optional, which is NOT installed by default: Users can install them by using `--with-*=install` in toolchain*.sh, which is `no` in default. > Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, which is also work for libnpy, which is more recommended. -Notice: for `CEREAL`,`RapidJSON`, `Libnpy`, `LibRI` and `LibComm`, +Notice: for `CEREAL`, `Libnpy`, `LibRI` and `LibComm`, you need to download them from github.com, rename it as formatted, and put them in `build` directory at the same time e.g.: ```shell -# packages downloaded from github.com, RapidJSON is not supported now +# packages downloaded from github.com mv v1.3.2.tar.gz build/cereal-1.3.2.tar.gz ``` @@ -176,7 +175,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ ...... 
``` -Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now +Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS ### shell problem If you encounter problem like: @@ -207,8 +206,6 @@ The default compiler for Intel-oneAPI is `icpx` and `icx`, which will cause prob The best way is to change `icpx` to `icpc`, `icx` to `icc`. user can manually change it in toolchain*.sh via `--with-intel-classic=yes` -Notice: `icc` and `icpc` from Intel Classic Compiler of Intel-oneAPI is not supported for 2024.0 and newer version. - ### LibRI and LibComm problem (There is some problem sometimes when compling with LibRI and LibComm, detailed information is needed) @@ -219,9 +216,6 @@ Sometimes Intel-oneAPI have problem to link `mpirun`, which will always show in 2023.2.0 version of MPI in Intel-oneAPI. Try `source /path/to/setvars.sh` or install another version of IntelMPI may help. -which is fixed in 2024.0.0 version of Intel-oneAPI, -And will not occur in Intel-MPI before 2021.10.0 (Intel-oneAPI before 2023.2.0) - More problem and possible solution can be accessed via [#2928](https://github.com/deepmodeling/abacus-develop/issues/2928) @@ -236,6 +230,9 @@ from ABACUS repo, make dependencies package more independent and flexible. 2. Users can manually change `pkg_install_dir` variable in `scripts/stage*/install*` to change the installation directory of each packages, which may let the installation more fiexible. +3. Users can manually change `INSTALL` variable in `scripts/common_vars.sh` +to change the installation directory of all packages, which may let the +installation more fiexible. ## More diff --git a/toolchain/install_abacus_toolchain.sh b/toolchain/install_abacus_toolchain.sh index 68c7959ce1..be41687307 100755 --- a/toolchain/install_abacus_toolchain.sh +++ b/toolchain/install_abacus_toolchain.sh @@ -182,12 +182,10 @@ The --with-PKG options follow the rules: --with-scalapack Parallel linear algebra library, needed for parallel calculations. 
Default = install - --with-elpa Eigenvalue SoLvers for Petaflop-Applications library. - Fast library for large parallel jobs, Especially for ABACUS LCAO - Default = install --with-cereal Enable cereal for ABACUS LCAO Default = install - --with-rapidjson Enable rapidjson for ABACUS to read/write json files + --with-elpa Eigenvalue SoLvers for Petaflop-Applications library. + Fast library for large parallel jobs. Default = install --with-libtorch Enable libtorch the machine learning framework needed for DeePKS Default = no @@ -232,7 +230,7 @@ EOF tool_list="gcc intel cmake" mpi_list="mpich openmpi intelmpi" math_list="mkl acml openblas" -lib_list="fftw libxc scalapack elpa cereal rapidjson libtorch libnpy libri libcomm" +lib_list="fftw libxc scalapack elpa cereal libtorch libnpy libri libcomm" package_list="${tool_list} ${mpi_list} ${math_list} ${lib_list}" # ------------------------------------------------------------------------ @@ -266,8 +264,7 @@ with_acml="__SYSTEM__" with_openblas="__INSTALL__" with_elpa="__INSTALL__" with_cereal="__INSTALL__" -with_rapidjson="__INSTALL__" -# with_libtorch="__DONTUSE__" # default +# with_libtorch="__DONTUSE__" # with_libnpy="__DONTUSE__" # with_libri="__DONTUSE__" # with_libcomm="__DONTUSE__" @@ -552,9 +549,6 @@ while [ $# -ge 1 ]; do --with-cereal*) with_cereal=$(read_with "${1}") ;; - --with-rapidjson*) - with_rapidjson=$(read_with "${1}") - ;; --with-libnpy*) with_libnpy=$(read_with "${1}") ;; diff --git a/toolchain/scripts/common_vars.sh b/toolchain/scripts/common_vars.sh index 51f941f6fe..d04d40fa39 100755 --- a/toolchain/scripts/common_vars.sh +++ b/toolchain/scripts/common_vars.sh @@ -7,7 +7,8 @@ # directories and files used by the installer ROOTDIR=${ROOTDIR:-"$(pwd -P)"} SCRIPTDIR=${SCRIPTDIR:-"${ROOTDIR}/scripts"} -INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} # should not be changed +INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} +#INSTALLDIR=${INSTALLDIR:-"${HOME}/abacus_deps"} # advanced installation 
BUILDDIR=${BUILDDIR:-"${ROOTDIR}/build"} SETUPFILE=${SETUPFILE:-"${INSTALLDIR}/setup"} ARCH_FILE_TEMPLATE=${ARCH_FILE_TEMPLATE:-"${SCRIPTDIR}/arch_base.tmpl"} diff --git a/toolchain/scripts/stage4/install_rapidjson.sh b/toolchain/scripts/stage4/install_rapidjson.sh deleted file mode 100755 index b63789ffff..0000000000 --- a/toolchain/scripts/stage4/install_rapidjson.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash -e - -# TODO: Review and if possible fix shellcheck errors. -# shellcheck disable=all -# RAPIDJSON is not need any complex setting -# Only problem is the installation from github.com - -[ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0 -SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)" - -rapidjson_ver="1.1.0" -rapidjson_sha256="bf7ced29704a1e696fbccf2a2b4ea068e7774fa37f6d7dd4039d0787f8bed98e" -source "${SCRIPT_DIR}"/common_vars.sh -source "${SCRIPT_DIR}"/tool_kit.sh -source "${SCRIPT_DIR}"/signal_trap.sh -source "${INSTALLDIR}"/toolchain.conf -source "${INSTALLDIR}"/toolchain.env - -[ -f "${BUILDDIR}/setup_rapidjson" ] && rm "${BUILDDIR}/setup_rapidjson" - -RAPIDJSON_CFLAGS="" -! [ -d "${BUILDDIR}" ] && mkdir -p "${BUILDDIR}" -cd "${BUILDDIR}" - -case "$with_rapidjson" in - __INSTALL__) - echo "==================== Installing RAPIDJSON ====================" - dirname="rapidjson-${rapidjson_ver}" - pkg_install_dir="${INSTALLDIR}/$dirname" - #pkg_install_dir="${HOME}/lib/rapidjson/${rapidjson_ver}" - install_lock_file="$pkg_install_dir/install_successful" - url="https://github.com/Tencent/rapidjson/archive/refs/tags/v${rapidjson_ver}.tar.gz" - filename="rapidjson-${rapidjson_ver}.tar.gz" - if verify_checksums "${install_lock_file}"; then - echo "$dirname is already installed, skipping it." - else - if [ -f $filename ]; then - echo "$filename is found" - else - # download from github.com and checksum - echo "wget --quiet $url -O $filename" - if ! 
wget --quiet $url -O $filename; then - report_error "failed to download $url" - recommend_offline_installation $filename $url - fi - # checksum - checksum "$filename" "$rapidjson_sha256" - fi - echo "Installing from scratch into ${pkg_install_dir}" - [ -d $dirname ] && rm -rf $dirname - tar -xzf $filename - mkdir -p "${pkg_install_dir}" - cp -r $dirname/* "${pkg_install_dir}/" - write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage4/$(basename ${SCRIPT_NAME})" - fi - ;; - __SYSTEM__) - echo "==================== CANNOT Finding RAPIDJSON from system paths NOW ====================" - recommend_offline_installation $filename $url - # How to do it in rapidjson? -- Zhaoqing in 2023/08/23 - # check_lib -lxcf03 "libxc" - # check_lib -lxc "libxc" - # add_include_from_paths LIBXC_CFLAGS "xc.h" $INCLUDE_PATHS - # add_lib_from_paths LIBXC_LDFLAGS "libxc.*" $LIB_PATHS - ;; - __DONTUSE__) ;; - - *) - echo "==================== Linking RAPIDJSON to user paths ====================" - check_dir "${pkg_install_dir}" - RAPIDJSON_CFLAGS="-I'${pkg_install_dir}'" - ;; -esac -if [ "$with_rapidjson" != "__DONTUSE__" ]; then - if [ "$with_rapidjson" != "__SYSTEM__" ]; then - # LibRI deps should find rapidjson include in CPATH - cat << EOF > "${BUILDDIR}/setup_rapidjson" -prepend_path CPATH "$pkg_install_dir/include" -export CPATH="${pkg_install_dir}/include:"${CPATH} -EOF - cat "${BUILDDIR}/setup_rapidjson" >> $SETUPFILE - fi - cat << EOF >> "${BUILDDIR}/setup_rapidjson" -export RAPIDJSON_CFLAGS="${RAPIDJSON_CFLAGS}" -export RAPIDJSON_ROOT="$pkg_install_dir" -EOF -fi - -load "${BUILDDIR}/setup_rapidjson" -write_toolchain_env "${INSTALLDIR}" - -cd "${ROOTDIR}" -report_timing "rapidjson" diff --git a/toolchain/scripts/stage4/install_stage4.sh b/toolchain/scripts/stage4/install_stage4.sh index b5c7cf5eed..ffe8f670c9 100755 --- a/toolchain/scripts/stage4/install_stage4.sh +++ b/toolchain/scripts/stage4/install_stage4.sh @@ -4,7 +4,6 @@ # shellcheck disable=all 
./scripts/stage4/install_cereal.sh -./scripts/stage4/install_rapidjson.sh ./scripts/stage4/install_libtorch.sh ./scripts/stage4/install_libnpy.sh ./scripts/stage4/install_libri.sh diff --git a/toolchain/scripts/tool_kit.sh b/toolchain/scripts/tool_kit.sh index 120b623fee..d07445089b 100755 --- a/toolchain/scripts/tool_kit.sh +++ b/toolchain/scripts/tool_kit.sh @@ -54,12 +54,9 @@ By download $__filename from $__url, Rename it as $__filename and put it into ${BUILDDIR}, And re-run toolchain installation script. -You can manually install requirements packages via: -1. Download from www.cp2k.org/static/downloads (for OpenBLAS, OpenMPI and Others) -2. Download from github.com (for CEREAL, RapidJSON, libnpy, LibRI and others stage4 packages) -3. Use git submodule update --init --recursive (for LibRI) -4. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v2 -O abacus-deps.zip -5. for Intel-oneAPI, please contact your server manager our visit Intel official website +Instead of github.com. you can manually install requirements packages via: +1. Download from www.cp2k.org/static/downloads +2. 
wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v1 -O abacus-deps-v1.zip EOF } diff --git a/toolchain/toolchain_gnu.sh b/toolchain/toolchain_gnu.sh index 04635bb63d..57105f601b 100755 --- a/toolchain/toolchain_gnu.sh +++ b/toolchain/toolchain_gnu.sh @@ -18,7 +18,6 @@ --with-fftw=install \ --with-elpa=install \ --with-cereal=install \ ---with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ diff --git a/toolchain/toolchain_intel-mpich.sh b/toolchain/toolchain_intel-mpich.sh index fcf3cc41ee..ffc2626670 100755 --- a/toolchain/toolchain_intel-mpich.sh +++ b/toolchain/toolchain_intel-mpich.sh @@ -21,7 +21,6 @@ --with-fftw=no \ --with-elpa=install \ --with-cereal=install \ ---with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ diff --git a/toolchain/toolchain_intel.sh b/toolchain/toolchain_intel.sh index e5298c570d..8f391be008 100755 --- a/toolchain/toolchain_intel.sh +++ b/toolchain/toolchain_intel.sh @@ -22,7 +22,6 @@ --with-fftw=no \ --with-elpa=install \ --with-cereal=install \ ---with-rapidjson=install \ --with-libtorch=no \ --with-libnpy=no \ --with-libri=no \ From c4c5272ed2418d275d404c753dcb9f1ffd49a433 Mon Sep 17 00:00:00 2001 From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com> Date: Sun, 28 Jan 2024 14:17:41 +0800 Subject: [PATCH 18/44] Update FindPEXSI.cmake to fix Comments --- cmake/FindPEXSI.cmake | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index 062764acce..b1565d2c06 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -1,9 +1,14 @@ ############################################################################### -# - Find cereal -# Find the native cereal headers. +# - Find PEXSI +# Find PEXSI and its dependencies. # -# PEXSI_FOUND - True if cereal is found. -# PEXSI_INCLUDE_DIR - Where to find cereal headers. +# PEXSI_FOUND - True if pexsi is found. 
+# PEXSI_INCLUDE_DIR - Where to find pexsi headers. +# PEXSI_LIBRARY - pexsi library. +# ParMETIS_INCLUDE_DIR - Where to find pexsi headers. +# ParMETIS_LIBRARY - parmetis library. +# METIS_LIBRARY - metis library. +# SuperLU_DIST_LIBRARY - superlu_dist library. find_path(PEXSI_INCLUDE_DIR NAMES c_pexsi_interface.h @@ -42,7 +47,7 @@ find_library(SuperLU_DIST_LIBRARY ) # Handle the QUIET and REQUIRED arguments and -# set Cereal_FOUND to TRUE if all variables are non-zero. +# set PEXSI_FOUND to TRUE if all variables are non-zero. include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) From f3e18a469bf9d481b862ae008b4700b2e07a3f47 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Mon, 29 Jan 2024 12:52:57 +0800 Subject: [PATCH 19/44] Fix CI errors --- source/Makefile | 4 +- source/module_base/parallel_global.cpp | 65 ++++++++++++++++---------- source/module_esolver/esolver_ks.cpp | 5 +- source/module_hsolver/diago_elpa.cpp | 32 ------------- source/module_io/input.cpp | 2 +- 5 files changed, 46 insertions(+), 62 deletions(-) diff --git a/source/Makefile b/source/Makefile index 7bd81f26d9..e726b8fa1d 100644 --- a/source/Makefile +++ b/source/Makefile @@ -7,7 +7,7 @@ include Makefile.vars INCLUDES = -I. 
-Icommands -I../ -Imodule_base/module_container LIBS = -lm -lpthread -OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES} +OPTS = -std=c++14 -pedantic -m64 ${INCLUDES} HONG = -D__LCAO HONG += -D__ELPA ifeq ($(OPENMP), ON) @@ -45,7 +45,7 @@ ifeq ($(DEBUG), ON) endif OPTS += -O0 -fsanitize=address -fno-omit-frame-pointer -Wall -g #It can check segmental defaults else - HONG += -O0 -march=native -DNDEBUG + HONG += -Ofast -march=native -DNDEBUG endif ifeq ($(INTEL), ON) diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp index 6740cd622c..3361d4b7f1 100644 --- a/source/module_base/parallel_global.cpp +++ b/source/module_base/parallel_global.cpp @@ -26,7 +26,6 @@ MPI_Comm STO_WORLD; MPI_Comm PARAPW_WORLD; // qianrui add it for sto-dft 2021-4-14 MPI_Comm GRID_WORLD; // mohan add 2012-01-13z MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 -MPI_Group GRID_GROUP; void Parallel_Global::myProd(std::complex *in,std::complex *inout,int *len,MPI_Datatype *dptr) { @@ -85,7 +84,6 @@ void Parallel_Global::split_diag_world(const int &diag_np) } MPI_Comm_split(MPI_COMM_WORLD, color, key, &DIAG_WORLD); - MPI_Comm_group(DIAG_WORLD, &GRID_GROUP); MPI_Comm_rank(DIAG_WORLD, &GlobalV::DRANK); MPI_Comm_size(DIAG_WORLD, &GlobalV::DSIZE); GlobalV::DCOLOR=color; @@ -102,30 +100,49 @@ void Parallel_Global::split_diag_world(const int &diag_np) -void Parallel_Global::split_grid_world(const int &grid_np) +void Parallel_Global::split_grid_world(const int &diag_np) { #ifdef __MPI - assert(grid_np>0); //LiuXh, 2020-12-14, diag_np --> grid_np - int myid; - MPI_Group WORLD_GROUP; - //MPI_Comm_rank(MPI_COMM_WORLD, &key); - MPI_Comm_rank(MPI_COMM_WORLD, &myid); //LiuXh, 2020-12-14, key --> myid - MPI_Comm_group(MPI_COMM_WORLD, &WORLD_GROUP); - - int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np}; - MPI_Group_range_incl(WORLD_GROUP, 1, &grid_proc_range, &GRID_GROUP); - - GRID_WORLD=MPI_COMM_NULL; - 
MPI_Comm_create(MPI_COMM_WORLD, GRID_GROUP, &GRID_WORLD); - if(GRID_WORLD != MPI_COMM_NULL) - { - MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD - MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD - }else - { - GlobalV::GRANK=-1; - GlobalV::GSIZE=-1; - } + assert(diag_np>0); + // number of processors in each 'grid group'. + int* group_grid_np = new int[diag_np]; + ModuleBase::GlobalFunc::ZEROS(group_grid_np, diag_np); + // average processors in each 'grid group' + int ave = GlobalV::NPROC/diag_np; + // remain processors. + int remain = GlobalV::NPROC - ave * diag_np; + + for(int i=0; iconv_elec && iter >= 5) + if (this->conv_elec) { - std::cout << "this->conv_elec" << std::endl; this->niter = iter; bool stop = this->do_after_converge(iter); - if(stop) {std::cout << "break\n"; break;} + if(stop) break; } // notice for restart if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1) diff --git a/source/module_hsolver/diago_elpa.cpp b/source/module_hsolver/diago_elpa.cpp index a50645bfb8..f3852493a1 100644 --- a/source/module_hsolver/diago_elpa.cpp +++ b/source/module_hsolver/diago_elpa.cpp @@ -35,24 +35,7 @@ namespace hsolver ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc); this->DecomposedState=0; // for k pointer, the decomposed s_mat can not be reused ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); - std::cout << "???" 
<< std::endl; - GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n"; - GlobalV::ofs_running << "print H" << std::endl; - for (int i = 0; i < h_mat.col; i++) - { - for (int j = 0; j < h_mat.row; j++) - { - if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001) - { - GlobalV::ofs_running << "0 "; - } - else - GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " "; - } - GlobalV::ofs_running << std::endl; - } es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer()); - ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); es.exit(); @@ -78,21 +61,6 @@ namespace hsolver //ELPA_Solver es(isReal, COMM_DIAG, GlobalV::NBANDS, h_mat.row, h_mat.col, h_mat.desc); ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc); ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); - GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n"; - GlobalV::ofs_running << "print H" << std::endl; - for (int i = 0; i < h_mat.col; i++) - { - for (int j = 0; j < h_mat.row; j++) - { - if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001) - { - GlobalV::ofs_running << "0 "; - } - else - GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " "; - } - GlobalV::ofs_running << std::endl; - } es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer()); ModuleBase::timer::tick("DiagoElpa", "elpa_solve"); es.exit(); diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index b74ac6b104..42ccdde938 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -4062,7 +4062,7 @@ void Input::Check(void) { ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis."); } - else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav") + else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav" && ks_solver != 
"bpcg") { ModuleBase::WARNING_QUIT("Input", "please check the ks_solver parameter!"); } From 4d16f56f5d583f887811e96ec1a5572153840adb Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Mon, 29 Jan 2024 13:13:21 +0800 Subject: [PATCH 20/44] Fix CI Errors and Merge with Upstream --- docs/advanced/input_files/input-main.md | 6 +- source/module_cell/read_atoms.cpp | 91 +++++++++++-------- source/module_cell/test/unitcell_test.cpp | 3 +- source/module_esolver/esolver_dp.cpp | 9 ++ source/module_esolver/esolver_lj.cpp | 9 ++ .../hamilt_lcaodft/LCAO_hamilt.hpp | 1 + source/module_io/input.cpp | 11 ++- source/module_io/test/input_test.cpp | 9 -- 8 files changed, 83 insertions(+), 56 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 8c675cab92..abd8cb7ff4 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -2123,14 +2123,14 @@ These variables are relevant when using hybrid functionals. ### exx_hybrid_step - **Type**: Integer -- **Availability**: *[exx_seperate_loop](#exx_separate_loop)==1* +- **Availability**: *[exx_separate_loop](#exx_separate_loop)==1* - **Description**: the maximal iteration number of the outer-loop, where the Fock exchange is calculated - **Default**: 100 ### exx_mixing_beta - **Type**: Real -- **Availability**: *[exx_seperate_loop](#exx_separate_loop)==1* +- **Availability**: *[exx_separate_loop](#exx_separate_loop)==1* - **Description**: mixing_beta for densty matrix in each iteration of the outer-loop - **Default**: 1.0 @@ -2417,7 +2417,7 @@ These variables are used to control molecular dynamics calculations. For more in - **Type**: Real - **Description**: The target pressure used in NPT ensemble simulations, the default value of `md_plast` is `md_pfirst`. If `md_plast` is set to be different from `md_pfirst`, ABACUS will automatically change the target pressure from `md_pfirst` to `md_plast`. 
-- **Default**: No default +- **Default**: -1.0 - **Unit**: kbar ### md_pfreq diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp index 4c6bf9c0eb..abe05909f2 100644 --- a/source/module_cell/read_atoms.cpp +++ b/source/module_cell/read_atoms.cpp @@ -1017,26 +1017,32 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int context<print_stru_file(fn,type,level); std::ifstream ifs; ifs.open("C1H2_STRU"); diff --git a/source/module_esolver/esolver_dp.cpp b/source/module_esolver/esolver_dp.cpp index d4ce7195a5..8551ead5ff 100644 --- a/source/module_esolver/esolver_dp.cpp +++ b/source/module_esolver/esolver_dp.cpp @@ -136,6 +136,15 @@ namespace ModuleESolver void ESolver_DP::cal_Stress(ModuleBase::matrix& stress) { stress = dp_virial; + + // external stress + double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + double external_stress[3] = {GlobalV::PRESS1, GlobalV::PRESS2, GlobalV::PRESS3}; + for (int i = 0; i < 3; i++) + { + stress(i, i) -= external_stress[i] / unit_transform; + } + ModuleIO::print_stress("TOTAL-STRESS", stress, true, false); } diff --git a/source/module_esolver/esolver_lj.cpp b/source/module_esolver/esolver_lj.cpp index 19e9d2369e..f48ff290d8 100644 --- a/source/module_esolver/esolver_lj.cpp +++ b/source/module_esolver/esolver_lj.cpp @@ -107,6 +107,15 @@ namespace ModuleESolver void ESolver_LJ::cal_Stress(ModuleBase::matrix& stress) { stress = lj_virial; + + // external stress + double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8; + double external_stress[3] = {GlobalV::PRESS1, GlobalV::PRESS2, GlobalV::PRESS3}; + for (int i = 0; i < 3; i++) + { + stress(i, i) -= external_stress[i] / unit_transform; + } + ModuleIO::print_stress("TOTAL-STRESS", stress, true, false); } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp index 
8edfb5be65..db8b32a13a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp @@ -51,6 +51,7 @@ void LCAO_Hamilt::calculate_HR_exx_sparse( { int is0_b, is1_b; std::tie(is0_b,is1_b) = RI_2D_Comm::split_is_block(is); + if (Hexxs.empty()) break; for(const auto &HexxA : Hexxs[is]) { const int iat0 = HexxA.first; diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index 42ccdde938..580dd203db 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -713,6 +713,8 @@ bool Input::Read(const std::string& fn) return false; // return error : false } + bool plast_find = false; // whether the parameter md_plast is found liuyu 2024-01-28 + ifs.rdstate(); while (ifs.good()) { @@ -1609,10 +1611,15 @@ bool Input::Read(const std::string& fn) else if (strcmp("md_pfirst", word) == 0) { read_value(ifs, mdp.md_pfirst); + if (!plast_find) + { + mdp.md_plast = mdp.md_pfirst; + } } else if (strcmp("md_plast", word) == 0) { read_value(ifs, mdp.md_plast); + plast_find = true; } else if (strcmp("md_pfreq", word) == 0) { @@ -3036,8 +3043,6 @@ void Input::Default_2(void) // jiyy add 2019-08-04 } if (!out_md_control) out_level = "m"; // zhengdy add 2019-04-07 - if (mdp.md_plast < 0.0) - mdp.md_plast = mdp.md_pfirst; if (mdp.md_tfreq == 0) { @@ -3955,8 +3960,6 @@ void Input::Check(void) // deal with input parameters , 2019-04-30 if (mdp.md_dt < 0) ModuleBase::WARNING_QUIT("Input::Check", "time interval of MD calculation should be set!"); - if (mdp.md_type == "npt" && mdp.md_pfirst < 0) - ModuleBase::WARNING_QUIT("Input::Check", "pressure of MD calculation should be set!"); if (mdp.md_type == "msst") { if (mdp.msst_qmass <= 0) diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp index 11bce873ab..decb59fdec 100644 --- a/source/module_io/test/input_test.cpp +++ b/source/module_io/test/input_test.cpp @@ -1140,15 +1140,6 @@ TEST_F(InputTest, Check) 
output = testing::internal::GetCapturedStdout(); EXPECT_THAT(output,testing::HasSubstr("time interval of MD calculation should be set!")); INPUT.mdp.md_dt = 1.0; - // - INPUT.mdp.md_type = "npt"; - INPUT.mdp.md_pmode = "iso"; - INPUT.mdp.md_pfirst = -1.0; - testing::internal::CaptureStdout(); - EXPECT_EXIT(INPUT.Check(),::testing::ExitedWithCode(0), ""); - output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output,testing::HasSubstr("pressure of MD calculation should be set!")); - INPUT.mdp.md_pfirst = 1.0; // INPUT.mdp.md_type = "msst"; INPUT.mdp.msst_qmass = -1.0; From 4857553ba450fd675557a8dd782ed9ca9fb90cfb Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Thu, 1 Feb 2024 13:50:44 +0800 Subject: [PATCH 21/44] Resolve Pull Request Reviews --- source/module_base/global_variable.cpp | 29 ----- source/module_base/global_variable.h | 26 ----- .../module_elecstate/elecstate_lcao_tddft.cpp | 2 - source/module_hsolver/diago_pexsi.cpp | 102 ++++++++++++++++++ source/module_hsolver/diago_pexsi.h | 31 ++++++ .../module_pexsi/dist_bcd_matrix.cpp | 9 +- .../module_pexsi/dist_bcd_matrix.h | 10 +- .../module_pexsi/dist_matrix_transformer.cpp | 22 ++-- .../module_pexsi/dist_matrix_transformer.h | 2 +- .../module_pexsi/pexsi_solver.cpp | 7 +- .../module_pexsi/simple_pexsi.cpp | 55 +++++----- .../module_pexsi/simple_pexsi.h | 2 +- source/module_io/input_conv.cpp | 81 +++++++++----- 13 files changed, 242 insertions(+), 136 deletions(-) diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index 6b7015dc25..eb0dc636e2 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -301,33 +301,4 @@ std::string qo_basis = "hydrogen"; std::vector qo_strategy = {}; double qo_thr = 1.0e-6; std::vector qo_screening_coeff = {}; - -//========================================================== -// PEXSI related -//========================================================== -int pexsi_npole = 54; 
-int pexsi_inertia = 1; -int pexsi_nmax = 80; -// int pexsi_symbolic = 1; -int pexsi_comm = 1; -int pexsi_storage = 1; -int pexsi_ordering = 0; -int pexsi_row_ordering = 1; -int pexsi_nproc = 1; -int pexsi_symm = 1; -int pexsi_trans = 0; -int pexsi_method = 1; -int pexsi_nproc_pole = 1; -// double pexsi_spin = 2; -double pexsi_temp = 0.0001; -double pexsi_gap = 0; -double pexsi_delta_e = 20.0; -double pexsi_mu_lower = -10; -double pexsi_mu_upper = 10; -double pexsi_mu = 0.0; -double pexsi_mu_thr = 0.05; -double pexsi_mu_expand = 0.3; -double pexsi_mu_guard = 0.2; -double pexsi_elec_thr = 0.001; -double pexsi_zero_thr = 1e-10; } // namespace GlobalV diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index 9808ca080b..3b75f1e77c 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -330,31 +330,5 @@ extern std::vector qo_strategy; extern double qo_thr; extern std::vector qo_screening_coeff; -// PEXSI related -extern int pexsi_npole; -extern int pexsi_inertia; -extern int pexsi_nmax; -// extern int pexsi_symbolic; -extern int pexsi_comm; -extern int pexsi_storage; -extern int pexsi_ordering; -extern int pexsi_row_ordering; -extern int pexsi_nproc; -extern int pexsi_symm; -extern int pexsi_trans; -extern int pexsi_method; -extern int pexsi_nproc_pole; -// extern double pexsi_spin; -extern double pexsi_temp; -extern double pexsi_gap; -extern double pexsi_delta_e; -extern double pexsi_mu_lower; -extern double pexsi_mu_upper; -extern double pexsi_mu; -extern double pexsi_mu_thr; -extern double pexsi_mu_expand; -extern double pexsi_mu_guard; -extern double pexsi_elec_thr; -extern double pexsi_zero_thr; } // namespace GlobalV #endif diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp index d6bec95f99..3d3539fd1a 100644 --- a/source/module_elecstate/elecstate_lcao_tddft.cpp +++ b/source/module_elecstate/elecstate_lcao_tddft.cpp @@ -67,9 
+67,7 @@ void ElecStateLCAO_TDDFT::psiToRho_td(const psi::Psi>& psi) Gint_inout inout(this->loc->DM_R, this->charge->rho, Gint_Tools::job_type::rho); // rho calculation this->uhm->GK.cal_gint(&inout); - std::cout << "this->charge->renormalize_rho(); 1" << std::endl; this->charge->renormalize_rho(); - std::cout << "this->charge->renormalize_rho(); 1 done" << std::endl; ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); return; diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 8e4ee5b15b..3b059661dc 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,3 +1,4 @@ +#include #include #ifdef __PEXSI #include "c_pexsi_interface.h" @@ -14,6 +15,107 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { +template <> +int DiagoPexsi::pexsi_npole = 0; +template <> +int DiagoPexsi::pexsi_inertia = 0; +template <> +int DiagoPexsi::pexsi_nmax = 0; +// template <> +// int DiagoPexsi::pexsi_symbolic = 0; +template <> +int DiagoPexsi::pexsi_comm = 0; +template <> +int DiagoPexsi::pexsi_storage = 0; +template <> +int DiagoPexsi::pexsi_ordering = 0; +template <> +int DiagoPexsi::pexsi_row_ordering = 0; +template <> +int DiagoPexsi::pexsi_nproc = 0; +template <> +int DiagoPexsi::pexsi_symm = 0; +template <> +int DiagoPexsi::pexsi_trans = 0; +template <> +int DiagoPexsi::pexsi_method = 0; +template <> +int DiagoPexsi::pexsi_nproc_pole = 0; +// template <> +// double DiagoPexsi::pexsi_spin = 2; +template <> +double DiagoPexsi::pexsi_temp = 0.0; +template <> +double DiagoPexsi::pexsi_gap = 0.0; +template <> +double DiagoPexsi::pexsi_delta_e = 0.0; +template <> +double DiagoPexsi::pexsi_mu_lower = 0.0; +template <> +double DiagoPexsi::pexsi_mu_upper = 0.0; +template <> +double DiagoPexsi::pexsi_mu = 0.0; +template <> +double DiagoPexsi::pexsi_mu_thr = 0.0; +template <> +double DiagoPexsi::pexsi_mu_expand = 0.0; +template <> +double DiagoPexsi::pexsi_mu_guard = 0.0; +template <> +double 
DiagoPexsi<double>::pexsi_elec_thr = 0.0; +template <> +double DiagoPexsi<double>::pexsi_zero_thr = 0.0; + +template <> +int DiagoPexsi<std::complex<double>>::pexsi_npole = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_inertia = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_nmax = 0; +// template <> +// int DiagoPexsi<std::complex<double>>::pexsi_symbolic = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_comm = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_storage = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_ordering = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_row_ordering = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_nproc = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_symm = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_trans = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_method = 0; +template <> +int DiagoPexsi<std::complex<double>>::pexsi_nproc_pole = 0; +// template <> +// double DiagoPexsi<std::complex<double>>::pexsi_spin = 2; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_temp = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_gap = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_delta_e = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu_lower = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu_upper = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu_thr = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu_expand = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_mu_guard = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_elec_thr = 0.0; +template <> +double DiagoPexsi<std::complex<double>>::pexsi_zero_thr = 0.0; template <> void DiagoPexsi<double>::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index c212d7795a..ffc89d6b4d 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -27,6 +27,37 @@ class DiagoPexsi : public DiagH double totalEnergyS; double totalFreeEnergy; pexsi::PEXSI_Solver* ps; + + //========================================================== + // PEXSI related variables + //========================================================== + static int
pexsi_npole; + static int pexsi_inertia; + static int pexsi_nmax; + // static int pexsi_symbolic; + static int pexsi_comm; + static int pexsi_storage; + static int pexsi_ordering; + static int pexsi_row_ordering; + static int pexsi_nproc; + static int pexsi_symm; + static int pexsi_trans; + static int pexsi_method; + static int pexsi_nproc_pole; + // static double pexsi_spin = 2; + static double pexsi_temp; + static double pexsi_gap; + static double pexsi_delta_e; + static double pexsi_mu_lower; + static double pexsi_mu_upper; + static double pexsi_mu; + static double pexsi_mu_thr; + static double pexsi_mu_expand; + static double pexsi_mu_guard; + static double pexsi_elec_thr; + static double pexsi_zero_thr; + + static MPI_Group grid_group; }; } // namespace hsolver diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp index e498b83a2e..8e4c8e7ac7 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -17,7 +17,7 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, int nblk, int nrow, int ncol, - char LAYOUT) + char layout) { this->comm = comm; this->group = group; @@ -26,13 +26,13 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, this->nblk = nblk; this->nrow = nrow; this->ncol = ncol; - if (LAYOUT == 'R' || LAYOUT == 'r' || LAYOUT == 'C' || LAYOUT == 'c') + if (layout == 'R' || layout == 'r' || layout == 'C' || layout == 'c') { - this->LAYOUT = LAYOUT; + this->layout = layout; } else { - throw("The LAYOUT must be 'R', 'r', 'C', or 'c'"); + throw("The layout must be 'R', 'r', 'C', or 'c'"); } if (comm != MPI_COMM_NULL) @@ -91,7 +91,6 @@ int DistBCDMatrix::globalRow(const int localRow) int DistBCDMatrix::globalCol(const int localCol) { - return (localCol / nblk * npcols + mypcol) * nblk + localCol % nblk; } diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h 
index 98b8512893..0964b9787c 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -16,10 +16,10 @@ class DistBCDMatrix public: // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol); // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char - // LAYOUT); + // layout); // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol); - DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT); + DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout); ~DistBCDMatrix(); int globalRow(const int localRow); @@ -45,9 +45,9 @@ class DistBCDMatrix { return ncol; }; - const char get_LAYOUT() const + const char get_layout() const { - return LAYOUT; + return layout; }; private: @@ -90,7 +90,7 @@ class DistBCDMatrix // the local data layout // 'R' or 'r' for row-major, which is used in C/C++ // 'C' or 'c' for column-major, which is used in Fortran - char LAYOUT; + char layout; }; } // namespace pexsi #endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index ef6c6fec72..eadd991217 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -167,7 +167,7 @@ inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std } // find out the index of non-zero elements -inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, +inline int DistMatrixTransformer::getNonZeroIndex(char layout, const int nrow, const int ncol, double* H_2d, @@ -231,7 +231,7 @@ inline int 
DistMatrixTransformer::getNonZeroIndex(char LAYOUT, if (myproc < 100) log << "rowidx and colidx cleared" << std::endl; #endif - if (LAYOUT == 'C' || LAYOUT == 'c') + if (layout == 'C' || layout == 'c') { for (int i = 0; i < ncol; ++i) { @@ -247,7 +247,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, } } } - else if (LAYOUT == 'R' || LAYOUT == 'r') + else if (layout == 'R' || layout == 'r') { for (int i = 0; i < ncol; ++i) { @@ -267,7 +267,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT, { #ifdef _DEBUG if (myproc < 100) - log << "unknown LAYOUT: " << LAYOUT << std::endl; + log << "unknown layout: " << layout << std::endl; #endif return 1; } @@ -622,7 +622,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, << " ; mypcol: " << SRC_Matrix.mypcol << std::endl; log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol() << std::endl; - log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl; + log << "layout:" << SRC_Matrix.get_layout() << std::endl; log << "ZERO = " << ZERO_Limit << std::endl; log << "DST_Matrix parameters:" << std::endl; log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl; @@ -641,7 +641,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, #endif if (SRC_Matrix.get_comm() != MPI_COMM_NULL) { - getNonZeroIndex(SRC_Matrix.get_LAYOUT(), + getNonZeroIndex(SRC_Matrix.get_layout(), SRC_Matrix.get_nrow(), SRC_Matrix.get_ncol(), H_2d, @@ -660,7 +660,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, if(SRC_Matrix.comm != MPI_COMM_NULL) { log<<"NonZeroIndex :"< sender_buffer(sender_size); std::vector receiver_buffer(receiver_size); // put H to sender buffer - if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') { for (int i = 0; i < sender_size; ++i) { @@ -755,7 
+755,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, #endif // put S to sender buffer - if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r') + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') { for (int i = 0; i < sender_size; ++i) { @@ -1434,7 +1434,7 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM"); #endif // transform receiver_buffer to DM - if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') { int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) @@ -1518,7 +1518,7 @@ MPI_Barrier(COMM_TRANS); // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM"); #endif // transform receiver_buffer to EDM - if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r') + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') { int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); for (int i = 0; i < receiver_size; ++i) diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index e3e27e995a..c81128f9db 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -39,7 +39,7 @@ void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* void countMatrixDistribution(int N, double* A, std::map& P); -int getNonZeroIndex(char LAYOUT, +int getNonZeroIndex(char layout, const int nrow, const int ncol, double* H_2d, diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 1be66abf59..8d55c15707 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -10,8 
+10,6 @@ extern MPI_Comm DIAG_WORLD; extern MPI_Comm GRID_WORLD; -extern MPI_Group GRID_GROUP; - namespace pexsi { PEXSI_Solver::PEXSI_Solver(const int blacs_text, @@ -43,10 +41,11 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, int PEXSI_Solver::solve() { - + MPI_Group grid_group; + MPI_Comm_group(DIAG_WORLD, &grid_group); simplePEXSI(DIAG_WORLD, GRID_WORLD, - GRID_GROUP, + grid_group, this->blacs_text, GlobalV::NLOCAL, this->nb, diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index df72a061c5..b59ed233ea 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -20,6 +20,7 @@ #include "module_base/timer.h" #include "module_base/tool_quit.h" #include "module_base/global_variable.h" +#include "module_hsolver/diago_pexsi.h" namespace pexsi { @@ -106,36 +107,36 @@ int loadPEXSIOption(MPI_Comm comm, double double_para[12]; // read in PEXSI options from GlobalV - int_para[0] = GlobalV::pexsi_npole; - int_para[1] = GlobalV::pexsi_inertia; - int_para[2] = GlobalV::pexsi_nmax; + int_para[0] = hsolver::DiagoPexsi::pexsi_npole; + int_para[1] = hsolver::DiagoPexsi::pexsi_inertia; + int_para[2] = hsolver::DiagoPexsi::pexsi_nmax; int_para[3] = 0; - int_para[4] = 1; // GlobalV::pexsi_symbolic; - int_para[5] = GlobalV::pexsi_comm; + int_para[4] = 1; // hsolver::DiagoPexsi::pexsi_symbolic; + int_para[5] = hsolver::DiagoPexsi::pexsi_comm; int_para[6] = 0; - int_para[7] = GlobalV::pexsi_storage; - int_para[8] = GlobalV::pexsi_ordering; - int_para[9] = GlobalV::pexsi_row_ordering; - int_para[10] = GlobalV::pexsi_nproc; - int_para[11] = GlobalV::pexsi_symm; - int_para[12] = GlobalV::pexsi_trans; - int_para[13] = GlobalV::pexsi_method; + int_para[7] = hsolver::DiagoPexsi::pexsi_storage; + int_para[8] = hsolver::DiagoPexsi::pexsi_ordering; + int_para[9] = hsolver::DiagoPexsi::pexsi_row_ordering; + int_para[10] = hsolver::DiagoPexsi::pexsi_nproc; 
+ int_para[11] = hsolver::DiagoPexsi::pexsi_symm; + int_para[12] = hsolver::DiagoPexsi::pexsi_trans; + int_para[13] = hsolver::DiagoPexsi::pexsi_method; int_para[14] = 2; int_para[15] = 0; - int_para[16] = GlobalV::pexsi_nproc_pole; + int_para[16] = hsolver::DiagoPexsi::pexsi_nproc_pole; - double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin; - double_para[1] = GlobalV::pexsi_temp; - double_para[2] = GlobalV::pexsi_gap; - double_para[3] = GlobalV::pexsi_delta_e; - double_para[4] = GlobalV::pexsi_mu_lower; - double_para[5] = GlobalV::pexsi_mu_upper; - double_para[6] = GlobalV::pexsi_mu; - double_para[7] = GlobalV::pexsi_mu_thr; - double_para[8] = GlobalV::pexsi_mu_expand; - double_para[9] = GlobalV::pexsi_mu_guard; - double_para[10] = GlobalV::pexsi_elec_thr; - double_para[11] = GlobalV::pexsi_zero_thr; + double_para[0] = GlobalV::NSPIN; // hsolver::DiagoPexsi::pexsi_spin; + double_para[1] = hsolver::DiagoPexsi::pexsi_temp; + double_para[2] = hsolver::DiagoPexsi::pexsi_gap; + double_para[3] = hsolver::DiagoPexsi::pexsi_delta_e; + double_para[4] = hsolver::DiagoPexsi::pexsi_mu_lower; + double_para[5] = hsolver::DiagoPexsi::pexsi_mu_upper; + double_para[6] = hsolver::DiagoPexsi::pexsi_mu; + double_para[7] = hsolver::DiagoPexsi::pexsi_mu_thr; + double_para[8] = hsolver::DiagoPexsi::pexsi_mu_expand; + double_para[9] = hsolver::DiagoPexsi::pexsi_mu_guard; + double_para[10] = hsolver::DiagoPexsi::pexsi_elec_thr; + double_para[11] = hsolver::DiagoPexsi::pexsi_zero_thr; // int myid; // MPI_Comm_rank(comm, &myid); // if (myid == 0) @@ -426,7 +427,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, const int nblk, const int nrow, const int ncol, - char LAYOUT, // matrix parameters + char layout, // matrix parameters double* H, double* S, // input matrices const double numElectronExact, @@ -531,7 +532,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test // DONE(ofs_running,"create block cyclic distribution matrix parameter, 
begin"); // OUT(ofs_running, "checkpoint10"); - DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, LAYOUT); + DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout); // OUT(ofs_running, "checkpoint11"); #ifdef _DEBUG if (comm_PEXSI != MPI_COMM_NULL) diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h index 6d569154e9..fded81fc59 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.h +++ b/source/module_hsolver/module_pexsi/simple_pexsi.h @@ -13,7 +13,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, const int nblk, const int nrow, const int ncol, - char LAYOUT, // input matrix parameters + char layout, // input matrix parameters double* H, double* S, // input matrices const double nElectronExact, diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index d6e3371111..9d96b9f993 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -24,6 +24,9 @@ #include "module_hamilt_lcao/module_dftu/dftu.h" #include "module_hamilt_lcao/module_tddft/evolve_elec.h" #endif +#ifdef __PEXSI +#include "module_hsolver/diago_pexsi.h" +#endif #include "module_base/timer.h" #include "module_elecstate/elecstate_lcao.h" @@ -770,31 +773,59 @@ void Input_Conv::Convert(void) //----------------------------------------------- // PEXSI related parameters //----------------------------------------------- - GlobalV::pexsi_npole = INPUT.pexsi_npole; - GlobalV::pexsi_inertia = INPUT.pexsi_inertia; - GlobalV::pexsi_nmax = INPUT.pexsi_nmax; - // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic; - GlobalV::pexsi_comm = INPUT.pexsi_comm; - GlobalV::pexsi_storage = INPUT.pexsi_storage; - GlobalV::pexsi_ordering = INPUT.pexsi_ordering; - GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering; - GlobalV::pexsi_nproc = INPUT.pexsi_nproc; - GlobalV::pexsi_symm = INPUT.pexsi_symm; - GlobalV::pexsi_trans = INPUT.pexsi_trans; - 
GlobalV::pexsi_method = INPUT.pexsi_method; - GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; - // GlobalV::pexsi_spin = INPUT.pexsi_spin; - GlobalV::pexsi_temp = INPUT.pexsi_temp; - GlobalV::pexsi_gap = INPUT.pexsi_gap; - GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e; - GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower; - GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper; - GlobalV::pexsi_mu = INPUT.pexsi_mu; - GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr; - GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand; - GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard; - GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr; - GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr; +#ifdef __PEXSI + hsolver::DiagoPexsi>::pexsi_npole = INPUT.pexsi_npole; + hsolver::DiagoPexsi>::pexsi_inertia = INPUT.pexsi_inertia; + hsolver::DiagoPexsi>::pexsi_nmax = INPUT.pexsi_nmax; + // hsolver::DiagoPexsi>::pexsi_symbolic = INPUT.pexsi_symbolic; + hsolver::DiagoPexsi>::pexsi_comm = INPUT.pexsi_comm; + hsolver::DiagoPexsi>::pexsi_storage = INPUT.pexsi_storage; + hsolver::DiagoPexsi>::pexsi_ordering = INPUT.pexsi_ordering; + hsolver::DiagoPexsi>::pexsi_row_ordering = INPUT.pexsi_row_ordering; + hsolver::DiagoPexsi>::pexsi_nproc = INPUT.pexsi_nproc; + hsolver::DiagoPexsi>::pexsi_symm = INPUT.pexsi_symm; + hsolver::DiagoPexsi>::pexsi_trans = INPUT.pexsi_trans; + hsolver::DiagoPexsi>::pexsi_method = INPUT.pexsi_method; + hsolver::DiagoPexsi>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // hsolver::DiagoPexsi>::pexsi_spin = INPUT.pexsi_spin; + hsolver::DiagoPexsi>::pexsi_temp = INPUT.pexsi_temp; + hsolver::DiagoPexsi>::pexsi_gap = INPUT.pexsi_gap; + hsolver::DiagoPexsi>::pexsi_delta_e = INPUT.pexsi_delta_e; + hsolver::DiagoPexsi>::pexsi_mu_lower = INPUT.pexsi_mu_lower; + hsolver::DiagoPexsi>::pexsi_mu_upper = INPUT.pexsi_mu_upper; + hsolver::DiagoPexsi>::pexsi_mu = INPUT.pexsi_mu; + hsolver::DiagoPexsi>::pexsi_mu_thr = INPUT.pexsi_mu_thr; + hsolver::DiagoPexsi>::pexsi_mu_expand = INPUT.pexsi_mu_expand; + 
hsolver::DiagoPexsi>::pexsi_mu_guard = INPUT.pexsi_mu_guard; + hsolver::DiagoPexsi>::pexsi_elec_thr = INPUT.pexsi_elec_thr; + hsolver::DiagoPexsi>::pexsi_zero_thr = INPUT.pexsi_zero_thr; + + hsolver::DiagoPexsi::pexsi_npole = INPUT.pexsi_npole; + hsolver::DiagoPexsi::pexsi_inertia = INPUT.pexsi_inertia; + hsolver::DiagoPexsi::pexsi_nmax = INPUT.pexsi_nmax; + // hsolver::DiagoPexsi::pexsi_symbolic = INPUT.pexsi_symbolic; + hsolver::DiagoPexsi::pexsi_comm = INPUT.pexsi_comm; + hsolver::DiagoPexsi::pexsi_storage = INPUT.pexsi_storage; + hsolver::DiagoPexsi::pexsi_ordering = INPUT.pexsi_ordering; + hsolver::DiagoPexsi::pexsi_row_ordering = INPUT.pexsi_row_ordering; + hsolver::DiagoPexsi::pexsi_nproc = INPUT.pexsi_nproc; + hsolver::DiagoPexsi::pexsi_symm = INPUT.pexsi_symm; + hsolver::DiagoPexsi::pexsi_trans = INPUT.pexsi_trans; + hsolver::DiagoPexsi::pexsi_method = INPUT.pexsi_method; + hsolver::DiagoPexsi::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // hsolver::DiagoPexsi::pexsi_spin = INPUT.pexsi_spin; + hsolver::DiagoPexsi::pexsi_temp = INPUT.pexsi_temp; + hsolver::DiagoPexsi::pexsi_gap = INPUT.pexsi_gap; + hsolver::DiagoPexsi::pexsi_delta_e = INPUT.pexsi_delta_e; + hsolver::DiagoPexsi::pexsi_mu_lower = INPUT.pexsi_mu_lower; + hsolver::DiagoPexsi::pexsi_mu_upper = INPUT.pexsi_mu_upper; + hsolver::DiagoPexsi::pexsi_mu = INPUT.pexsi_mu; + hsolver::DiagoPexsi::pexsi_mu_thr = INPUT.pexsi_mu_thr; + hsolver::DiagoPexsi::pexsi_mu_expand = INPUT.pexsi_mu_expand; + hsolver::DiagoPexsi::pexsi_mu_guard = INPUT.pexsi_mu_guard; + hsolver::DiagoPexsi::pexsi_elec_thr = INPUT.pexsi_elec_thr; + hsolver::DiagoPexsi::pexsi_zero_thr = INPUT.pexsi_zero_thr; +#endif ModuleBase::timer::tick("Input_Conv", "Convert"); return; } From 70d68d90c735996c392705f033cc182d15fe99df Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Thu, 1 Feb 2024 17:49:06 +0800 Subject: [PATCH 22/44] Fix parallel communication related issue --- source/module_hsolver/module_pexsi/pexsi_solver.cpp | 13 
+++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 8d55c15707..ebbf253b20 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -1,3 +1,4 @@ +#include "module_base/parallel_global.h" #ifdef __PEXSI #include "pexsi_solver.h" @@ -42,9 +43,17 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, int PEXSI_Solver::solve() { MPI_Group grid_group; - MPI_Comm_group(DIAG_WORLD, &grid_group); + int myid, grid_np; + MPI_Group world_group; + MPI_Comm_rank(DIAG_WORLD, &myid); + MPI_Comm_size(DIAG_WORLD, &grid_np); + MPI_Comm_group(DIAG_WORLD, &world_group); + + int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np}; + MPI_Group_range_incl(world_group, 1, &grid_proc_range, &grid_group); + simplePEXSI(DIAG_WORLD, - GRID_WORLD, + DIAG_WORLD, grid_group, this->blacs_text, GlobalV::NLOCAL, From 5b4a6cf8eb51c0861e2e9219f5a5a1a6492421f3 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Sat, 3 Feb 2024 17:10:29 +0800 Subject: [PATCH 23/44] Fix vars in Makefile.vars, add input tests and comments for pexsi vars --- source/Makefile.vars | 14 +-- source/module_base/global_variable.h | 1 - source/module_elecstate/elecstate.cpp | 28 +++--- source/module_elecstate/elecstate_lcao.cpp | 26 +----- source/module_elecstate/elecstate_lcao.h | 2 +- source/module_hsolver/diago_pexsi.cpp | 20 ++--- source/module_hsolver/diago_pexsi.h | 90 +++++++++++++++++-- source/module_hsolver/test/CMakeLists.txt | 6 -- .../module_hsolver/test/diago_lcao_test.cpp | 7 -- source/module_io/input.cpp | 21 +++-- source/module_io/input.h | 10 +-- source/module_io/test/input_conv_test.cpp | 52 +++++++++++ source/module_io/test/input_test_para.cpp | 24 +++++ source/module_io/test/write_input_test.cpp | 38 ++++++++ source/module_io/write_input.cpp | 10 +-- 15 files changed, 247 
insertions(+), 102 deletions(-) diff --git a/source/Makefile.vars b/source/Makefile.vars index 477b0a251d..f00eca8ea3 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -29,10 +29,10 @@ OPENMP = OFF ## CEREAL_DIR should contain an include folder. #---------------------------------------------------------------------- -ELPA_DIR = /root/lib/ELPA -ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/ +ELPA_DIR = /usr/local/include/elpa-2021.05.002 +ELPA_INCLUDE_DIR = ${ELPA_DIR}/elpa -CEREAL_DIR = /root/lib/cereal +CEREAL_DIR = /usr/local/include/cereal ##------------------- FOR GNU COMPILER ------------------------------ @@ -44,7 +44,7 @@ CEREAL_DIR = /root/lib/cereal ## CEREAL_DIR should contain an include folder. ##--------------------------------------------------------------------- -# FFTW_DIR = /root/lib/FFTW3 +# FFTW_DIR = /public/soft/fftw_3.3.8 # OPENBLAS_LIB_DIR = /public/soft/openblas/lib # SCALAPACK_LIB_DIR = /public/soft/openblas/lib @@ -73,9 +73,9 @@ CEREAL_DIR = /root/lib/cereal # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm -# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi -# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu -# PARMETIS_DIR = /home/rhx/projects/pexsi-build/parmetis +# PEXSI_DIR = /public/software/pexsi +# DSUPERLU_DIR = /public/software/superlu_dist +# PARMETIS_DIR = /public/software/parmetis ##--------------------------------------------------------------------- # NP = 14 # It is not supported. 
use make -j14 or make -j to parallelly compile diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index 5efed29033..1bbe1edb91 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -330,6 +330,5 @@ extern std::string qo_basis; extern std::vector qo_strategy; extern double qo_thr; extern std::vector qo_screening_coeff; - } // namespace GlobalV #endif diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp index e2a4c3eec6..393c2d07d5 100644 --- a/source/module_elecstate/elecstate.cpp +++ b/source/module_elecstate/elecstate.cpp @@ -174,28 +174,26 @@ void ElecState::calEBand() ModuleBase::TITLE("ElecState", "calEBand"); // calculate ebands using wg and ekb double eband = 0.0; - { #ifdef _OPENMP #pragma omp parallel for collapse(2) reduction(+:eband) #endif - for (int ik = 0; ik < this->ekb.nr; ++ik) + for (int ik = 0; ik < this->ekb.nr; ++ik) + { + for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++) { - for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++) - { - eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd); - } + eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd); } - this->f_en.eband = eband; - if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft") - { - //================================== - // Reduce all the Energy in each cpu - //================================== - this->f_en.eband /= GlobalV::NPROC_IN_POOL; + } + this->f_en.eband = eband; + if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft") + { + //================================== + // Reduce all the Energy in each cpu + //================================== + this->f_en.eband /= GlobalV::NPROC_IN_POOL; #ifdef __MPI - Parallel_Reduce::reduce_all(this->f_en.eband); + Parallel_Reduce::reduce_all(this->f_en.eband); #endif - } } return; } diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 3970b98b4b..23d55162f8 100644 --- 
a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -191,10 +191,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is)); } } - - - - ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); for (int ik = 0; ik < psi.get_nk(); ++ik) @@ -222,23 +218,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) //------------------------------------------------------------ // calculate the charge density on real space grid. //------------------------------------------------------------ - // print matrix zzh - // GlobalV::ofs_running << "dm_gamma print\n"; - // for(int i=0; i< this->loc->dm_gamma[0].nc; i++) - // { - // for(int j=0; jloc->dm_gamma[0].nr; j++) - // { - // if (std::abs(this->loc->dm_gamma[0](i, j)) < 0.00000001) - // { - // GlobalV::ofs_running << "0 "; - // } - // else - // { - // GlobalV::ofs_running << this->loc->dm_gamma[0](i, j) << " "; - // } - // } - // GlobalV::ofs_running << std::endl; - // } ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); this->uhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho); @@ -285,11 +264,10 @@ void ElecStateLCAO::dmToRho(double* pexsi_DM) { ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); - this->loc->set_dm_gamma(0, pexsi_DM); - // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready { + this->loc->set_dm_gamma(0, pexsi_DM); this->loc->cal_dk_gamma_from_2D_pub(); } @@ -333,6 +311,4 @@ void ElecStateLCAO>::dmToRho(std::complex* DM) template class ElecStateLCAO; // Gamma_only case template class ElecStateLCAO>; // multi-k case - - } // namespace elecstate \ No newline at end of file diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h 
index 8c86844486..c1cf231a45 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -60,7 +60,7 @@ class ElecStateLCAO : public ElecState double get_spin_constrain_energy() override; #ifdef __PEXSI - //use for pexsi + // use for pexsi void dmToRho(TK* DM); #endif diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 3b059661dc..803680fe31 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -18,15 +18,15 @@ namespace hsolver template <> int DiagoPexsi::pexsi_npole = 0; template <> -int DiagoPexsi::pexsi_inertia = 0; +bool DiagoPexsi::pexsi_inertia = 0; template <> int DiagoPexsi::pexsi_nmax = 0; // template <> // int DiagoPexsi::pexsi_symbolic = 0; template <> -int DiagoPexsi::pexsi_comm = 0; +bool DiagoPexsi::pexsi_comm = 0; template <> -int DiagoPexsi::pexsi_storage = 0; +bool DiagoPexsi::pexsi_storage = 0; template <> int DiagoPexsi::pexsi_ordering = 0; template <> @@ -34,9 +34,9 @@ int DiagoPexsi::pexsi_row_ordering = 0; template <> int DiagoPexsi::pexsi_nproc = 0; template <> -int DiagoPexsi::pexsi_symm = 0; +bool DiagoPexsi::pexsi_symm = 0; template <> -int DiagoPexsi::pexsi_trans = 0; +bool DiagoPexsi::pexsi_trans = 0; template <> int DiagoPexsi::pexsi_method = 0; template <> @@ -69,15 +69,15 @@ double DiagoPexsi::pexsi_zero_thr = 0.0; template <> int DiagoPexsi>::pexsi_npole = 0; template <> -int DiagoPexsi>::pexsi_inertia = 0; +bool DiagoPexsi>::pexsi_inertia = 0; template <> int DiagoPexsi>::pexsi_nmax = 0; // template <> // int DiagoPexsi>::pexsi_symbolic = 0; template <> -int DiagoPexsi>::pexsi_comm = 0; +bool DiagoPexsi>::pexsi_comm = 0; template <> -int DiagoPexsi>::pexsi_storage = 0; +bool DiagoPexsi>::pexsi_storage = 0; template <> int DiagoPexsi>::pexsi_ordering = 0; template <> @@ -85,9 +85,9 @@ int DiagoPexsi>::pexsi_row_ordering = 0; template <> int DiagoPexsi>::pexsi_nproc = 0; template <> -int 
DiagoPexsi>::pexsi_symm = 0; +bool DiagoPexsi>::pexsi_symm = 0; template <> -int DiagoPexsi>::pexsi_trans = 0; +bool DiagoPexsi>::pexsi_trans = 0; template <> int DiagoPexsi>::pexsi_method = 0; template <> diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index ffc89d6b4d..34076a1080 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -31,33 +31,105 @@ class DiagoPexsi : public DiagH //========================================================== // PEXSI related variables //========================================================== + /** + * @brief Number of terms in the pole expansion. + */ static int pexsi_npole; - static int pexsi_inertia; + /** + * @brief Whether inertia counting is used at the very beginning. + */ + static bool pexsi_inertia; + /** + * @brief Maximum number of PEXSI iterations after each inertia counting procedure. + */ static int pexsi_nmax; - // static int pexsi_symbolic; - static int pexsi_comm; - static int pexsi_storage; + /** + * @brief Whether to construct PSelInv communication pattern. + */ + static bool pexsi_comm; + /** + * @brief Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. + */ + static bool pexsi_storage; + /** + * @brief Ordering strategy for factorization and selected inversion. + */ static int pexsi_ordering; + /** + * @brief row permutation strategy for factorization and selected inversion. + */ static int pexsi_row_ordering; + /** + * @brief Number of processors for PARMETIS/PT-SCOTCH. Only used if the ordering == 0. + */ static int pexsi_nproc; - static int pexsi_symm; - static int pexsi_trans; + /** + * @brief Matrix structure. + * - = 0 : Unsymmetric matrix + * - = 1 : Symmetric matrix (default). + */ + static bool pexsi_symm; + /** + * @brief Transpose. + * - = 0 : Factor non transposed matrix (default). + * - = 1 : Factor transposed matrix. 
+ */ + static bool pexsi_trans; + /** + * @brief The pole expansion method to be used. + * - = 1 : Cauchy Contour Integral method used. + * - = 2 : Moussa optimized method. + */ static int pexsi_method; + /** + * @brief The point parallelizaion of PEXSI. + * - = 2 : Recommend two points parallelization + */ static int pexsi_nproc_pole; - // static double pexsi_spin = 2; + /** + * @brief Temperature, in the same unit as H + */ static double pexsi_temp; + /** + * @brief Spectral gap. **Note** This can be set to be 0 in most cases. + */ static double pexsi_gap; + /** + * @brief An upper bound for the spectral radius of \f$S^{-1} H\f$. + */ static double pexsi_delta_e; + /** + * @brief Initial guess of lower bound for mu. + */ static double pexsi_mu_lower; + /** + * @brief Initial guess of upper bound for mu. + */ static double pexsi_mu_upper; + /** + * @brief Initial guess for mu (for the solver) (AG) + */ static double pexsi_mu; + /** + * @brief Stopping criterion in terms of the chemical potential for the inertia counting procedure. + */ static double pexsi_mu_thr; + /** + * @brief If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion. + */ static double pexsi_mu_expand; + /** + * @brief Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. + */ static double pexsi_mu_guard; + /** + * @brief Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact. + */ static double pexsi_elec_thr; + /** + * @brief Stopping criterion for the zero threshold. 
+ */ static double pexsi_zero_thr; - - static MPI_Group grid_group; }; } // namespace hsolver diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index bf11e8ce8c..c76e223e18 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -80,12 +80,6 @@ AddTest( ) if(ENABLE_LCAO) - # if(USE_ELPA and USE_PEXSI) - # AddTest( - # TARGET HSolver_LCAO - # LIBS ${math_libs} ELPA::ELPA base genelpa psi device - # SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp - # ) if(USE_ELPA) AddTest( TARGET HSolver_LCAO diff --git a/source/module_hsolver/test/diago_lcao_test.cpp b/source/module_hsolver/test/diago_lcao_test.cpp index 7e8d499526..487820a9d5 100644 --- a/source/module_hsolver/test/diago_lcao_test.cpp +++ b/source/module_hsolver/test/diago_lcao_test.cpp @@ -7,9 +7,6 @@ #ifdef __ELPA #include "module_hsolver/diago_elpa.h" #endif -#ifdef __PEXSI -#include "module_hsolver/diago_pexsi.h" -#endif #define PASSTHRESHOLD 1e-10 #define DETAILINFO false @@ -67,10 +64,6 @@ template class DiagoPrepare #ifdef __ELPA else if(ks_solver == "genelpa") dh = new hsolver::DiagoElpa; -#endif -#ifdef __PEXSI - else if(ks_solver == "pexsi") - dh = new hsolver::DiagoPexsi; #endif else { diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index 5e90ef7b71..fb5bc43551 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -22,7 +22,6 @@ #include "module_base/global_variable.h" #include "module_base/parallel_common.h" #include "module_base/timer.h" -#include "module_base/tool_quit.h" #include "version.h" Input INPUT; @@ -645,16 +644,16 @@ void Input::Default(void) // variables for PEXSI //========================================================== pexsi_npole = 54; - pexsi_inertia = 1; + pexsi_inertia = true; pexsi_nmax = 80; // pexsi_symbolic = 1; - pexsi_comm = 1; - pexsi_storage = 1; + pexsi_comm = true; + pexsi_storage = true; pexsi_ordering = 0; 
pexsi_row_ordering = 1; pexsi_nproc = 1; - pexsi_symm = 1; - pexsi_trans = 0; + pexsi_symm = true; + pexsi_trans = false; pexsi_method = 1; pexsi_nproc_pole = 1; // pexsi_spin = 2; @@ -3824,16 +3823,16 @@ void Input::Bcast() // PEXSI //========================================================== Parallel_Common::bcast_int(pexsi_npole); - Parallel_Common::bcast_int(pexsi_inertia); + Parallel_Common::bcast_bool(pexsi_inertia); Parallel_Common::bcast_int(pexsi_nmax); // Parallel_Common::bcast_int(pexsi_symbolic); - Parallel_Common::bcast_int(pexsi_comm); - Parallel_Common::bcast_int(pexsi_storage); + Parallel_Common::bcast_bool(pexsi_comm); + Parallel_Common::bcast_bool(pexsi_storage); Parallel_Common::bcast_int(pexsi_ordering); Parallel_Common::bcast_int(pexsi_row_ordering); Parallel_Common::bcast_int(pexsi_nproc); - Parallel_Common::bcast_int(pexsi_symm); - Parallel_Common::bcast_int(pexsi_trans); + Parallel_Common::bcast_bool(pexsi_symm); + Parallel_Common::bcast_bool(pexsi_trans); Parallel_Common::bcast_int(pexsi_method); Parallel_Common::bcast_int(pexsi_nproc_pole); // Parallel_Common::bcast_double(pexsi_spin); diff --git a/source/module_io/input.h b/source/module_io/input.h index 6d326d6b5d..f166612a4d 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -607,16 +607,16 @@ class Input // variables for PEXSI //========================================================== int pexsi_npole = 54; - int pexsi_inertia = 1; + bool pexsi_inertia = true; int pexsi_nmax = 80; // int pexsi_symbolic = 1; - int pexsi_comm = 1; - int pexsi_storage = 1; + bool pexsi_comm = true; + bool pexsi_storage = true; int pexsi_ordering = 0; int pexsi_row_ordering = 1; int pexsi_nproc = 1; - int pexsi_symm = 1; - int pexsi_trans = 0; + bool pexsi_symm = true; + bool pexsi_trans = false; int pexsi_method = 1; int pexsi_nproc_pole = 1; // double pexsi_spin = 2; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index 
5b9c93dd77..60ec6c82dd 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -1,5 +1,6 @@ #include "gtest/gtest.h" #include "gmock/gmock.h" +#include "module_hsolver/diago_pexsi.h" #include "module_io/input_conv.h" #include "module_base/global_variable.h" #include "for_testing_input_conv.h" @@ -539,6 +540,57 @@ TEST_F(InputConvTest, ReadTdEfieldTest) EXPECT_EQ(elecstate::H_TDDFT_pw::heavi_t0[0], 100); EXPECT_NEAR(elecstate::H_TDDFT_pw::heavi_amp[0], 1.00 * ModuleBase::BOHR_TO_A / ModuleBase::Ry_to_eV, 1e-8); } + +#ifdef __PEXSI +TEST_F(InputConvTest, PEXSI) +{ + INPUT.Default(); + std::string input_file = "./support/INPUT"; + INPUT.Read(input_file); + Input_Conv::Convert(); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_npole, 54); + EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_inertia); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nmax, 80); + EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_comm); + EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_storage); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_ordering, 0); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_row_ordering, 1); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nproc, 1); + EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_symm); + EXPECT_FALSE(hsolver::DiagoPexsi::pexsi_trans); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_method, 1); + EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_temp, 1e-4); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_gap, 0); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu, 0); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_elec_thr, 0.001); + 
EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_zero_thr, 1e-10); + + EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_nproc, 1); + EXPECT_TRUE(hsolver::DiagoPexsi>::pexsi_symm); + EXPECT_FALSE(hsolver::DiagoPexsi>::pexsi_trans); + EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_method, 1); + EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_temp, 0.0001); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_gap, 0); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu, 0); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_zero_thr, 1e-10); +} +#endif + #endif #undef private diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index f74544ecb6..ad40d635ea 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -392,6 +392,30 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0); EXPECT_EQ(INPUT.qo_thr, 1e-6); EXPECT_EQ(INPUT.qo_basis, "hydrogen"); + + EXPECT_EQ(INPUT.pexsi_npole, 54); + EXPECT_TRUE(INPUT.pexsi_inertia); + EXPECT_EQ(INPUT.pexsi_nmax, 80); + EXPECT_TRUE(INPUT.pexsi_comm); + EXPECT_TRUE(INPUT.pexsi_storage); + EXPECT_EQ(INPUT.pexsi_ordering, 0); + EXPECT_EQ(INPUT.pexsi_row_ordering, 1); + EXPECT_EQ(INPUT.pexsi_nproc, 1); + EXPECT_TRUE(INPUT.pexsi_symm); + EXPECT_FALSE(INPUT.pexsi_trans); + EXPECT_EQ(INPUT.pexsi_method, 1); + EXPECT_EQ(INPUT.pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.0001); + EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 
20); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(INPUT.pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(INPUT.pexsi_zero_thr, 1e-10); } TEST_F(InputParaTest, Init) diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index 0103c0a14b..e106fc399f 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -922,4 +922,42 @@ TEST_F(write_input, Deltaspin22) EXPECT_THAT(output, testing::HasSubstr("sccut 3 #Maximal step size for lambda in eV/uB")); remove("write_input_test.log"); } + +TEST_F (write_input, PEXSI24) +{ + INPUT.Default(); + INPUT.Read("./support/witestfile"); + std::string output_file = "write_input_test.log"; + INPUT.Print(output_file); + int a = access("write_input_test.log", 00); + EXPECT_EQ(a, 0); + std::ifstream ifs ("write_input_test.log"); + std::string output ((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 54 #Number of poles in expansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia 1 #Whether inertia counting is used at the very beginning of PEXSI process")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax 80 #Maximum number of PEXSI iterations after each inertia counting procedure.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_comm 1 #Whether to construct PSelInv communication pattern")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_storage 1 #Storage space used by the Selected Inversion algorithm for symmetric matrices.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_ordering 0 #Ordering strategy for factorization and selected inversion")); + 
EXPECT_THAT(output, testing::HasSubstr("pexsi_row_ordering 1 #Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc 1 #Number of processors for parmetis")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_symm 1 #Matrix symmetry")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_trans 0 #Whether to transpose")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_method 1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole 1 #Number of processes used by each pole")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_temp 0.0001 #Temperature, in the same unit as H")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_gap 0 #Spectral gap")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_delta_e 20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower -10 #Initial guess of lower bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_upper 10 #Initial guess of upper bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu 0 #Initial guess for mu (for the solver)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_thr 0.05 #Stopping criterion in terms of the chemical potential for the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_expand 0.3 #If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_guard 0.2 #Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_elec_thr 0.001 #Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_zero_thr 1e-10 #if the absolute 
value of matrix element is less than ZERO_Limit, it will be considered as 0")); + ifs.close(); + remove("write_input_test.log"); +} #undef private diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index 63488dd131..9b3edf8cc2 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -502,17 +502,17 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices."); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose"); + 
ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "Matrix symmetry"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "Whether to transpose"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); - ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \\f$S^{-1} H\\f$"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); From 493f71345db90f84ab175ba672acd7d337144a23 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Sat, 3 Feb 2024 21:11:24 +0800 Subject: [PATCH 24/44] Fix nspin > 1 cases --- source/module_elecstate/elecstate_lcao.cpp | 14 +- source/module_elecstate/elecstate_lcao.h | 2 +- source/module_hsolver/diago_pexsi.cpp | 6 +- source/module_hsolver/diago_pexsi.h | 5 +- source/module_hsolver/hsolver_lcao.cpp | 3 + .../module_pexsi/dist_bcd_matrix.h | 11 - .../module_pexsi/dist_matrix_transformer.cpp | 864 +----------------- .../module_pexsi/dist_matrix_transformer.h | 3 - .../module_pexsi/pexsi_solver.cpp | 4 +- .../module_pexsi/pexsi_solver.h | 4 +- .../module_pexsi/simple_pexsi.cpp | 377 +------- source/module_hsolver/test/CMakeLists.txt | 8 - 12 files changed, 37 insertions(+), 1264 deletions(-) diff --git 
a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 23d55162f8..267d50ddcb 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -260,19 +260,25 @@ double ElecStateLCAO>::get_spin_constrain_energy() #ifdef __PEXSI template<> -void ElecStateLCAO::dmToRho(double* pexsi_DM) +void ElecStateLCAO::dmToRho(std::vector pexsi_DM) { ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready { - this->loc->set_dm_gamma(0, pexsi_DM); + for (int ik = 0; ik < GlobalV::NSPIN; ++ik) + { + this->loc->set_dm_gamma(ik, pexsi_DM[ik]); + } this->loc->cal_dk_gamma_from_2D_pub(); } auto DM = this->get_DM(); - DM->set_DMK_pointer(0, pexsi_DM); + for (int is = 0; is < GlobalV::NSPIN; is++) + { + this->DM->set_DMK_pointer(is, pexsi_DM[is]); + } DM->cal_DMR(); for (int is = 0; is < GlobalV::NSPIN; is++) @@ -301,7 +307,7 @@ void ElecStateLCAO::dmToRho(double* pexsi_DM) } template<> -void ElecStateLCAO>::dmToRho(std::complex* DM) +void ElecStateLCAO>::dmToRho(std::vector*> DM) { ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); } diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index c1cf231a45..0a942a3bc6 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -61,7 +61,7 @@ class ElecStateLCAO : public ElecState #ifdef __PEXSI // use for pexsi - void dmToRho(TK* DM); + void dmToRho(std::vector pexsi_DM); #endif protected: diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 803680fe31..1cfc765c2a 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -131,14 +131,12 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& 
this->ParaV->ncol, h_mat.p, s_mat.p, - this->DM, - this->EDM, this->totalEnergyH, this->totalEnergyS, this->totalFreeEnergy); this->ps->solve(); - this->EDM = this->ps->get_EDM(); - this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]? + this->EDM.push_back(this->ps->get_EDM()); + this->DM.push_back(this->ps->get_DM()); // loc.dm_gamma[ik] loc.dm_gamma[0]? this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); this->totalEnergyH = this->ps->get_totalEnergyH(); this->totalEnergyS = this->ps->get_totalEnergyS(); diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 34076a1080..8e2aa98da1 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -1,6 +1,7 @@ #ifndef DIGAOPEXSI_H #define DIGAOPEXSI_H +#include #include "diagh.h" #include "module_basis/module_ao/parallel_orbitals.h" #include "module_pexsi/pexsi_solver.h" @@ -21,8 +22,8 @@ class DiagoPexsi : public DiagH } void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; const Parallel_Orbitals* ParaV; - T* DM; - double* EDM; + std::vector DM; + std::vector EDM; double totalEnergyH; double totalEnergyS; double totalFreeEnergy; diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 7adea8b91d..743a02d51f 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -153,6 +153,9 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, delete[] this->pdiagh; this->pdiagh = nullptr; } + auto tem = dynamic_cast*>(this->pdiagh); + tem->DM.clear(); + tem->EDM.clear(); } if (this->pdiagh == nullptr) { diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h index 0964b9787c..94b61277d2 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -12,13 +12,7 @@ namespace pexsi { class 
DistBCDMatrix { - public: - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol); - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char - // layout); - - // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol); DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout); ~DistBCDMatrix(); @@ -27,7 +21,6 @@ class DistBCDMatrix int localRow(const int globalRow, int& myprow); int localCol(const int globalCol, int& mypcol); int pnum(const int prow, const int pcol); - //~DistBCDMatrix(); const MPI_Comm get_comm() const { @@ -75,10 +68,6 @@ class DistBCDMatrix int nrow; int ncol; - // protected: - - // private: - // current process row and column int myprow; int mypcol; diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index eadd991217..4b0fc23cfb 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -13,17 +13,6 @@ #include "dist_bcd_matrix.h" #include "dist_ccs_matrix.h" -// for debug -#ifdef _DEBUG -#include - -#include -#include - -#include "src_pw/global.h" -#endif -// end debug - namespace pexsi { // find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a @@ -177,60 +166,10 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout, std::vector& rowidx, std::vector& colidx) { -#ifdef _DEBUG - char f_log[80]; - int myproc; - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - std::ofstream log; - if (myproc < 100) - { - sprintf(f_log, "transformer_%2.2d.log", myproc); - log.open(f_log, std::ios::app); - log << "start count nnz" << std::endl; - } - // count nonzeros value distribution of H and S - static 
bool isCOUNTNONZERO = true; - if (!isCOUNTNONZERO) - { - isCOUNTNONZERO = true; - char plog_name[80]; - sprintf(plog_name, "HS_Distribution_%d.log", myproc); - std::ofstream plog; - plog.open(plog_name, std::ios::app); - std::map pH; - countMatrixDistribution(nrow * ncol, H_2d, pH); - std::map pS; - countMatrixDistribution(nrow * ncol, H_2d, pS); - plog << "Element in H distribution:\n"; - // std::stringstream ss; - // ss.str(""); - for (auto iter = pH.begin(); iter != pH.end(); ++iter) - { - // ss<<"p["<first<<"] : "<second<first << "] : " << iter->second << std::endl; - } - // OUT(ofs_running,ss.str()); - // OUT(ofs_running, "Element in S distribution:"); - plog << "Element in S distribution:\n"; - // ss.str(""); - for (auto iter = pS.begin(); iter != pS.end(); ++iter) - { - // ss<<"p["<first<<"] : "<second<first << "] : " << iter->second << std::endl; - } - // OUT(ofs_running,ss.str()); - plog.close(); - } -#endif - int idx = 0; nnz = 0; colidx.clear(); rowidx.clear(); -#ifdef _DEBUG - if (myproc < 100) - log << "rowidx and colidx cleared" << std::endl; -#endif if (layout == 'C' || layout == 'c') { for (int i = 0; i < ncol; ++i) @@ -265,19 +204,8 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout, } else { -#ifdef _DEBUG - if (myproc < 100) - log << "unknown layout: " << layout << std::endl; -#endif return 1; } -#ifdef _DEBUG - if (myproc < 100) - { - log << "nnz is counted: " << nnz << std::endl; - log.close(); - } -#endif return 0; } @@ -297,32 +225,10 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, std::vector& receiver_displacement_process, std::vector& buffer2ccsIndex) { - // debug int myproc; MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -#ifdef _DEBUG - std::ofstream log; - if (myproc < 100) - { - char f_log[80]; - sprintf(f_log, "transformer_%2.2d.log", myproc); - log.open(f_log, std::ios::app); - log << "enter buildTransformParameter" << std::endl; - } -#endif - // end debug - // count sender non-zeros elements 
sender_size = nnz; std::fill(sender_size_process.begin(), sender_size_process.end(), 0); -// debug -#ifdef _DEBUG - if (myproc < 100) - { - log << "start translate ranks between group_data and group_trans" << std::endl; - log << "sender_size (in BCD) = " << sender_size << std::endl; - } -#endif - // end debug // create process id map from group_data to group_trans int nproc_data; std::vector proc_map_data_trans; @@ -344,17 +250,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); } -// debug -#ifdef _DEBUG - if (myproc < 100) - { - log << "rank_data rank_trans" << std::endl; - for (int i = 0; i < nproc_data; ++i) - log << i << "\t\t\t" << proc_map_data_trans[i] << std::endl; - } -#endif - // end debug - for (int i = 0; i < nnz; ++i) { int l_col = colidx[i]; @@ -362,50 +257,16 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, int dst_process; int dst_col = DST_Matrix.localCol(g_col, dst_process); int dst_process_trans = proc_map_data_trans[dst_process]; - /* - // debug - #ifdef _DEBUG - log< receiver_index(receiver_size); @@ -455,12 +301,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, &receiver_displacement_process[0], MPI_INT, COMM_TRANS); -// debug -#ifdef _DEBUG - if (myproc < 100) - log << "receiver_index is got" << std::endl; -#endif - // end debug // setup buffer2ccsIndex based on receiver_index buffer2ccsIndex.resize(receiver_size); @@ -472,15 +312,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, &receiver_index[0], DST_Matrix, &buffer2ccsIndex[0]); -// debug -#ifdef _DEBUG - if (myproc < 100) - { - log << "ccs parameter is built" << std::endl; - log.close(); - } -#endif - // end debug return 0; } @@ -489,77 +320,10 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) { -// debug -#ifdef _DEBUG 
- char f_log[80]; - int myproc; - MPI_Comm_rank(MPI_COMM_WORLD, &myproc); - std::ofstream log; - if (myproc < 100) - { - sprintf(f_log, "transformer_%2.2d.log", myproc); - log.open(f_log, std::ios::app); - // log< receiver_size_process(NPROC_TRANS); std::vector receiver_displacement_process(NPROC_TRANS); -#ifdef _DEBUG - if (myproc < 100) - { - log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow - << " ; mypcol: " << SRC_Matrix.mypcol << std::endl; - log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol() - << std::endl; - log << "layout:" << SRC_Matrix.get_layout() << std::endl; - log << "ZERO = " << ZERO_Limit << std::endl; - log << "DST_Matrix parameters:" << std::endl; - log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl; - log << "start transforming H and S to CCS format" << std::endl; - } -#endif - // end debug - // find out the non-zeros elements' positions std::vector rowidx; std::vector colidx; int nnz = 0; -#ifdef _DEBUG - if (myproc < 100) - log << "start counting nnz..." 
<< std::endl; -#endif if (SRC_Matrix.get_comm() != MPI_COMM_NULL) { getNonZeroIndex(SRC_Matrix.get_layout(), @@ -651,43 +380,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, rowidx, colidx); } -#ifdef _DEBUG - if (myproc < 100) - { - log << "NonZeroIndex is got, nnz is " << nnz << std::endl; - log << "rowidx size: " << rowidx.size() << "; colidx size: " << colidx.size() << std::endl; - /* - if(SRC_Matrix.comm != MPI_COMM_NULL) - { - log<<"NonZeroIndex :"< buffer2ccsIndex; buildTransformParameter(SRC_Matrix, @@ -706,10 +398,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, receiver_displacement_process, buffer2ccsIndex); // Do transformation -#ifdef _DEBUG - if (myproc < 100) - log << "Parameters are built" << std::endl; -#endif std::vector sender_buffer(sender_size); std::vector receiver_buffer(receiver_size); // put H to sender buffer @@ -727,10 +415,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } -#ifdef _DEBUG - if (myproc < 100) - log << "H sender_buffer is filled" << std::endl; -#endif // do all2all transformation MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], @@ -742,17 +426,9 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, MPI_DOUBLE, COMM_TRANS); // collect H from receiver buffer -#ifdef _DEBUG - if (myproc < 100) - log << "H receiver_buffer is received" << std::endl; -#endif delete[] H_ccs; H_ccs = new double[receiver_size]; buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs); -#ifdef _DEBUG - if (myproc < 100) - log << "H_ccs is received" << std::endl; -#endif // put S to sender buffer if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') @@ -769,10 +445,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; } } -#ifdef 
_DEBUG - if (myproc < 100) - log << "S sender_buffer is filled" << std::endl; -#endif // do all2all transformation MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], @@ -784,27 +456,12 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, MPI_DOUBLE, COMM_TRANS); // collect S from receiver buffer -#ifdef _DEBUG - if (myproc < 100) - log << "S receiver_buffer is received" << std::endl; -#endif delete[] S_ccs; S_ccs = new double[receiver_size]; buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs); -#ifdef _DEBUG - if (myproc < 100) - log << "S_ccs is received" << std::endl; -#endif } // clear and return deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); -#ifdef _DEBUG - if (myproc < 100) - { - log << "COMM_TRANS is deleted" << std::endl; - log.close(); - } -#endif return 0; } @@ -817,29 +474,8 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DM, double* EDM) { -// debug -#ifdef _DEBUG - OUT(ofs_running, "transformCCStoBCD: start"); - MPI_Barrier(MPI_COMM_WORLD); -#endif - // end debug int myproc; MPI_Comm_rank(MPI_COMM_WORLD, &myproc); -// debug -#ifdef _DEBUG - std::ofstream log; - if (myproc < 100) - { - char f_log[80]; - sprintf(f_log, "transformer_%2.2d.log", myproc); - // MPI_Barrier(MPI_COMM_WORLD); - log.open(f_log, std::ios::app); - // MPI_Barrier(MPI_COMM_WORLD); - log << "\nstart transform DMnzval to DM" << std::endl; - } - MPI_Barrier(MPI_COMM_WORLD); -#endif - // end debug MPI_Group GROUP_TRANS; MPI_Comm COMM_TRANS = MPI_COMM_NULL; newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS); @@ -851,32 +487,13 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, DM[i] = 0; EDM[i] = 0; } -#ifdef _DEBUG - // MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "DM and EDM filled by 0" << std::endl; -// OUT(ofs_running, "transformCCStoBCD: DM and EDM filled by 0"); -#endif // setup number of local elements to be transfered to each remote 
processes int NPROC_TRANS; MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); - // std::vector sender_size_process(NPROC_TRANS); - // std::vector sender_displacement_process(NPROC_TRANS); - // std::vector receiver_size_process(NPROC_TRANS); - // std::vector receiver_displacement_process(NPROC_TRANS); int sender_size_process[NPROC_TRANS]; int sender_displacement_process[NPROC_TRANS]; int receiver_size_process[NPROC_TRANS]; int receiver_displacement_process[NPROC_TRANS]; -#ifdef _DEBUG - if (myproc < 100) - log << "NPROC_TRANS = " << NPROC_TRANS << std::endl; - // MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "build process rank map from BCD to TRANS" << std::endl; -// OUT(ofs_running, "transformCCStoBCD: build process rank map from BCD to TRANS"); -// MPI_Barrier(COMM_TRANS); -#endif int nproc_bcd; std::vector proc_map_bcd_trans; int myproc_trans; @@ -898,56 +515,16 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, proc_map_bcd_trans.resize(nproc_bcd, 0); MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); } - -#ifdef _DEBUG - // check process map from BCD comm to TRANS comm - if (myproc < 100) - { - log << "check process map:\n"; - log << "pid in bcd\tpid in trans\n"; - for (int i = 0; i < nproc_bcd; ++i) - { - log << i << "\t\t" << proc_map_bcd_trans[i] << std::endl; - } - log << "check pid from prow and pcol int bcd to pid in trans\n"; - log << "p_row p_col p_bcd p_trans\n"; - for (int i = 0; i < DST_Matrix.nprows; ++i) - { - for (int j = 0; j < DST_Matrix.npcols; ++j) - { - int pid_bcd = DST_Matrix.pnum(i, j); - int pid_trans = proc_map_bcd_trans[pid_bcd]; - log << i << "\t" << j << "\t" << pid_bcd << "\t" << pid_trans << std::endl; - } - } - log << "setup alltoall parameters" << std::endl; - } - // OUT(ofs_running, "transformCCStoBCD: setup alltoall parameters"); - MPI_Barrier(COMM_TRANS); -#endif // setup sender_size_process // std::fill(sender_size_process.begin(), sender_size_process.end(), 0); for (int i = 0; i < 
NPROC_TRANS; ++i) sender_size_process[i] = 0; -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "sender_size_process is inited by 0" << std::endl; - // OUT(ofs_running, "transformCCStoBCD: sender_size_process is inited by 0, size ", NPROC_TRANS); - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "display all columns and rows of nonzeros values:\n"; - int log_nnz = 0; -#endif for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) { int g_col = SRC_Matrix.globalCol(icol); int recv_pcol_bcd; int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); - // #ifdef _DEBUG - // log< 0) { sender_index = new int[sender_size]; @@ -1064,11 +583,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, dst_index[0] = -1; dst_index[1] = -1; } -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "; receiver_index size: "; -#endif + if (receiver_size > 0) { receiver_index = new int[2 * receiver_size]; @@ -1098,33 +613,9 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, { p[i] = sender_displacement_process[i]; } -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - { - log << "check BCD pnum" << std::endl; - log.flush(); - for (int i = 0; i < DST_Matrix.nprows; ++i) - { - for (int j = 0; j < DST_Matrix.npcols; ++j) - { - log << i << "\t" << j << "\t" << DST_Matrix.pnum(i, j) << std::endl; - } - } - log << "source CCS matrix parameters:\n"; - log << "numColLocal: " << SRC_Matrix.numColLocal << std::endl; - log << "pointer to beginning of each process is inited by sender_displacement_process" << std::endl; - // log<<"icol"<<"\t"<<"g_col"<<"\t"<<"col(bcd)"<<"\t"<<"pcol(bcd)"<= DST_Matrix.nprows || recv_prow_bcd < 0) - { - log << "ERROR: recv_prow_bcd error! 
recv_prow_bcd is " << recv_prow_bcd << "; max is " - << DST_Matrix.nprows << std::endl; - log.flush(); - } - } -#endif + int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); -#ifdef _DEBUG - // MPI_Barrier(COMM_TRANS); - if (myproc < 100) - { - if (recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0) - { - log << "ERROR: recv_proc_bcd outbound! recv_proc_bcd is " << recv_proc_bcd << "; max is " - << NPROC_TRANS << std::endl; - log.flush(); - } - } -#endif + int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; -#ifdef _DEBUG - // MPI_Barrier(COMM_TRANS); - if (myproc < 100) - { - if (p[recv_proc] >= sender_size || p[recv_proc] < 0) - { - log << "ERROR: sender_index's index outbound! " << std::endl; - log << recv_prow_bcd << " " << recv_pcol_bcd << recv_proc_bcd << " " << recv_proc << std::endl; - log << p[recv_proc] << " " << sender_size << std::endl; - log.flush(); - } - } -// MPI_Barrier(COMM_TRANS); -#endif + sender_index[p[recv_proc]] = idx; -#ifdef _DEBUG - // MPI_Barrier(COMM_TRANS); - if (myproc < 100) - { - if ((p[recv_proc] * 2 + 1) >= (2 * sender_size) || (p[recv_proc] * 2 + 1) < 0) - { - log << "ERROR: dst_index's index outbound! recv_proc:" << recv_proc - << "; p:" << p[recv_proc] * 2 + 1 << "; max is " << 2 * sender_size << std::endl; - log.flush(); - } - } -// MPI_Barrier(COMM_TRANS); -#endif + dst_index[p[recv_proc] * 2] = recv_row; dst_index[p[recv_proc] * 2 + 1] = recv_col; ++p[recv_proc]; @@ -1195,50 +640,6 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, } } -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - // check sender_index and dst_index - if (myproc < 100) - { - for (int i = 0; i < sender_size; ++i) - { - if (sender_index[i] < 0 || sender_index[i] > SRC_Matrix.nnzLocal) - { - log << "ERROR! sender_index outbound: " << i << " " << sender_index[i] << std::endl; - log.flush(); - } - } - for (int i = 0; i < 2 * sender_size; ++i) - { - if (dst_index[i] < 0 || dst_index[i] > DST_Matrix.size) - { - log << "ERROR! 
dst_index outbound: " << i << " " << dst_index[i] << " " << DST_Matrix.size - << std::endl; - log.flush(); - } - } - log << "sender_index is built" << std::endl; - log << "sender_size = " << sender_size << std::endl; - // for(int i=0; i 0) - { - sprintf(f_log, "sender_index_from_%2.2d_to_%2.2d.log", myproc_trans, i); - log_sender_index.open(f_log, std::ios::app); - for(int j=sender_displacement_process[i]; j 0) - { - sprintf(f_log, "dst_index_from_%2.2d_to_%2.2d.log", myproc_trans, i); - log_dst_index.open(f_log, std::ios::app); - for(int j=sender_displacement_process[i]; j 0) - { - sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans); - log_rcv_index.open(f_log, std::ios::app); - for(int j=receiver_displacement_process[i]; jDST_Matrix.get_nrow()) - { - log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "< "<DST_Matrix.get_ncol()) - { - log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "< "< DST_Matrix.get_nrow()) - { - log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > " - << DST_Matrix.get_nrow() << std::endl; - log.flush(); - } - if (receiver_index[i * 2 + 1] < 0) - { - log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " < 0" - << std::endl; - log.flush(); - } - else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol()) - { - log << "ERROR! 
receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > " - << DST_Matrix.get_ncol() << std::endl; - log.flush(); - } - } - log << "receiver_index values are checked" << std::endl; - log.flush(); - // check parameters for alltoall for buffer - log << "pid_trans sender_size_process sender_displacement_process receiver_size_process " - "receiver_displacement_process" - << std::endl; - for (int i = 0; i < NPROC_TRANS; ++i) - { - log << i << "\t" << sender_size_process[i] << "\t\t" << sender_displacement_process[i] << "\t\t" - << receiver_size_process[i] << "\t\t" << receiver_displacement_process[i] << std::endl; - } - log.flush(); - } - MPI_Barrier(COMM_TRANS); -#endif // transfer sender buffer to receiver buffer MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], @@ -1427,12 +685,6 @@ MPI_Barrier(COMM_TRANS); MPI_DOUBLE, COMM_TRANS); -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "receiver_buffer is got from DM" << std::endl; -// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM"); -#endif // transform receiver_buffer to DM if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') { @@ -1442,19 +694,6 @@ MPI_Barrier(COMM_TRANS); int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; int idx = ix * DST_Matrix.get_ncol() + iy; -#ifdef _DEBUG - if (myproc < 100) - { - if (idx < 0 || idx >= DST_Matrix_elem) - { - log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem - << std::endl; - log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.get_ncol() << std::endl; - log.flush(); - } - } -#endif DM[idx] = receiver_buffer[i]; } } @@ -1466,40 +705,15 @@ MPI_Barrier(COMM_TRANS); int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; int idx = iy * DST_Matrix.get_nrow() + ix; -#ifdef _DEBUG - if (myproc < 100) - { - if (idx < 0 || idx >= DST_Matrix_elem) - { - log << "idx for DM ERROR: idx is 
" << idx << "; DM total size is " << DST_Matrix_elem - << std::endl; - log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.get_nrow() << std::endl; - log.flush(); - } - } -#endif DM[idx] = receiver_buffer[i]; } } -#ifdef _DEBUG - if (myproc < 100) - log << "DM(BCD) is got from receiver_buffer" << std::endl; - MPI_Barrier(COMM_TRANS); -// OUT(ofs_running, "transformCCStoBCD: DM(BCD) is got from receiver_buffer"); -#endif // setup up sender buffer of EDM for (int i = 0; i < sender_size; ++i) { sender_buffer[i] = EDMnzvalLocal[sender_index[i]]; } -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "EDM(CCS) is put to sender_buffer" << std::endl; -// OUT(ofs_running, "transformCCStoBCD: EDM(CCS) is put to sender_buffer"); -#endif // transfer sender buffer to receiver buffer MPI_Alltoallv(&sender_buffer[0], @@ -1511,12 +725,7 @@ MPI_Barrier(COMM_TRANS); &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS); -#ifdef _DEBUG - MPI_Barrier(COMM_TRANS); - if (myproc < 100) - log << "receiver_buffer is got from EDM" << std::endl; -// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM"); -#endif + // transform receiver_buffer to EDM if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') { @@ -1526,19 +735,6 @@ MPI_Barrier(COMM_TRANS); int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; int idx = ix * DST_Matrix.get_ncol() + iy; -#ifdef _DEBUG - if (myproc < 100) - { - if (idx < 0 || idx >= DST_Matrix_elem) - { - log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem - << std::endl; - log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " ncol = " << DST_Matrix.get_ncol() << std::endl; - log.flush(); - } - } -#endif EDM[idx] = receiver_buffer[i]; } } @@ -1550,54 +746,18 @@ MPI_Barrier(COMM_TRANS); int ix = receiver_index[2 * i]; int iy = receiver_index[2 * i + 1]; int idx = iy * DST_Matrix.get_nrow() + 
ix; -#ifdef _DEBUG - if (myproc < 100) - { - if (idx < 0 || idx >= DST_Matrix_elem) - { - log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem - << std::endl; - log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy - << " nrow = " << DST_Matrix.get_nrow() << std::endl; - log.flush(); - } - } -#endif EDM[idx] = receiver_buffer[i]; } } -#ifdef _DEBUG - if (myproc < 100) - log << "EDM(BCD) is got from receiver_buffer" << std::endl; - MPI_Barrier(COMM_TRANS); -#endif + delete[] sender_index; delete[] sender_buffer; delete[] dst_index; delete[] receiver_index; delete[] receiver_buffer; -#ifdef _DEBUG - if (myproc < 100) - log << "work arrays are deleted" << std::endl; -#endif + } -#ifdef _DEBUG - if (myproc < 100) - log << "OUT COMM_TRANS" << std::endl; - if (myproc < 100) - log << "before deleteGroupCommTrans" << std::endl; -#endif deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); -#ifdef _DEBUG - MPI_Barrier(MPI_COMM_WORLD); - if (myproc < 100) - { - log << "COMM_TRANS is deleted" << std::endl; - log.close(); - } - MPI_Barrier(MPI_COMM_WORLD); - OUT(ofs_running, "transformCCStoBCD: finish job, COMM_TRANS is deleted"); -#endif return 0; } diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index c81128f9db..e261d31f3e 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -80,9 +80,6 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, double*& H_ccs, double*& S_ccs); -// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, -// DistBCDMatrix& DST_Matrix, double* DM_2d); - int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal, diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index ebbf253b20..b5f15b40d4 100644 --- 
a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -3,8 +3,8 @@ #include "pexsi_solver.h" #include - #include +#include #include "module_base/global_variable.h" #include "simple_pexsi.h" @@ -19,8 +19,6 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, const int ncol, const double* h, const double* s, - double* DM, - double* EDM, double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy) diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index b3d7aed152..6289ccfd55 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -1,6 +1,8 @@ #ifndef PEXSI_Solver_H #define PEXSI_Solver_H +#include + namespace pexsi { class PEXSI_Solver @@ -12,8 +14,6 @@ class PEXSI_Solver const int ncol, const double* h, const double* s, - double* DM, - double* EDM, double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index b59ed233ea..7169e0ab85 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -38,27 +38,16 @@ inline void strtolower(char* sa, char* sb) inline void setDefaultOption(int* int_para, double* double_para) { - // options.spin=2; double_para[0] = 2; - // options.gap=0; double_para[2] = 0; - // ZERO_Limit=DBL_MIN; double_para[11] = DBL_MIN; - // options.matrixType=0; int_para[3] = 0; - // options.solver=1; int_para[6] = 1; - // options.ordering=0; int_para[8] = 0; - // options.rowOrdering=0; int_para[9] = 0; - // options.symmetric=0; int_para[11] = 0; - // options.transpose=0; int_para[12] = 0; - // options.nPoints=2; int_para[14] = 2; - // options.verbosity=1; int_para[15] = 1; } @@ -137,222 +126,7 @@ int loadPEXSIOption(MPI_Comm comm, double_para[9] = 
hsolver::DiagoPexsi::pexsi_mu_guard; double_para[10] = hsolver::DiagoPexsi::pexsi_elec_thr; double_para[11] = hsolver::DiagoPexsi::pexsi_zero_thr; - // int myid; - // MPI_Comm_rank(comm, &myid); - // if (myid == 0) - // { - // std::ifstream ifs(PexsiOptionFile.c_str()); - // if (!ifs) - // { - // return 1; - // } - // setDefaultOption(int_para, double_para); - // ifs.clear(); - // ifs.seekg(0); - - // char key[128]; - // char lowercase_key[128]; - // const int LINE_LINGTH = 1024; - // char unused_string[LINE_LINGTH]; - - // while (ifs.good()) - // { - // ifs >> key; - // //~ cout<<"readin word is: "<>options.spin; - // ifs >> double_para[0]; - // //~ cout<<"double_para[0]: "<>options.temperature; - // ifs >> double_para[1]; - // //~ cout<<"double_para[1]: "<>options.gap; - // ifs >> double_para[2]; - // //~ cout<<"double_para[2]: "<>options.deltaE; - // ifs >> double_para[3]; - // //~ cout<<"double_para[3]: "<>options.numPole; - // ifs >> int_para[0]; - // //~ cout<<"int_para[0]: "<>options.isInertiaCount; - // ifs >> int_para[1]; - // //~ cout<<"int_para[1]: "<>options.maxPEXSIIter; - // ifs >> int_para[2]; - // //~ cout<<"int_para[2]: "<>options.muMin0; - // ifs >> double_para[4]; - // //~ cout<<"double_para[4]: "<>options.muMax0; - // ifs >> double_para[5]; - // //~ cout<<"double_para[5]: "<>options.mu0; - // ifs >> double_para[6]; - // //~ cout<<"double_para[6]: "<>options.muInertiaTolerance; - // ifs >> double_para[7]; - // //~ cout<<"double_para[7]: "<>options.muInertiaExpansion; - // ifs >> double_para[8]; - // //~ cout<<"double_para[8]: "<>options.muPEXSISafeGuard; - // ifs >> double_para[9]; - // //~ cout<<"double_para[9]: "<>options.numElectronPEXSITolerance; - // ifs >> double_para[10]; - // //~ cout<<"double_para[10]: "<> double_para[11]; - // } - // else if (strcmp("matrixtype", lowercase_key) == 0) - // { - // //~ ifs>>options.matrixType; - // ifs >> int_para[3]; - // //~ cout<<"int_para[3]: "<>options.isSymbolicFactorize; - // ifs >> int_para[4]; - 
// //~ cout<<"int_para[4]: "<>options.isConstructCommPattern; - // ifs >> int_para[5]; - // //~ cout<<"int_para[5]: "<>options.solver; - // ifs >> int_para[6]; - // //~ cout<<"int_para[6]: "<>options.symmetricStorage; - // ifs >> int_para[7]; - // //~ cout<<"int_para[7]: "<>options.ordering; - // ifs >> int_para[8]; - // //~ cout<<"int_para[8]: "<>options.rowOrdering; - // ifs >> int_para[9]; - // //~ cout<<"int_para[9]: "<>options.npSymbFact; - // ifs >> int_para[10]; - // //~ cout<<"int_para[10]: "<>options.symmetric; - // ifs >> int_para[11]; - // //~ cout<<"int_para[11]: "<>options.transpose; - // ifs >> int_para[12]; - // //~ cout<<"int_para[12]: "<>options.method; - // ifs >> int_para[13]; - // //~ cout<<"int_para[13]: "<>options.nPoints; - // ifs >> int_para[14]; - // //~ cout<<"int_para[14]: "<>options.verbosity; - // ifs >> int_para[15]; - // //~ cout<<"int_para[15]: "<>options.verbosity; - // ifs >> int_para[16]; - // //~ cout<<"int_para[16]: "< Date: Sat, 3 Feb 2024 21:52:23 +0800 Subject: [PATCH 25/44] Improvement: take calculated mu as new initial guess, may slightly improve performance --- source/module_hsolver/module_pexsi/simple_pexsi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index 7169e0ab85..3ef8e6eeeb 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -309,6 +309,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); + hsolver::DiagoPexsi::pexsi_mu = mu; // retrieve the results from the plan if (DMnzvalLocal != nullptr) delete[] DMnzvalLocal; From 2cf6773dfe075a4d558c65239a17085093587f80 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Sun, 4 Feb 2024 01:53:02 +0800 Subject: [PATCH 26/44] Fix mistakes in the last commit --- 
source/module_hsolver/diago_pexsi.cpp | 6 ++++-- source/module_hsolver/diago_pexsi.h | 7 +++++++ source/module_hsolver/module_pexsi/pexsi_solver.cpp | 11 +++++++++-- source/module_hsolver/module_pexsi/pexsi_solver.h | 4 +++- source/module_hsolver/module_pexsi/simple_pexsi.cpp | 7 ++++--- source/module_hsolver/module_pexsi/simple_pexsi.h | 4 +++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 1cfc765c2a..b0bac5e220 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -125,6 +125,7 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& phm_in->matrix(h_mat, s_mat); std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; + int ik = psi.get_current_k(); this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, this->ParaV->nb, this->ParaV->nrow, @@ -134,12 +135,13 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& this->totalEnergyH, this->totalEnergyS, this->totalFreeEnergy); - this->ps->solve(); + this->ps->solve(mu_buffer[ik]); this->EDM.push_back(this->ps->get_EDM()); - this->DM.push_back(this->ps->get_DM()); // loc.dm_gamma[ik] loc.dm_gamma[0]? 
+ this->DM.push_back(this->ps->get_DM()); this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); this->totalEnergyH = this->ps->get_totalEnergyH(); this->totalEnergyS = this->ps->get_totalEnergyS(); + this->mu_buffer[ik] = this->ps->get_mu(); } template <> diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 8e2aa98da1..c749096410 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -3,6 +3,7 @@ #include #include "diagh.h" +#include "module_base/global_variable.h" #include "module_basis/module_ao/parallel_orbitals.h" #include "module_pexsi/pexsi_solver.h" @@ -14,10 +15,16 @@ class DiagoPexsi : public DiagH { private: using Real = typename GetTypeReal::type; + std::vector mu_buffer; public: DiagoPexsi(const Parallel_Orbitals* ParaV_in) { + mu_buffer.resize(GlobalV::NSPIN); + for (int i = 0; i < GlobalV::NSPIN; i++) + { + mu_buffer[i] = this->pexsi_mu; + } this->ParaV = ParaV_in; } void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index b5f15b40d4..ed1cfa0061 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -38,7 +38,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, this->totalFreeEnergy = 0.0; } -int PEXSI_Solver::solve() +int PEXSI_Solver::solve(double mu0) { MPI_Group grid_group; int myid, grid_np; @@ -67,7 +67,9 @@ int PEXSI_Solver::solve() this->EDM, this->totalEnergyH, this->totalEnergyS, - this->totalFreeEnergy); + this->totalFreeEnergy, + mu, + mu0); return 0; } @@ -96,5 +98,10 @@ const double PEXSI_Solver::get_totalEnergyS() const return totalEnergyS; } +const double PEXSI_Solver::get_mu() const +{ + return mu; +} + } // namespace pexsi #endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h 
b/source/module_hsolver/module_pexsi/pexsi_solver.h index 6289ccfd55..1bf7060dde 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -17,12 +17,13 @@ class PEXSI_Solver double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); - int solve(); + int solve(double mu0); double* get_DM() const; double* get_EDM() const; const double get_totalFreeEnergy() const; const double get_totalEnergyH() const; const double get_totalEnergyS() const; + const double get_mu() const; private: int blacs_text; @@ -36,6 +37,7 @@ class PEXSI_Solver double totalEnergyH; double totalEnergyS; double totalFreeEnergy; + double mu; }; } // namespace pexsi #endif // PEXSI_Solver_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index 3ef8e6eeeb..a5f8e6f804 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -210,7 +210,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI, double*& EDM, // output matrices double& totalEnergyH, double& totalEnergyS, - double& totalFreeEnergy) // output energy + double& totalFreeEnergy, // output energy + double& mu, + double mu0) { if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) @@ -228,6 +230,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, int numProcessPerPole; double ZERO_Limit; loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit); + options.mu0 = mu0; ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan"); PPEXSIPlan plan; @@ -288,7 +291,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI, SnzvalLocal, &info); - double mu; double nelec; double muMinInertia; double muMaxInertia; @@ -309,7 +311,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); - hsolver::DiagoPexsi::pexsi_mu = 
mu; // retrieve the results from the plan if (DMnzvalLocal != nullptr) delete[] DMnzvalLocal; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h index fded81fc59..db8879e5ac 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.h +++ b/source/module_hsolver/module_pexsi/simple_pexsi.h @@ -22,6 +22,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI, double*& EDM, // output matrices double& totalEnergyH, double& totalEnergyS, - double& totalFreeEnergy); + double& totalFreeEnergy, + double& mu, + double mu0); } #endif // SIMPLE_PEXSI_H \ No newline at end of file From 7298c4160223e29a6bfd79f7bbd46f1f4f754021 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Tue, 6 Feb 2024 15:19:37 +0800 Subject: [PATCH 27/44] Fix: params and features - set default pexsi_temp - fix md in pexsi --- source/module_elecstate/elecstate_lcao.cpp | 9 ++++++--- source/module_elecstate/elecstate_lcao.h | 4 +++- .../hamilt_lcaodft/FORCE_gamma_edm.cpp | 19 +++++++++++++++++-- source/module_hsolver/hsolver_lcao.cpp | 2 +- source/module_io/input.cpp | 2 +- source/module_io/input.h | 2 +- source/module_io/test/input_conv_test.cpp | 4 ++-- source/module_io/test/input_test_para.cpp | 2 +- source/module_io/test/write_input_test.cpp | 2 +- 9 files changed, 33 insertions(+), 13 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index a474a3c172..3538b28b82 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -1,4 +1,5 @@ #include "elecstate_lcao.h" +#include #include "cal_dm.h" #include "module_base/timer.h" @@ -166,7 +167,7 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) this->calEBand(); if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" - || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao" || GlobalV::KS_SOLVER == "pexsi") + || 
GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") { ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); @@ -252,7 +253,7 @@ double ElecStateLCAO>::get_spin_constrain_energy() #ifdef __PEXSI template<> -void ElecStateLCAO::dmToRho(std::vector pexsi_DM) +void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM) { ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); @@ -267,9 +268,11 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM) } auto DM = this->get_DM(); + this->pexsi_EDM.clear(); for (int is = 0; is < GlobalV::NSPIN; is++) { this->DM->set_DMK_pointer(is, pexsi_DM[is]); + this->pexsi_EDM.push_back(pexsi_EDM[is]); } DM->cal_DMR(); @@ -299,7 +302,7 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM) } template<> -void ElecStateLCAO>::dmToRho(std::vector*> DM) +void ElecStateLCAO>::dmToRho(std::vector*> pexsi_DM, std::vector*> pexsi_EDM) { ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); } diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index c47bd0c8cb..4a7df3d76e 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -1,6 +1,7 @@ #ifndef ELECSTATELCAO_H #define ELECSTATELCAO_H +#include #include "elecstate.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h" @@ -61,7 +62,8 @@ class ElecStateLCAO : public ElecState #ifdef __PEXSI // use for pexsi - void dmToRho(std::vector pexsi_DM); + void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); + std::vector pexsi_EDM; #endif protected: diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp index a69821c50c..1ce37e592e 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp @@ -1,4 +1,5 @@ #include 
"FORCE_gamma.h" +#include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" @@ -35,8 +36,22 @@ void Force_LCAO_gamma::cal_foverlap( // construct a DensityMatrix for Gamma-Only const Parallel_Orbitals* pv = this->ParaV; elecstate::DensityMatrix EDM(pv,GlobalV::NSPIN); - - elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + +#ifdef __PEXSI + if (GlobalV::KS_SOLVER == "pexsi") + { + auto pes = dynamic_cast*>(pelec); + for (int ik = 0; ik < GlobalV::NSPIN; ik++) + { + EDM.set_DMK_pointer(ik, pes->pexsi_EDM[ik]); + } + + } + else +#endif + { + elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + } ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d"); diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 66725f764f..9f9460eb7c 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -232,7 +232,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); pes->f_en.eband = tem->totalFreeEnergy; - _pes->dmToRho(tem->DM); + _pes->dmToRho(tem->DM, tem->EDM); } else #endif diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index fb5bc43551..df515d6d10 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -657,7 +657,7 @@ void Input::Default(void) pexsi_method = 1; pexsi_nproc_pole = 1; // pexsi_spin = 2; - pexsi_temp = 0.0001; + pexsi_temp = 0.015; pexsi_gap = 0; pexsi_delta_e = 20.0; pexsi_mu_lower = -10; diff --git a/source/module_io/input.h b/source/module_io/input.h index f166612a4d..e527987b8e 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -620,7 +620,7 @@ class Input int pexsi_method = 1; int pexsi_nproc_pole = 1; // double pexsi_spin = 2; - 
double pexsi_temp = 0.0001; + double pexsi_temp = 0.015; double pexsi_gap = 0; double pexsi_delta_e = 20.0; double pexsi_mu_lower = -10; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index 60ec6c82dd..3455e9ec40 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -560,7 +560,7 @@ TEST_F(InputConvTest, PEXSI) EXPECT_FALSE(hsolver::DiagoPexsi::pexsi_trans); EXPECT_EQ(hsolver::DiagoPexsi::pexsi_method, 1); EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nproc_pole, 1); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_temp, 1e-4); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_temp, 0.015); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_gap, 0); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_delta_e, 20); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_lower, -10); @@ -577,7 +577,7 @@ TEST_F(InputConvTest, PEXSI) EXPECT_FALSE(hsolver::DiagoPexsi>::pexsi_trans); EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_method, 1); EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_nproc_pole, 1); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_temp, 0.0001); + EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_temp, 0.015); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_gap, 0); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_delta_e, 20); EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_lower, -10); diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index ad40d635ea..1b936f73d5 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -405,7 +405,7 @@ TEST_F(InputParaTest, Bcast) EXPECT_FALSE(INPUT.pexsi_trans); EXPECT_EQ(INPUT.pexsi_method, 1); EXPECT_EQ(INPUT.pexsi_nproc_pole, 1); - EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.0001); + EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.015); EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0); EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 20); EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10); diff --git 
a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index e106fc399f..6c8a2b32f9 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -946,7 +946,7 @@ TEST_F (write_input, PEXSI24) EXPECT_THAT(output, testing::HasSubstr("pexsi_trans 0 #Whether to transpose")); EXPECT_THAT(output, testing::HasSubstr("pexsi_method 1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method")); EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole 1 #Number of processes used by each pole")); - EXPECT_THAT(output, testing::HasSubstr("pexsi_temp 0.0001 #Temperature, in the same unit as H")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_temp 0.015 #Temperature, in the same unit as H")); EXPECT_THAT(output, testing::HasSubstr("pexsi_gap 0 #Spectral gap")); EXPECT_THAT(output, testing::HasSubstr("pexsi_delta_e 20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$")); EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower -10 #Initial guess of lower bound for mu")); From ce18c08714444b369288cb0a84f3d2326bfc6d9f Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Tue, 6 Feb 2024 15:45:52 +0800 Subject: [PATCH 28/44] fix empty lines --- source/module_elecstate/elecstate_lcao.cpp | 3 +-- source/module_esolver/esolver_ks.cpp | 3 +++ source/module_relax/relax_driver.cpp | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 3538b28b82..3d629f7abe 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -150,6 +150,7 @@ void ElecStateLCAO>::psiToRho(const psi::Psiloc->DM_R, this->charge->kin_r, Gint_Tools::job_type::tau); this->uhm->GK.cal_gint(&inout1); } + this->charge->renormalize_rho(); ModuleBase::timer::tick("ElecStateLCAO", "psiToRho"); @@ -175,7 +176,6 @@ void ElecStateLCAO::psiToRho(const 
psi::Psi& psi) //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); this->DM->cal_DMR(); - if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready { this->loc->dm_gamma.resize(GlobalV::NSPIN); @@ -185,7 +185,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) } } ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); - for (int ik = 0; ik < psi.get_nk(); ++ik) { // for gamma_only case, no convertion occured, just for print. diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index ead5195085..fab63ce864 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -354,6 +354,7 @@ namespace ModuleESolver else { ModuleBase::timer::tick(this->classname, "Run"); + this->beforescf(istep); //Something else to do before the iter loop ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF"); if(this->maxniter > 0) this->printhead(); //print the headline on the screen. 
@@ -471,8 +472,10 @@ namespace ModuleESolver } } afterscf(istep); + ModuleBase::timer::tick(this->classname, "Run"); } + return; }; diff --git a/source/module_relax/relax_driver.cpp b/source/module_relax/relax_driver.cpp index 4da9d7a283..78c4710e1a 100644 --- a/source/module_relax/relax_driver.cpp +++ b/source/module_relax/relax_driver.cpp @@ -31,6 +31,7 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolve while (istep <= GlobalV::RELAX_NMAX && !stop) { time_t estart = time(NULL); + if (GlobalV::OUT_LEVEL == "ie" && (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax" || GlobalV::CALCULATION == "scf" || GlobalV::CALCULATION == "nscf")) @@ -39,7 +40,6 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolve } // mohan added eiter to count for the electron iteration number, 2021-01-28 - p_esolver->Run(istep - 1, GlobalC::ucell); time_t eend = time(NULL); @@ -117,8 +117,10 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolve } } time_t fend = time(NULL); + ++istep; } + if (GlobalV::OUT_LEVEL == "i") { std::cout << " ION DYNAMICS FINISHED :)" << std::endl; From c4d86a44cc77466192ad72869d091dbf247998e1 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Wed, 7 Feb 2024 18:08:30 +0800 Subject: [PATCH 29/44] Fix: move params to pexsi_solver, rename USE_PEXSI to ENABLE_PEXSI --- CMakeLists.txt | 4 +- cmake/FindPEXSI.cmake | 2 +- source/module_hsolver/CMakeLists.txt | 2 +- source/module_hsolver/diago_pexsi.cpp | 102 ----------------- source/module_hsolver/diago_pexsi.h | 104 +----------------- .../module_pexsi/pexsi_solver.cpp | 27 +++++ .../module_pexsi/pexsi_solver.h | 103 +++++++++++++++++ .../module_pexsi/simple_pexsi.cpp | 50 ++++----- source/module_io/input_conv.cpp | 76 +++++-------- source/module_io/test/input_conv_test.cpp | 63 ++++------- 10 files changed, 208 insertions(+), 325 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6080e51b..e2ae6dc04f 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -36,7 +36,7 @@ option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) -option(USE_PEXSI "Enable support for PEXSI." OFF) +option(ENABLE_PEXSI "Enable support for PEXSI." OFF) @@ -210,7 +210,7 @@ if(ENABLE_LCAO) add_compile_definitions(USE_NEW_TWO_CENTER) endif() - if(USE_PEXSI) + if(ENABLE_PEXSI) find_package(PEXSI REQUIRED) target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake index b1565d2c06..5adc4c8a6d 100644 --- a/cmake/FindPEXSI.cmake +++ b/cmake/FindPEXSI.cmake @@ -41,7 +41,7 @@ find_library(ParMETIS_LIBRARY ) find_library(SuperLU_DIST_LIBRARY - NAMES libsuperlu_dist.a + NAMES superlu_dist HINTS ${SuperLU_DIST_DIR} PATH_SUFFIXES "lib" ) diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index 9a023fb5d0..cea200887a 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -38,7 +38,7 @@ if(ENABLE_LCAO) endif() endif() - if(USE_PEXSI) + if(ENABLE_PEXSI) list(APPEND objects diago_pexsi.cpp ) diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index b0bac5e220..95c4a7433e 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -15,108 +15,6 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { -template <> -int DiagoPexsi::pexsi_npole = 0; -template <> -bool DiagoPexsi::pexsi_inertia = 0; -template <> -int DiagoPexsi::pexsi_nmax = 0; -// template <> -// int DiagoPexsi::pexsi_symbolic = 0; -template <> -bool DiagoPexsi::pexsi_comm = 0; -template <> -bool DiagoPexsi::pexsi_storage = 0; 
-template <> -int DiagoPexsi::pexsi_ordering = 0; -template <> -int DiagoPexsi::pexsi_row_ordering = 0; -template <> -int DiagoPexsi::pexsi_nproc = 0; -template <> -bool DiagoPexsi::pexsi_symm = 0; -template <> -bool DiagoPexsi::pexsi_trans = 0; -template <> -int DiagoPexsi::pexsi_method = 0; -template <> -int DiagoPexsi::pexsi_nproc_pole = 0; -// template <> -// double DiagoPexsi::pexsi_spin = 2; -template <> -double DiagoPexsi::pexsi_temp = 0.0; -template <> -double DiagoPexsi::pexsi_gap = 0.0; -template <> -double DiagoPexsi::pexsi_delta_e = 0.0; -template <> -double DiagoPexsi::pexsi_mu_lower = 0.0; -template <> -double DiagoPexsi::pexsi_mu_upper = 0.0; -template <> -double DiagoPexsi::pexsi_mu = 0.0; -template <> -double DiagoPexsi::pexsi_mu_thr = 0.0; -template <> -double DiagoPexsi::pexsi_mu_expand = 0.0; -template <> -double DiagoPexsi::pexsi_mu_guard = 0.0; -template <> -double DiagoPexsi::pexsi_elec_thr = 0.0; -template <> -double DiagoPexsi::pexsi_zero_thr = 0.0; - -template <> -int DiagoPexsi>::pexsi_npole = 0; -template <> -bool DiagoPexsi>::pexsi_inertia = 0; -template <> -int DiagoPexsi>::pexsi_nmax = 0; -// template <> -// int DiagoPexsi>::pexsi_symbolic = 0; -template <> -bool DiagoPexsi>::pexsi_comm = 0; -template <> -bool DiagoPexsi>::pexsi_storage = 0; -template <> -int DiagoPexsi>::pexsi_ordering = 0; -template <> -int DiagoPexsi>::pexsi_row_ordering = 0; -template <> -int DiagoPexsi>::pexsi_nproc = 0; -template <> -bool DiagoPexsi>::pexsi_symm = 0; -template <> -bool DiagoPexsi>::pexsi_trans = 0; -template <> -int DiagoPexsi>::pexsi_method = 0; -template <> -int DiagoPexsi>::pexsi_nproc_pole = 0; -// template <> -// double DiagoPexsi>::pexsi_spin = 2; -template <> -double DiagoPexsi>::pexsi_temp = 0.0; -template <> -double DiagoPexsi>::pexsi_gap = 0.0; -template <> -double DiagoPexsi>::pexsi_delta_e = 0.0; -template <> -double DiagoPexsi>::pexsi_mu_lower = 0.0; -template <> -double DiagoPexsi>::pexsi_mu_upper = 0.0; -template <> -double 
DiagoPexsi>::pexsi_mu = 0.0; -template <> -double DiagoPexsi>::pexsi_mu_thr = 0.0; -template <> -double DiagoPexsi>::pexsi_mu_expand = 0.0; -template <> -double DiagoPexsi>::pexsi_mu_guard = 0.0; -template <> -double DiagoPexsi>::pexsi_elec_thr = 0.0; -template <> -double DiagoPexsi>::pexsi_zero_thr = 0.0; - template <> void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index c749096410..af3a175ff1 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -23,7 +23,7 @@ class DiagoPexsi : public DiagH mu_buffer.resize(GlobalV::NSPIN); for (int i = 0; i < GlobalV::NSPIN; i++) { - mu_buffer[i] = this->pexsi_mu; + mu_buffer[i] = this->ps->pexsi_mu; } this->ParaV = ParaV_in; } @@ -36,108 +36,6 @@ class DiagoPexsi : public DiagH double totalFreeEnergy; pexsi::PEXSI_Solver* ps; - //========================================================== - // PEXSI related variables - //========================================================== - /** - * @brief Number of terms in the pole expansion. - */ - static int pexsi_npole; - /** - * @brief Whether inertia counting is used at the very beginning. - */ - static bool pexsi_inertia; - /** - * @brief Maximum number of PEXSI iterations after each inertia counting procedure. - */ - static int pexsi_nmax; - /** - * @brief Whether to construct PSelInv communication pattern. - */ - static bool pexsi_comm; - /** - * @brief Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. - */ - static bool pexsi_storage; - /** - * @brief Ordering strategy for factorization and selected inversion. - */ - static int pexsi_ordering; - /** - * @brief row permutation strategy for factorization and selected inversion. - */ - static int pexsi_row_ordering; - /** - * @brief Number of processors for PARMETIS/PT-SCOTCH. Only used if the ordering == 0. 
- */ - static int pexsi_nproc; - /** - * @brief Matrix structure. - * - = 0 : Unsymmetric matrix - * - = 1 : Symmetric matrix (default). - */ - static bool pexsi_symm; - /** - * @brief Transpose. - * - = 0 : Factor non transposed matrix (default). - * - = 1 : Factor transposed matrix. - */ - static bool pexsi_trans; - /** - * @brief The pole expansion method to be used. - * - = 1 : Cauchy Contour Integral method used. - * - = 2 : Moussa optimized method. - */ - static int pexsi_method; - /** - * @brief The point parallelizaion of PEXSI. - * - = 2 : Recommend two points parallelization - */ - static int pexsi_nproc_pole; - /** - * @brief Temperature, in the same unit as H - */ - static double pexsi_temp; - /** - * @brief Spectral gap. **Note** This can be set to be 0 in most cases. - */ - static double pexsi_gap; - /** - * @brief An upper bound for the spectral radius of \f$S^{-1} H\f$. - */ - static double pexsi_delta_e; - /** - * @brief Initial guess of lower bound for mu. - */ - static double pexsi_mu_lower; - /** - * @brief Initial guess of upper bound for mu. - */ - static double pexsi_mu_upper; - /** - * @brief Initial guess for mu (for the solver) (AG) - */ - static double pexsi_mu; - /** - * @brief Stopping criterion in terms of the chemical potential for the inertia counting procedure. - */ - static double pexsi_mu_thr; - /** - * @brief If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion. - */ - static double pexsi_mu_expand; - /** - * @brief Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. - */ - static double pexsi_mu_guard; - /** - * @brief Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact. - */ - static double pexsi_elec_thr; - /** - * @brief Stopping criterion for the zero threshold. 
- */ - static double pexsi_zero_thr; }; } // namespace hsolver diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index ed1cfa0061..ebb2c6d78b 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -13,6 +13,33 @@ extern MPI_Comm DIAG_WORLD; extern MPI_Comm GRID_WORLD; namespace pexsi { + +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; + PEXSI_Solver::PEXSI_Solver(const int blacs_text, const int nb, const int nrow, diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index 1bf7060dde..880efaf504 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -25,6 +25,109 @@ class PEXSI_Solver const double get_totalEnergyS() const; const double get_mu() const; + //========================================================== + // PEXSI related variables + 
//========================================================== + /** + * @brief Number of terms in the pole expansion. + */ + static int pexsi_npole; + /** + * @brief Whether inertia counting is used at the very beginning. + */ + static bool pexsi_inertia; + /** + * @brief Maximum number of PEXSI iterations after each inertia counting procedure. + */ + static int pexsi_nmax; + /** + * @brief Whether to construct PSelInv communication pattern. + */ + static bool pexsi_comm; + /** + * @brief Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. + */ + static bool pexsi_storage; + /** + * @brief Ordering strategy for factorization and selected inversion. + */ + static int pexsi_ordering; + /** + * @brief row permutation strategy for factorization and selected inversion. + */ + static int pexsi_row_ordering; + /** + * @brief Number of processors for PARMETIS/PT-SCOTCH. Only used if the ordering == 0. + */ + static int pexsi_nproc; + /** + * @brief Matrix structure. + * - = 0 : Unsymmetric matrix + * - = 1 : Symmetric matrix (default). + */ + static bool pexsi_symm; + /** + * @brief Transpose. + * - = 0 : Factor non transposed matrix (default). + * - = 1 : Factor transposed matrix. + */ + static bool pexsi_trans; + /** + * @brief The pole expansion method to be used. + * - = 1 : Cauchy Contour Integral method used. + * - = 2 : Moussa optimized method. + */ + static int pexsi_method; + /** + * @brief The point parallelizaion of PEXSI. + * - = 2 : Recommend two points parallelization + */ + static int pexsi_nproc_pole; + /** + * @brief Temperature, in the same unit as H + */ + static double pexsi_temp; + /** + * @brief Spectral gap. **Note** This can be set to be 0 in most cases. + */ + static double pexsi_gap; + /** + * @brief An upper bound for the spectral radius of \f$S^{-1} H\f$. + */ + static double pexsi_delta_e; + /** + * @brief Initial guess of lower bound for mu. 
+ */ + static double pexsi_mu_lower; + /** + * @brief Initial guess of upper bound for mu. + */ + static double pexsi_mu_upper; + /** + * @brief Initial guess for mu (for the solver) (AG) + */ + static double pexsi_mu; + /** + * @brief Stopping criterion in terms of the chemical potential for the inertia counting procedure. + */ + static double pexsi_mu_thr; + /** + * @brief If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion. + */ + static double pexsi_mu_expand; + /** + * @brief Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. + */ + static double pexsi_mu_guard; + /** + * @brief Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact. + */ + static double pexsi_elec_thr; + /** + * @brief Stopping criterion for the zero threshold. + */ + static double pexsi_zero_thr; + private: int blacs_text; int nb; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index a5f8e6f804..c52a6c8ef3 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -96,36 +96,36 @@ int loadPEXSIOption(MPI_Comm comm, double double_para[12]; // read in PEXSI options from GlobalV - int_para[0] = hsolver::DiagoPexsi::pexsi_npole; - int_para[1] = hsolver::DiagoPexsi::pexsi_inertia; - int_para[2] = hsolver::DiagoPexsi::pexsi_nmax; + int_para[0] = pexsi::PEXSI_Solver::pexsi_npole; + int_para[1] = pexsi::PEXSI_Solver::pexsi_inertia; + int_para[2] = pexsi::PEXSI_Solver::pexsi_nmax; int_para[3] = 0; - int_para[4] = 1; // hsolver::DiagoPexsi::pexsi_symbolic; - int_para[5] = hsolver::DiagoPexsi::pexsi_comm; + int_para[4] = 1; // pexsi::PEXSI_Solver::pexsi_symbolic; + int_para[5] = pexsi::PEXSI_Solver::pexsi_comm; int_para[6] = 0; - int_para[7] = hsolver::DiagoPexsi::pexsi_storage; - int_para[8] = 
hsolver::DiagoPexsi::pexsi_ordering; - int_para[9] = hsolver::DiagoPexsi::pexsi_row_ordering; - int_para[10] = hsolver::DiagoPexsi::pexsi_nproc; - int_para[11] = hsolver::DiagoPexsi::pexsi_symm; - int_para[12] = hsolver::DiagoPexsi::pexsi_trans; - int_para[13] = hsolver::DiagoPexsi::pexsi_method; + int_para[7] = pexsi::PEXSI_Solver::pexsi_storage; + int_para[8] = pexsi::PEXSI_Solver::pexsi_ordering; + int_para[9] = pexsi::PEXSI_Solver::pexsi_row_ordering; + int_para[10] = pexsi::PEXSI_Solver::pexsi_nproc; + int_para[11] = pexsi::PEXSI_Solver::pexsi_symm; + int_para[12] = pexsi::PEXSI_Solver::pexsi_trans; + int_para[13] = pexsi::PEXSI_Solver::pexsi_method; int_para[14] = 2; int_para[15] = 0; - int_para[16] = hsolver::DiagoPexsi::pexsi_nproc_pole; + int_para[16] = pexsi::PEXSI_Solver::pexsi_nproc_pole; - double_para[0] = GlobalV::NSPIN; // hsolver::DiagoPexsi::pexsi_spin; - double_para[1] = hsolver::DiagoPexsi::pexsi_temp; - double_para[2] = hsolver::DiagoPexsi::pexsi_gap; - double_para[3] = hsolver::DiagoPexsi::pexsi_delta_e; - double_para[4] = hsolver::DiagoPexsi::pexsi_mu_lower; - double_para[5] = hsolver::DiagoPexsi::pexsi_mu_upper; - double_para[6] = hsolver::DiagoPexsi::pexsi_mu; - double_para[7] = hsolver::DiagoPexsi::pexsi_mu_thr; - double_para[8] = hsolver::DiagoPexsi::pexsi_mu_expand; - double_para[9] = hsolver::DiagoPexsi::pexsi_mu_guard; - double_para[10] = hsolver::DiagoPexsi::pexsi_elec_thr; - double_para[11] = hsolver::DiagoPexsi::pexsi_zero_thr; + double_para[0] = GlobalV::NSPIN; // pexsi::PEXSI_Solver::pexsi_spin; + double_para[1] = pexsi::PEXSI_Solver::pexsi_temp; + double_para[2] = pexsi::PEXSI_Solver::pexsi_gap; + double_para[3] = pexsi::PEXSI_Solver::pexsi_delta_e; + double_para[4] = pexsi::PEXSI_Solver::pexsi_mu_lower; + double_para[5] = pexsi::PEXSI_Solver::pexsi_mu_upper; + double_para[6] = pexsi::PEXSI_Solver::pexsi_mu; + double_para[7] = pexsi::PEXSI_Solver::pexsi_mu_thr; + double_para[8] = pexsi::PEXSI_Solver::pexsi_mu_expand; + 
double_para[9] = pexsi::PEXSI_Solver::pexsi_mu_guard; + double_para[10] = pexsi::PEXSI_Solver::pexsi_elec_thr; + double_para[11] = pexsi::PEXSI_Solver::pexsi_zero_thr; options.numPole = int_para[0]; options.isInertiaCount = int_para[1]; diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index e77205c0c8..17da1e6460 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -775,57 +775,31 @@ void Input_Conv::Convert(void) // PEXSI related parameters //----------------------------------------------- #ifdef __PEXSI - hsolver::DiagoPexsi>::pexsi_npole = INPUT.pexsi_npole; - hsolver::DiagoPexsi>::pexsi_inertia = INPUT.pexsi_inertia; - hsolver::DiagoPexsi>::pexsi_nmax = INPUT.pexsi_nmax; - // hsolver::DiagoPexsi>::pexsi_symbolic = INPUT.pexsi_symbolic; - hsolver::DiagoPexsi>::pexsi_comm = INPUT.pexsi_comm; - hsolver::DiagoPexsi>::pexsi_storage = INPUT.pexsi_storage; - hsolver::DiagoPexsi>::pexsi_ordering = INPUT.pexsi_ordering; - hsolver::DiagoPexsi>::pexsi_row_ordering = INPUT.pexsi_row_ordering; - hsolver::DiagoPexsi>::pexsi_nproc = INPUT.pexsi_nproc; - hsolver::DiagoPexsi>::pexsi_symm = INPUT.pexsi_symm; - hsolver::DiagoPexsi>::pexsi_trans = INPUT.pexsi_trans; - hsolver::DiagoPexsi>::pexsi_method = INPUT.pexsi_method; - hsolver::DiagoPexsi>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; - // hsolver::DiagoPexsi>::pexsi_spin = INPUT.pexsi_spin; - hsolver::DiagoPexsi>::pexsi_temp = INPUT.pexsi_temp; - hsolver::DiagoPexsi>::pexsi_gap = INPUT.pexsi_gap; - hsolver::DiagoPexsi>::pexsi_delta_e = INPUT.pexsi_delta_e; - hsolver::DiagoPexsi>::pexsi_mu_lower = INPUT.pexsi_mu_lower; - hsolver::DiagoPexsi>::pexsi_mu_upper = INPUT.pexsi_mu_upper; - hsolver::DiagoPexsi>::pexsi_mu = INPUT.pexsi_mu; - hsolver::DiagoPexsi>::pexsi_mu_thr = INPUT.pexsi_mu_thr; - hsolver::DiagoPexsi>::pexsi_mu_expand = INPUT.pexsi_mu_expand; - hsolver::DiagoPexsi>::pexsi_mu_guard = INPUT.pexsi_mu_guard; - hsolver::DiagoPexsi>::pexsi_elec_thr = 
INPUT.pexsi_elec_thr; - hsolver::DiagoPexsi>::pexsi_zero_thr = INPUT.pexsi_zero_thr; - - hsolver::DiagoPexsi::pexsi_npole = INPUT.pexsi_npole; - hsolver::DiagoPexsi::pexsi_inertia = INPUT.pexsi_inertia; - hsolver::DiagoPexsi::pexsi_nmax = INPUT.pexsi_nmax; - // hsolver::DiagoPexsi::pexsi_symbolic = INPUT.pexsi_symbolic; - hsolver::DiagoPexsi::pexsi_comm = INPUT.pexsi_comm; - hsolver::DiagoPexsi::pexsi_storage = INPUT.pexsi_storage; - hsolver::DiagoPexsi::pexsi_ordering = INPUT.pexsi_ordering; - hsolver::DiagoPexsi::pexsi_row_ordering = INPUT.pexsi_row_ordering; - hsolver::DiagoPexsi::pexsi_nproc = INPUT.pexsi_nproc; - hsolver::DiagoPexsi::pexsi_symm = INPUT.pexsi_symm; - hsolver::DiagoPexsi::pexsi_trans = INPUT.pexsi_trans; - hsolver::DiagoPexsi::pexsi_method = INPUT.pexsi_method; - hsolver::DiagoPexsi::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; - // hsolver::DiagoPexsi::pexsi_spin = INPUT.pexsi_spin; - hsolver::DiagoPexsi::pexsi_temp = INPUT.pexsi_temp; - hsolver::DiagoPexsi::pexsi_gap = INPUT.pexsi_gap; - hsolver::DiagoPexsi::pexsi_delta_e = INPUT.pexsi_delta_e; - hsolver::DiagoPexsi::pexsi_mu_lower = INPUT.pexsi_mu_lower; - hsolver::DiagoPexsi::pexsi_mu_upper = INPUT.pexsi_mu_upper; - hsolver::DiagoPexsi::pexsi_mu = INPUT.pexsi_mu; - hsolver::DiagoPexsi::pexsi_mu_thr = INPUT.pexsi_mu_thr; - hsolver::DiagoPexsi::pexsi_mu_expand = INPUT.pexsi_mu_expand; - hsolver::DiagoPexsi::pexsi_mu_guard = INPUT.pexsi_mu_guard; - hsolver::DiagoPexsi::pexsi_elec_thr = INPUT.pexsi_elec_thr; - hsolver::DiagoPexsi::pexsi_zero_thr = INPUT.pexsi_zero_thr; + pexsi::PEXSI_Solver::pexsi_npole = INPUT.pexsi_npole; + pexsi::PEXSI_Solver::pexsi_inertia = INPUT.pexsi_inertia; + pexsi::PEXSI_Solver::pexsi_nmax = INPUT.pexsi_nmax; + // pexsi::PEXSI_Solver::pexsi_symbolic = INPUT.pexsi_symbolic; + pexsi::PEXSI_Solver::pexsi_comm = INPUT.pexsi_comm; + pexsi::PEXSI_Solver::pexsi_storage = INPUT.pexsi_storage; + pexsi::PEXSI_Solver::pexsi_ordering = INPUT.pexsi_ordering; + 
pexsi::PEXSI_Solver::pexsi_row_ordering = INPUT.pexsi_row_ordering; + pexsi::PEXSI_Solver::pexsi_nproc = INPUT.pexsi_nproc; + pexsi::PEXSI_Solver::pexsi_symm = INPUT.pexsi_symm; + pexsi::PEXSI_Solver::pexsi_trans = INPUT.pexsi_trans; + pexsi::PEXSI_Solver::pexsi_method = INPUT.pexsi_method; + pexsi::PEXSI_Solver::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // pexsi::PEXSI_Solver::pexsi_spin = INPUT.pexsi_spin; + pexsi::PEXSI_Solver::pexsi_temp = INPUT.pexsi_temp; + pexsi::PEXSI_Solver::pexsi_gap = INPUT.pexsi_gap; + pexsi::PEXSI_Solver::pexsi_delta_e = INPUT.pexsi_delta_e; + pexsi::PEXSI_Solver::pexsi_mu_lower = INPUT.pexsi_mu_lower; + pexsi::PEXSI_Solver::pexsi_mu_upper = INPUT.pexsi_mu_upper; + pexsi::PEXSI_Solver::pexsi_mu = INPUT.pexsi_mu; + pexsi::PEXSI_Solver::pexsi_mu_thr = INPUT.pexsi_mu_thr; + pexsi::PEXSI_Solver::pexsi_mu_expand = INPUT.pexsi_mu_expand; + pexsi::PEXSI_Solver::pexsi_mu_guard = INPUT.pexsi_mu_guard; + pexsi::PEXSI_Solver::pexsi_elec_thr = INPUT.pexsi_elec_thr; + pexsi::PEXSI_Solver::pexsi_zero_thr = INPUT.pexsi_zero_thr; #endif ModuleBase::timer::tick("Input_Conv", "Convert"); return; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index 3455e9ec40..a0c566f9b5 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -548,46 +548,29 @@ TEST_F(InputConvTest, PEXSI) std::string input_file = "./support/INPUT"; INPUT.Read(input_file); Input_Conv::Convert(); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_npole, 54); - EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_inertia); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nmax, 80); - EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_comm); - EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_storage); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_ordering, 0); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_row_ordering, 1); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nproc, 1); - EXPECT_TRUE(hsolver::DiagoPexsi::pexsi_symm); - 
EXPECT_FALSE(hsolver::DiagoPexsi::pexsi_trans); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_method, 1); - EXPECT_EQ(hsolver::DiagoPexsi::pexsi_nproc_pole, 1); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_temp, 0.015); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_gap, 0); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_delta_e, 20); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_lower, -10); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_upper, 10); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu, 0); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_thr, 0.05); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_expand, 0.3); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_mu_guard, 0.2); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_elec_thr, 0.001); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi::pexsi_zero_thr, 1e-10); - - EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_nproc, 1); - EXPECT_TRUE(hsolver::DiagoPexsi>::pexsi_symm); - EXPECT_FALSE(hsolver::DiagoPexsi>::pexsi_trans); - EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_method, 1); - EXPECT_EQ(hsolver::DiagoPexsi>::pexsi_nproc_pole, 1); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_temp, 0.015); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_gap, 0); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_delta_e, 20); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_lower, -10); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_upper, 10); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu, 0); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_thr, 0.05); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_expand, 0.3); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_mu_guard, 0.2); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_elec_thr, 0.001); - EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi>::pexsi_zero_thr, 1e-10); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 54); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm); + 
EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_storage); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_ordering, 0); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_row_ordering, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc, 1); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_symm); + EXPECT_FALSE(pexsi::PEXSI_Solver::pexsi_trans); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_method, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_temp, 0.015); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_gap, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_zero_thr, 1e-10); } #endif From b33a37bef4315d2bab348533735cb1fd46691366 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Wed, 7 Feb 2024 21:58:13 +0800 Subject: [PATCH 30/44] Docs: added docs for pexsi inputs --- docs/advanced/input_files/input-main.md | 165 ++++++++++++++++++ .../module_pexsi/dist_matrix_transformer.h | 2 +- .../module_pexsi/pexsi_solver.h | 2 +- source/module_io/input.cpp | 2 +- source/module_io/input.h | 2 +- source/module_io/test/input_conv_test.cpp | 2 +- source/module_io/test/input_test_para.cpp | 2 +- source/module_io/test/write_input_test.cpp | 2 +- 8 files changed, 172 insertions(+), 7 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 6a3fa4fc3d..e617c519c5 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -382,6 +382,30 @@ - [qo\_strategy](#qo_strategy) - 
[qo\_screening\_coeff](#qo_screening_coeff) - [qo\_thr](#qo_thr) + - [PEXSI](#PEXSI) + - [pexsi_npole](#pexsi_npole) + - [pexsi_inertia](#pexsi_inertia) + - [pexsi_nmax](#pexsi_nmax) + - [pexsi_comm](#pexsi_comm) + - [pexsi_storage](#pexsi_storage) + - [pexsi_ordering](#pexsi_ordering) + - [pexsi_row_ordering](#pexsi_row_ordering) + - [pexsi_nproc](#pexsi_nproc) + - [pexsi_symm](#pexsi_symm) + - [pexsi_trans](#pexsi_trans) + - [pexsi_method](#pexsi_method) + - [pexsi_nproc_pole](#pexsi_nproc_pole) + - [pexsi_temp](#pexsi_temp) + - [pexsi_gap](#pexsi_gap) + - [pexsi_delta_e](#pexsi_delta_e) + - [pexsi_mu_lower](#pexsi_mu_lower) + - [pexsi_mu_upper](#pexsi_mu_upper) + - [pexsi_mu](#pexsi_mu) + - [pexsi_mu_thr](#pexsi_mu_thr) + - [pexsi_mu_expand](#pexsi_mu_expand) + - [pexsi_mu_guard](#pexsi_mu_guard) + - [pexsi_elec_thr](#pexsi_elec_thr) + - [pexsi_zero_thr](#pexsi_zero_thr) [back to top](#full-list-of-input-keywords) @@ -3548,5 +3572,146 @@ These variables are used to control the usage of QO analysis. Please note presen - **Description**: the convergence threshold determining the cutoff of generated orbital. Lower threshold will yield orbital with larger cutoff radius. - **Default**: 1.0e-6 +## PEXSI + +These variables are used to control the usage of PEXSI (Pole Expansion and Selected Inversion) method in calculations. + +### pexsi_npole + +- **Type**: Integer +- **Description**: the number of poles used in the pole expansion method, should be a even number. +- **Default**: 80 + +### pexsi_inertia + +- **Type**: Boolean +- **Description**: whether inertia counting is used at the very beginning. +- **Default**: True + +### pexsi_nmax + +- **Type**: Integer +- **Description**: maximum number of PEXSI iterations after each inertia counting procedure. +- **Default**: 80 + +### pexsi_comm + +- **Type**: Boolean +- **Description**: whether to construct PSelInv communication pattern. 
+- **Default**: True + +### pexsi_storage + +- **Type**: Boolean +- **Description**: whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. +- **Default**: True + +### pexsi_ordering + +- **Type**: Integer +- **Description**: ordering strategy for factorization and selected inversion. 0: Parallel ordering using ParMETIS, 1: Sequential ordering using METIS, 2: Multiple minimum degree ordering +- **Default**: 0 + +### pexsi_row_ordering + +- **Type**: Integer +- **Description**: row permutation strategy for factorization and selected inversion, 0: No row permutation, 1: Make the diagonal entry of the matrix larger than the off-diagonal entries. +- **Default**: 1 + +### pexsi_nproc + +- **Type**: Integer +- **Description**: number of processors for PARMETIS. Only used if pexsi_ordering == 0. +- **Default**: 1 + +### pexsi_symm + +- **Type**: Boolean +- **Description**: whether the matrix is symmetric. +- **Default**: True + +### pexsi_trans + +- **Type**: Boolean +- **Description**: whether to factorize the transpose of the matrix. +- **Default**: False + +### pexsi_method + +- **Type**: Integer +- **Description**: the pole expansion method to be used. 1 for Cauchy Contour Integral method, 2 for Moussa optimized method. +- **Default**: 1 + +### pexsi_nproc_pole + +- **Type**: Integer +- **Description**: the point parallelizaion of PEXSI. Recommend two points parallelization. +- **Default**: 1 + +### pexsi_temp + +- **Type**: Real +- **Description**: temperature in Fermi-Dirac distribution, in Ry, should have the same effect as the smearing sigma when smearing method is set to Fermi-Dirac. +- **Default**: 0.015 + +### pexsi_gap + +- **Type**: Real +- **Description**: spectral gap, this can be set to be 0 in most cases. +- **Default**: 0 + +### pexsi_delta_e + +- **Type**: Real +- **Description**: an upper bound for the spectral radius of $S^{-1} H$. 
+- **Default**: 20 + +### pexsi_mu_lower + +- **Type**: Real +- **Description**: initial guess of lower bound for mu. +- **Default**: -10 + +### pexsi_mu_upper + +- **Type**: Real +- **Description**: initial guess of upper bound for mu. +- **Default**: 10 + +### pexsi_mu + +- **Type**: Real +- **Description**: initial guess for mu (for the solver). +- **Default**: 0 + +### pexsi_mu_thr + +- **Type**: Real +- **Description**: stopping criterion in terms of the chemical potential for the inertia counting procedure. +- **Default**: 0.05 + +### pexsi_mu_expand + +- **Type**: Real +- **Description**: if the chemical potential is not in the initial interval, the interval is expanded by this value. +- **Default**: 0.3 + +### pexsi_mu_guard + +- **Type**: Real +- **Description**: safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. +- **Default**: 0.2 + +### pexsi_elec_thr + +- **Type**: Real +- **Description**: stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact. +- **Default**: 0.001 + +### pexsi_zero_thr + +- **Type**: Real +- **Description**: if the absolute value of CCS matrix element is less than this value, it will be considered as zero. 
+- **Default**: 1e-10 [back to top](#full-list-of-input-keywords) diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h index e261d31f3e..672b22f4f3 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -2,7 +2,7 @@ #define DISTMATRIXTRANSFORMER_H #include - +#include #include // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution // they should have same MPI communicator diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index 880efaf504..a5d52be5cf 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -124,7 +124,7 @@ class PEXSI_Solver */ static double pexsi_elec_thr; /** - * @brief Stopping criterion for the zero threshold. + * @brief If the absolute value of CCS matrix element is less than this value, it will be considered as zero. 
*/ static double pexsi_zero_thr; diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index ce76fb3a40..ba01da5dae 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -644,7 +644,7 @@ void Input::Default(void) //========================================================== // variables for PEXSI //========================================================== - pexsi_npole = 54; + pexsi_npole = 80; pexsi_inertia = true; pexsi_nmax = 80; // pexsi_symbolic = 1; diff --git a/source/module_io/input.h b/source/module_io/input.h index dd3d8c0bb0..223a21c8ab 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -607,7 +607,7 @@ class Input //========================================================== // variables for PEXSI //========================================================== - int pexsi_npole = 54; + int pexsi_npole = 80; bool pexsi_inertia = true; int pexsi_nmax = 80; // int pexsi_symbolic = 1; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index a0c566f9b5..ff5c567990 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -548,7 +548,7 @@ TEST_F(InputConvTest, PEXSI) std::string input_file = "./support/INPUT"; INPUT.Read(input_file); Input_Conv::Convert(); - EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 54); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 80); EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia); EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80); EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm); diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index 1b936f73d5..41b7597d58 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -393,7 +393,7 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_thr, 1e-6); EXPECT_EQ(INPUT.qo_basis, "hydrogen"); - EXPECT_EQ(INPUT.pexsi_npole, 54); + EXPECT_EQ(INPUT.pexsi_npole, 80); 
EXPECT_TRUE(INPUT.pexsi_inertia); EXPECT_EQ(INPUT.pexsi_nmax, 80); EXPECT_TRUE(INPUT.pexsi_comm); diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index 6c8a2b32f9..cfc874f060 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -934,7 +934,7 @@ TEST_F (write_input, PEXSI24) std::ifstream ifs ("write_input_test.log"); std::string output ((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)")); - EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 54 #Number of poles in expansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 80 #Number of poles in expansion")); EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia 1 #Whether inertia counting is used at the very beginning of PEXSI process")); EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax 80 #Maximum number of PEXSI iterations after each inertia counting procedure.")); EXPECT_THAT(output, testing::HasSubstr("pexsi_comm 1 #Whether to construct PSelInv communication pattern")); From 94ff925a865753855ac185975c8d11a1c7adedcc Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sat, 30 Mar 2024 16:16:40 +0800 Subject: [PATCH 31/44] Fix unit test issues in input_conv --- source/module_io/input_conv.cpp | 2 +- .../module_io/test/for_testing_input_conv.h | 34 +++++++++++++++++++ source/module_io/test/input_conv_test.cpp | 1 - source/module_io/test/input_test_para.cpp | 1 - 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index 49d0a55ee7..d140984a3b 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -25,7 +25,7 @@ #include "module_hamilt_lcao/module_tddft/evolve_elec.h" #endif #ifdef __PEXSI -#include "module_hsolver/diago_pexsi.h" +#include "module_hsolver/module_pexsi/pexsi_solver.h" #endif #include "module_base/timer.h" 
diff --git a/source/module_io/test/for_testing_input_conv.h b/source/module_io/test/for_testing_input_conv.h index 1e25cc32e5..b012dbf183 100644 --- a/source/module_io/test/for_testing_input_conv.h +++ b/source/module_io/test/for_testing_input_conv.h @@ -26,6 +26,9 @@ #include "module_relax/relax_old/ions_move_basic.h" #include "module_relax/relax_old/ions_move_cg.h" #include "module_relax/relax_old/lattice_change_basic.h" +#ifdef __PEXSI +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#endif bool berryphase::berry_phase_flag = false; @@ -355,6 +358,37 @@ pseudopot_cell_vnl ppcell; Charge_Mixing CHR_MIX; } // namespace GlobalC +#ifdef __PEXSI +namespace pexsi +{ +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; +} // namespace pexsi +#endif + #undef private #endif diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index 07640f1adc..302f7e1f6d 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -1,6 +1,5 @@ #include 
"gtest/gtest.h" #include "gmock/gmock.h" -#include "module_hsolver/diago_pexsi.h" #include "module_io/input_conv.h" #include "module_base/global_variable.h" #include "for_testing_input_conv.h" diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index b2b2c90f31..9955b99e5a 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -392,7 +392,6 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_strategy.size(), 0); EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0); EXPECT_EQ(INPUT.qo_thr, 1e-6); - EXPECT_EQ(INPUT.qo_basis, "hydrogen"); EXPECT_EQ(INPUT.qo_basis, "szv"); EXPECT_EQ(INPUT.pexsi_npole, 80); From 11f0a12242e37a600e3e877d67df4424bb757461 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sun, 31 Mar 2024 15:04:37 +0800 Subject: [PATCH 32/44] Change default pexsi_npole from 80 to 40 --- docs/advanced/input_files/input-main.md | 50 +++++++++++----------- source/module_elecstate/elecstate_lcao.cpp | 2 +- source/module_elecstate/elecstate_lcao.h | 6 +++ source/module_io/input.cpp | 2 +- source/module_io/input.h | 2 +- source/module_io/test/input_conv_test.cpp | 2 +- source/module_io/test/input_test_para.cpp | 2 +- source/module_io/test/write_input_test.cpp | 2 +- 8 files changed, 37 insertions(+), 31 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 0fe2ded416..d50cdab8b3 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -384,30 +384,30 @@ - [qo\_strategy](#qo_strategy) - [qo\_screening\_coeff](#qo_screening_coeff) - [qo\_thr](#qo_thr) - - [PEXSI](#PEXSI) - - [pexsi_npole](#pexsi_npole) - - [pexsi_inertia](#pexsi_inertia) - - [pexsi_nmax](#pexsi_nmax) - - [pexsi_comm](#pexsi_comm) - - [pexsi_storage](#pexsi_storage) - - [pexsi_ordering](#pexsi_ordering) - - [pexsi_row_ordering](#pexsi_row_ordering) - - [pexsi_nproc](#pexsi_nproc) - - [pexsi_symm](#pexsi_symm) 
- - [pexsi_trans](#pexsi_trans) - - [pexsi_method](#pexsi_method) - - [pexsi_nproc_pole](#pexsi_nproc_pole) - - [pexsi_temp](#pexsi_temp) - - [pexsi_gap](#pexsi_gap) - - [pexsi_delta_e](#pexsi_delta_e) - - [pexsi_mu_lower](#pexsi_mu_lower) - - [pexsi_mu_upper](#pexsi_mu_upper) - - [pexsi_mu](#pexsi_mu) - - [pexsi_mu_thr](#pexsi_mu_thr) - - [pexsi_mu_expand](#pexsi_mu_expand) - - [pexsi_mu_guard](#pexsi_mu_guard) - - [pexsi_elec_thr](#pexsi_elec_thr) - - [pexsi_zero_thr](#pexsi_zero_thr) + - [PEXSI](#pexsi) + - [pexsi\_npole](#pexsi_npole) + - [pexsi\_inertia](#pexsi_inertia) + - [pexsi\_nmax](#pexsi_nmax) + - [pexsi\_comm](#pexsi_comm) + - [pexsi\_storage](#pexsi_storage) + - [pexsi\_ordering](#pexsi_ordering) + - [pexsi\_row\_ordering](#pexsi_row_ordering) + - [pexsi\_nproc](#pexsi_nproc) + - [pexsi\_symm](#pexsi_symm) + - [pexsi\_trans](#pexsi_trans) + - [pexsi\_method](#pexsi_method) + - [pexsi\_nproc\_pole](#pexsi_nproc_pole) + - [pexsi\_temp](#pexsi_temp) + - [pexsi\_gap](#pexsi_gap) + - [pexsi\_delta\_e](#pexsi_delta_e) + - [pexsi\_mu\_lower](#pexsi_mu_lower) + - [pexsi\_mu\_upper](#pexsi_mu_upper) + - [pexsi\_mu](#pexsi_mu) + - [pexsi\_mu\_thr](#pexsi_mu_thr) + - [pexsi\_mu\_expand](#pexsi_mu_expand) + - [pexsi\_mu\_guard](#pexsi_mu_guard) + - [pexsi\_elec\_thr](#pexsi_elec_thr) + - [pexsi\_zero\_thr](#pexsi_zero_thr) [back to top](#full-list-of-input-keywords) @@ -3643,7 +3643,7 @@ These variables are used to control the usage of PEXSI (Pole Expansion and Selec - **Type**: Integer - **Description**: the number of poles used in the pole expansion method, should be a even number. 
-- **Default**: 80 +- **Default**: 40 ### pexsi_inertia diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 3d629f7abe..d5acbbaf05 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -95,7 +95,7 @@ void ElecStateLCAO>::psiToRho(const psi::Psi> dm_k_2d(); - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi" + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") // Peize Lin test 2019-05-15 { //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_k); diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index 4a7df3d76e..721f05148b 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -62,6 +62,12 @@ class ElecStateLCAO : public ElecState #ifdef __PEXSI // use for pexsi + + /** + * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi + * @param pexsi_DM: pointers of density matrix calculated by pexsi + * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM + */ void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); std::vector pexsi_EDM; #endif diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index 71e3b5dc65..4f2abc67c3 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -647,7 +647,7 @@ void Input::Default(void) //========================================================== // variables for PEXSI //========================================================== - pexsi_npole = 80; + pexsi_npole = 40; pexsi_inertia = true; pexsi_nmax = 80; // pexsi_symbolic = 1; diff --git 
a/source/module_io/input.h b/source/module_io/input.h index 97213357a3..083905407e 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -609,7 +609,7 @@ class Input //========================================================== // variables for PEXSI //========================================================== - int pexsi_npole = 80; + int pexsi_npole = 40; bool pexsi_inertia = true; int pexsi_nmax = 80; // int pexsi_symbolic = 1; diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index 302f7e1f6d..9fd1c0cc52 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -547,7 +547,7 @@ TEST_F(InputConvTest, PEXSI) std::string input_file = "./support/INPUT"; INPUT.Read(input_file); Input_Conv::Convert(); - EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 80); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 40); EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia); EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80); EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm); diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index 9955b99e5a..c172a9fab0 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -394,7 +394,7 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_thr, 1e-6); EXPECT_EQ(INPUT.qo_basis, "szv"); - EXPECT_EQ(INPUT.pexsi_npole, 80); + EXPECT_EQ(INPUT.pexsi_npole, 40); EXPECT_TRUE(INPUT.pexsi_inertia); EXPECT_EQ(INPUT.pexsi_nmax, 80); EXPECT_TRUE(INPUT.pexsi_comm); diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index 628985ce0b..a06809c639 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -934,7 +934,7 @@ TEST_F (write_input, PEXSI24) std::ifstream ifs ("write_input_test.log"); std::string output ((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); 
EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)")); - EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 80 #Number of poles in expansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 40 #Number of poles in expansion")); EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia 1 #Whether inertia counting is used at the very beginning of PEXSI process")); EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax 80 #Maximum number of PEXSI iterations after each inertia counting procedure.")); EXPECT_THAT(output, testing::HasSubstr("pexsi_comm 1 #Whether to construct PSelInv communication pattern")); From fd19b2c22e26c4d1ab73d36e9f0a4b8a51b881fd Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sun, 31 Mar 2024 15:55:23 +0800 Subject: [PATCH 33/44] Place pexsi_EDM in DensityMatrix, set size of pexsi_dm = 1 when GlobalV::NSPIN==4, and add comments for dmToRho --- source/module_elecstate/elecstate_lcao.cpp | 12 +++++++++--- source/module_elecstate/elecstate_lcao.h | 1 - source/module_elecstate/module_dm/density_matrix.h | 10 ++++++++++ .../hamilt_lcaodft/FORCE_gamma_edm.cpp | 2 +- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index d5acbbaf05..56e6dd8dfb 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -267,11 +267,17 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorget_DM(); - this->pexsi_EDM.clear(); - for (int is = 0; is < GlobalV::NSPIN; is++) + this->get_DM()->pexsi_EDM.clear(); + + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + for (int is = 0; is < nspin; is++) { this->DM->set_DMK_pointer(is, pexsi_DM[is]); - this->pexsi_EDM.push_back(pexsi_EDM[is]); + this->get_DM()->pexsi_EDM.push_back(pexsi_EDM[is]); } DM->cal_DMR(); diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index 
721f05148b..2ecc0af4eb 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -69,7 +69,6 @@ class ElecStateLCAO : public ElecState * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM */ void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); - std::vector pexsi_EDM; #endif protected: diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h index 519f798f5d..f7e6f4e8c0 100644 --- a/source/module_elecstate/module_dm/density_matrix.h +++ b/source/module_elecstate/module_dm/density_matrix.h @@ -200,6 +200,14 @@ namespace elecstate std::vector EDMK; // for TD-DFT +#ifdef __PEXSI + /** + * @brief EDM storage for PEXSI + * used in MD calculation + */ + std::vector pexsi_EDM; +#endif + private: /** * @brief HContainer for density matrix in real space for 2D parallelization @@ -247,6 +255,8 @@ namespace elecstate * _nks = kv->_nks / nspin */ int _nks = 0; + + }; } // namespace elecstate diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp index 1ce37e592e..dcf9a79049 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp @@ -43,7 +43,7 @@ void Force_LCAO_gamma::cal_foverlap( auto pes = dynamic_cast*>(pelec); for (int ik = 0; ik < GlobalV::NSPIN; ik++) { - EDM.set_DMK_pointer(ik, pes->pexsi_EDM[ik]); + EDM.set_DMK_pointer(ik, pes->get_DM()->pexsi_EDM[ik]); } } From aec57c0d94f8759a3b43cd7c9126a707e5ea600b Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sun, 31 Mar 2024 16:07:31 +0800 Subject: [PATCH 34/44] An unit test added for DiagoPexsi --- source/module_hsolver/test/CMakeLists.txt | 19 + .../test/PEXSI-DM-GammaOnly-Si2.dat | 107 +++++ .../test/PEXSI-H-GammaOnly-Si2.dat | 26 ++ .../test/PEXSI-S-GammaOnly-Si2.dat 
| 26 ++ .../test/diago_pexsi_parallel_test.sh | 18 + .../module_hsolver/test/diago_pexsi_test.cpp | 403 ++++++++++++++++++ 6 files changed, 599 insertions(+) create mode 100644 source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat create mode 100644 source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat create mode 100644 source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat create mode 100644 source/module_hsolver/test/diago_pexsi_parallel_test.sh create mode 100644 source/module_hsolver/test/diago_pexsi_test.cpp diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index 5dd16d10ee..ca7e4e822d 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -93,6 +93,14 @@ if(ENABLE_LCAO) SOURCES diago_lcao_test.cpp ../diago_blas.cpp ) endif() + + if (ENABLE_PEXSI) + AddTest( + TARGET HSolver_LCAO_PEXSI + LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} MPI::MPI_CXX base psi device pexsi + SOURCES diago_pexsi_test.cpp ../diago_pexsi.cpp ../../module_basis/module_ao/parallel_orbitals.cpp ../../module_basis/module_ao/parallel_2d.cpp + ) + endif() endif() if (USE_CUDA AND USE_CUSOLVER_LCAO) AddTest( @@ -116,6 +124,11 @@ install(FILES diago_cg_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_david_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_lcao_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-H-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-S-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-DM-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + find_program(BASH bash) add_test(NAME HSolver_cg_parallel COMMAND ${BASH} diago_cg_parallel_test.sh @@ -130,4 +143,10 @@ if(ENABLE_LCAO) COMMAND 
${BASH} diago_lcao_parallel_test.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) +if(ENABLE_PEXSI) + add_test(NAME HSolver_LCAO_PEXSI_parallel + COMMAND ${BASH} diago_pexsi_parallel_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() endif() \ No newline at end of file diff --git a/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat new file mode 100644 index 0000000000..1043cc51a1 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat @@ -0,0 +1,107 @@ + 26 26 + 8 + 0.660474083048563 + 3.884e-01 1.025e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.883e-01 1.024e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.025e-02 2.683e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.024e-02 2.718e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -7.169e-01 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 1.699e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 
0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 + 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 + 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 + 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 3.891e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 3.891e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 -1.699e-01 
+ 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.891e-02 + 3.883e-01 1.024e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.884e-01 1.025e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.024e-02 2.718e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.025e-02 2.683e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 7.260e-01 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 + 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 -1.671e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 + 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 + 
0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 + 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 3.977e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 3.977e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 + 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.977e-02 \ No newline at end of file diff --git a/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat new file mode 100644 index 0000000000..87c76fc184 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat @@ -0,0 +1,26 @@ +26 -3.45008963e-01 7.00777279e-01 2.67435378e-18 -6.48062567e-16 5.69806192e-16 
-2.01865721e-15 1.37825425e-15 1.02932021e-15 -1.29549404e-15 9.40872433e-16 -4.37541396e-16 -4.22502120e-15 -9.57567315e-16 -5.60569495e-01 3.19974163e-01 9.47088035e-16 6.99293619e-16 2.66546040e-17 2.24941591e-16 3.49977208e-16 6.64422948e-16 1.19697268e-15 -2.18249784e-16 -1.24808688e-15 2.25204771e-15 3.31560018e-16 + 7.97835961e-01 -3.26671355e-16 9.44414999e-17 1.08101471e-16 2.96820086e-16 -6.21597964e-17 -1.93606525e-17 1.47303571e-15 -5.85952677e-16 -9.93382239e-16 2.85142385e-15 1.26395876e-15 3.19974163e-01 -2.96734690e-02 -9.56683510e-16 2.99393236e-15 1.06817249e-15 2.22266652e-16 -1.05210457e-15 -2.10421437e-15 -2.19285057e-15 8.05560256e-16 1.20571583e-15 -4.19339449e-15 -1.13240719e-15 + 3.80362144e-01 5.95917771e-16 -2.18276481e-16 -3.54010265e-01 -4.19309899e-16 -2.30363123e-16 1.53423809e-15 -2.91444576e-16 -9.56803808e-17 -5.35261856e-17 -6.16823227e-02 -2.90999424e-16 -2.32177556e-15 -3.89575886e-02 2.12205807e-16 -8.22906289e-17 6.81593611e-03 2.10103917e-17 2.24379615e-16 -4.85438948e-16 9.00442486e-17 1.31161204e-16 4.61089846e-17 2.39556639e-01 + 3.80362144e-01 2.55037050e-16 -3.75167990e-16 -3.54010265e-01 -3.00934924e-16 4.18935390e-17 1.46886306e-16 -6.16823227e-02 -7.98028325e-16 -4.57838734e-17 -3.14309773e-16 1.21608455e-15 -4.64848149e-17 -3.89575886e-02 -8.08540395e-17 1.28237973e-16 6.81593611e-03 -8.35066919e-17 -1.00156533e-16 -1.74201705e-16 2.39556639e-01 4.32484922e-16 6.72537317e-17 + 3.80362144e-01 1.49782541e-16 6.27515573e-17 -3.54010265e-01 4.82940748e-17 -6.16823227e-02 6.20938499e-16 4.71589687e-16 -9.18536141e-16 -3.65700951e-16 -2.61249330e-16 -2.01796811e-16 3.17945596e-17 -3.89575886e-02 2.14594692e-16 -1.51811429e-16 6.81593611e-03 -2.12500255e-17 2.39556639e-01 -5.79371716e-17 -2.59943780e-16 5.30093710e-16 + 6.49510022e-01 -3.99877720e-16 2.33364502e-16 -1.06835200e-15 1.35924369e-16 4.35440159e-16 5.65086482e-17 2.05033710e-02 -2.40594211e-16 -5.69887431e-16 6.81593611e-03 2.06395261e-17 -1.15861484e-16 
1.34872389e-01 5.35360292e-17 -5.33661495e-17 3.47762359e-16 -3.36405101e-16 -2.31631258e-16 -2.62331242e-17 -4.85682373e-02 + 6.49510022e-01 1.01473550e-16 -8.91511572e-16 -7.44322866e-16 2.05033710e-02 3.18067759e-16 1.77405265e-16 6.50186302e-16 -2.33952222e-15 3.10478769e-17 6.81593611e-03 -3.09684775e-17 3.48878974e-17 1.34872389e-01 1.38971253e-16 2.83475393e-17 1.26434741e-16 -4.85682373e-02 -2.28940621e-16 -2.75815582e-16 + 6.49510022e-01 -6.05413446e-16 2.05033710e-02 -4.85158818e-16 2.18100282e-16 5.90800199e-16 -4.24041197e-16 -1.76755725e-16 2.48864123e-16 6.96454974e-17 6.81593611e-03 -1.21887704e-16 8.99652487e-17 1.34872389e-01 1.30566455e-16 -4.85682373e-02 4.45112250e-17 2.04467699e-16 -3.02221322e-16 + 1.39709618e+00 3.28775150e-17 3.02403823e-16 1.12276155e-15 4.11283608e-16 -1.68777752e-15 1.99081203e-15 -3.80457763e-16 -2.91238204e-16 -2.93921607e-16 -7.17012563e-20 1.88551508e-16 -4.63408913e-17 -4.73478204e-01 1.77949873e-16 3.47597824e-16 -7.95551748e-17 1.22787443e-15 + 1.36533927e+00 -4.17480353e-16 2.38782046e-16 2.07954517e-16 4.06349249e-16 -9.28946662e-17 -1.76779162e-16 -2.40828408e-16 -2.39556639e-01 -9.25671896e-17 1.12753601e-16 4.85682373e-02 -1.30259315e-17 2.04879371e-01 2.55654809e-16 -8.58762656e-16 1.09717457e-15 + 1.36533927e+00 -2.91149533e-16 4.76951783e-17 4.23208590e-16 -1.68531661e-15 3.22333315e-16 -2.39556639e-01 -2.64951724e-17 -6.87350695e-17 4.85682373e-02 1.57686979e-16 -3.76991794e-16 5.08885431e-16 2.04879371e-01 1.27435694e-15 4.01644429e-16 + 1.39709618e+00 -4.55433608e-17 -2.99630111e-15 3.78928659e-15 6.58110607e-17 3.50160588e-16 -1.80754014e-16 -2.30392781e-17 -3.48739041e-16 1.55913703e-16 -6.27363150e-17 9.66464560e-16 -1.17769212e-15 -4.73478204e-01 3.90844224e-17 + 1.36533927e+00 -1.30972594e-15 1.71848190e-15 -2.39556639e-01 3.88957496e-17 4.45336175e-17 4.85682373e-02 -1.94106500e-17 -1.20756475e-16 -9.64457501e-16 6.28258136e-16 2.28328203e-16 7.89854971e-17 2.04879371e-01 + -3.45008963e-01 
7.00777279e-01 2.73555881e-15 -1.00946454e-15 -1.16246495e-15 -1.18942654e-15 2.34640662e-15 -2.17975499e-17 1.59173761e-15 1.65013351e-16 -5.00521378e-16 3.23944830e-15 2.47573682e-16 + 7.97835961e-01 -2.89307129e-16 4.07830148e-16 3.07829438e-16 1.46389913e-16 -1.66937141e-16 -2.06630218e-17 -1.15365109e-15 4.29729746e-16 6.95094012e-16 -2.40691019e-15 -8.41099617e-16 + 3.80362144e-01 -3.72488558e-16 -1.36186620e-16 -3.54010265e-01 1.65418864e-16 -5.01909637e-17 8.72781487e-16 -3.15475321e-16 -1.75617767e-16 -1.33423172e-16 6.16823227e-02 + 3.80362144e-01 6.95266223e-16 1.24652862e-16 -3.54010265e-01 -2.05726330e-16 2.83810662e-16 2.42210163e-16 6.16823227e-02 -8.82894651e-16 9.45554362e-18 + 3.80362144e-01 -5.04905355e-16 -3.43420006e-16 -3.54010265e-01 1.97895263e-16 6.16823227e-02 5.27704784e-16 4.09823362e-16 -1.55579456e-16 + 6.49510022e-01 3.01665582e-16 1.65533985e-16 -1.52276340e-15 4.86620331e-16 6.36576344e-16 4.94854914e-17 -2.05033710e-02 + 6.49510022e-01 -1.88619393e-17 -1.87155561e-16 -5.78216449e-16 -2.05033710e-02 1.56194250e-15 -3.49237870e-16 + 6.49510022e-01 -3.24872954e-16 -2.05033710e-02 -7.32295395e-16 -8.96538828e-16 9.96070017e-16 + 1.39709618e+00 -1.11050273e-16 -1.69128575e-17 -1.24504759e-15 -8.08763902e-16 + 1.36533927e+00 3.67630294e-16 -7.04593046e-17 8.84496171e-18 + 1.36533927e+00 9.35720969e-16 -1.83878822e-16 + 1.39709618e+00 1.75929309e-16 + 1.36533927e+00 diff --git a/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat new file mode 100644 index 0000000000..e1bb9a1439 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat @@ -0,0 +1,26 @@ +26 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 1.24515654e+00 -1.87761557e+00 -4.52345240e-17 -1.21904982e-16 -4.81699167e-17 -3.15892467e-17 -1.29304662e-16 
-9.53928505e-18 -6.92703544e-17 -2.51210914e-17 -6.61062629e-17 -5.20340810e-17 -2.29647573e-17 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 -1.87761557e+00 2.38584105e+00 5.04340357e-17 2.33103467e-17 6.07830843e-18 -2.07895767e-17 3.37674767e-16 -7.05544564e-17 -1.25743735e-16 2.64630033e-17 4.25617115e-17 -2.13442138e-16 2.66781497e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 4.52345240e-17 -5.04340357e-17 -1.24362116e-01 -3.18772379e-17 -7.50471191e-18 9.98285745e-02 1.08555743e-17 1.01711716e-17 1.79143233e-17 2.27868803e-17 2.29969445e-18 -1.53440018e-18 5.44995645e-01 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 1.21904982e-16 -2.33103467e-17 -3.18772379e-17 -1.24362116e-01 2.03965534e-17 1.08555743e-17 9.98285745e-02 2.64680855e-17 3.72186277e-17 -3.19162015e-18 5.44995645e-01 3.61445899e-17 1.12147162e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 4.81699167e-17 -6.07830843e-18 -7.50471191e-18 2.03965534e-17 -1.24362116e-01 1.01711716e-17 2.64680855e-17 9.98285745e-02 -3.06117707e-18 5.44995645e-01 -4.81792340e-18 1.73573992e-17 2.58853269e-17 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20 3.15892467e-17 2.07895767e-17 9.98285745e-02 1.08555743e-17 1.01711716e-17 -3.41075882e-02 1.01779479e-17 6.03087458e-17 -5.67003855e-18 -3.71339244e-18 1.81722449e-17 7.94516905e-19 -4.08542832e-01 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 
1.29304662e-16 -3.37674767e-16 1.08555743e-17 9.98285745e-02 2.64680855e-17 1.01779479e-17 -3.41075882e-02 3.01950305e-17 -1.21972744e-17 2.71050543e-18 -4.08542832e-01 1.97392558e-17 6.03087458e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 9.53928505e-18 7.05544564e-17 1.01711716e-17 2.64680855e-17 9.98285745e-02 6.03087458e-17 3.01950305e-17 -3.41075882e-02 -4.38763067e-18 -4.08542832e-01 3.43793733e-17 2.17179248e-18 -5.34647196e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 -6.92703544e-17 -1.25743735e-16 -1.79143233e-17 -3.72186277e-17 3.06117707e-18 5.67003855e-18 1.21972744e-17 4.38763067e-18 -6.02927412e-01 -1.92810110e-17 2.84052499e-18 -7.30159341e-17 6.80624854e-17 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 -2.51210914e-17 2.64630033e-17 -2.27868803e-17 3.19162015e-18 -5.44995645e-01 3.71339244e-18 -2.71050543e-18 4.08542832e-01 -1.92810110e-17 1.40487499e-01 6.89230709e-18 2.04346699e-17 1.79486282e-17 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 -6.61062629e-17 4.25617115e-17 -2.29969445e-18 -5.44995645e-01 4.81792340e-18 -1.81722449e-17 4.08542832e-01 -3.43793733e-17 2.84052499e-18 6.89230709e-18 1.40487499e-01 -3.05927125e-18 -4.40795946e-18 + 9.27713518e-01 7.03147766e-21 -5.20340810e-17 -2.13442138e-16 1.53440018e-18 -3.61445899e-17 -1.73573992e-17 -7.94516905e-19 -1.97392558e-17 -2.17179248e-18 -7.30159341e-17 2.04346699e-17 -3.05927125e-18 -6.02927412e-01 1.56701095e-18 + 1.16731758e+00 -2.29647573e-17 2.66781497e-17 -5.44995645e-01 -1.12147162e-18 -2.58853269e-17 4.08542832e-01 -6.03087458e-18 5.34647196e-18 6.80624854e-17 1.79486282e-17 -4.40795946e-18 1.56701095e-18 1.40487499e-01 + 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 
-1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 + 9.27713518e-01 7.03147766e-21 + 1.16731758e+00 diff --git a/source/module_hsolver/test/diago_pexsi_parallel_test.sh b/source/module_hsolver/test/diago_pexsi_parallel_test.sh new file mode 100644 index 0000000000..4767d690a2 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_parallel_test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +np=`cat /proc/cpuinfo | grep "cpu cores" | uniq| awk '{print $NF}'` +echo "nprocs in this machine is $np" + +for i in 6 3 2;do + if [[ $i -gt $np ]];then + continue + fi + echo "TEST DIAGO PEXSI in parallel, nprocs=$i" + mpirun -np $i ./HSolver_LCAO_PEXSI + if [[ $? != 0 ]];then + echo -e "\e[1;33m [ FAILED ] \e[0m"\ + "execute UT with $i cores error." 
+ exit 1 + fi + break +done diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp new file mode 100644 index 0000000000..62c915e614 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_test.cpp @@ -0,0 +1,403 @@ +#ifdef __PEXSI +#include +#include +#include +#include +#include +#include +#include + +#include "module_hsolver/diago_pexsi.h" +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#include "module_hsolver/test/diago_elpa_utils.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_base/parallel_global.h" +#include "module_base/global_variable.h" + +#define PASSTHRESHOLD 5e-4 +#define DETAILINFO false +#define PRINT_HS false +#define REPEATRUN 1 + +template class HamiltTEST : public hamilt::Hamilt +{ + public: + int desc[9]; + int nrow, ncol; + std::vector h_local; + std::vector s_local; + + void matrix(hamilt::MatrixBlock &hk_in, hamilt::MatrixBlock &sk_in) + { + hk_in = hamilt::MatrixBlock{this->h_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + sk_in = hamilt::MatrixBlock{this->s_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + } + + void constructHamilt(const int iter, const hamilt::MatrixBlock rho) {} + void updateHk(const int ik) {} +}; + + + +template class PexsiPrepare +{ + public: + PexsiPrepare(int nlocal, int nbands, int nb2d, int sparsity, std::string hfname, std::string sfname, std::string dmname) + : nlocal(nlocal), nbands(nbands), nb2d(nb2d), sparsity(sparsity), hfname(hfname), sfname(sfname), dmname(dmname) + { + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + } + + int dsize, myrank; + int nlocal, nbands, nb2d, sparsity; + int nprows, npcols, myprow, mypcol; + int nrow, ncol; + double hsolver_time = 0.0; + std::string sfname, hfname, dmname; + HamiltTEST hmtest; + std::vector h; + std::vector s; + std::vector h_local; + std::vector s_local; + psi::Psi psi; + 
hsolver::DiagoPexsi* dh = nullptr; + Parallel_Orbitals po; + std::vector abc; + int icontxt; + + double mu; + + // density matrix + std::vector dm_local; + std::vector edm_local; + + std::vector dm; + + bool read_HS() + { + bool readhfile = false; + bool readsfile = false; + if (this->myrank == 0) + { + int hdim, sdim; + readhfile = LCAO_DIAGO_TEST::read_hs>(hfname, this->h); + readsfile = LCAO_DIAGO_TEST::read_hs>(sfname, this->s); + hdim = sqrt(this->h.size()); + sdim = sqrt(this->s.size()); + if (hdim != sdim) + { + printf("Error: dimensions of H and S are not equal, %d, %d\n", hdim, sdim); + readhfile = readsfile = false; + } + nlocal = hdim; + } + MPI_Bcast(&nlocal, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&readhfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + MPI_Bcast(&readsfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + nbands = nlocal/2; + if (readhfile && readsfile) + return true; + return false; + } + + bool produce_HS() + { + bool ok = this->read_HS(); + return ok; + } + + void pb2d() + { + LCAO_DIAGO_TEST::process_2d(nprows, npcols, myprow, mypcol, icontxt); + + hmtest.nrow = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, nprows, myprow); // the number of row of the new_matrix in each process + hmtest.ncol = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, npcols, mypcol); // the number of column of the new_matrix in each process + + + int ISRC = 0, info; + descinit_(hmtest.desc, &nlocal, &nlocal, &nb2d, &nb2d, &ISRC, &ISRC, &icontxt, &(hmtest.nrow), &info); + if (info != 0) + { + printf("Invalid blacs-distribution. 
Abort!\n"); + exit(1); + } + + + // set po variables + po.ncol = hmtest.ncol; + po.nrow = hmtest.nrow; + po.nb = nb2d; + po.blacs_ctxt = icontxt; + po.comm_2D = MPI_COMM_WORLD; + po.dim0 = nprows; + po.dim1 = npcols; + po.testpb = true; + + if (DETAILINFO && myrank == 0) + { + std::cout << "nrow: " << hmtest.nrow << ", ncol: " << hmtest.ncol << ", nb: " << nb2d << std::endl; + } + + dh = new hsolver::DiagoPexsi(&po); + } + + void distribute_data() + { + + int local_size = hmtest.nrow * hmtest.ncol; + this->h_local.resize(local_size); + this->s_local.resize(local_size); + + LCAO_DIAGO_TEST::distribute_data(this->h.data(),this->h_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + LCAO_DIAGO_TEST::distribute_data(this->s.data(),this->s_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + } + + void set_env() + { + GlobalV::NLOCAL = nlocal; + GlobalV::NBANDS = nbands; + GlobalV::DSIZE = dsize; + GlobalV::NSPIN = 1; + DIAG_WORLD = MPI_COMM_WORLD; + GlobalV::NPROC = dsize; + + psi.fix_k(0); + } + + void set_pexsi_vars() + { + pexsi::PEXSI_Solver::set_pexsi_vars(); + pexsi::PEXSI_Solver::pexsi_mu = mu; + } + + void diago() + { + if (DETAILINFO && myrank == 0) + { + std::cout << "Start to solve the KS equation using PEXSI" << std::endl; + } + this->pb2d(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the 2D parallelization" << std::endl; + } + this->distribute_data(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the data distribution" << std::endl; + } + this->set_env(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the environment setting" << std::endl; + } + double starttime = 0.0, endtime = 0.0; + this->set_pexsi_vars(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the PEXSI setting" << std::endl; + } + MPI_Barrier(MPI_COMM_WORLD); + starttime = MPI_Wtime(); + for(int i=0;ih_local; + hmtest.s_local = this->s_local; + dh->diag(&hmtest, psi, nullptr); + + // copy the density matrix to dm_local + 
dm_local = dh->DM; + edm_local = dh->EDM; + } + MPI_Barrier(MPI_COMM_WORLD); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the KS equation solving" << std::endl; + } + endtime = MPI_Wtime(); + hsolver_time = (endtime - starttime)/REPEATRUN; + } + + bool read_ref() + { + auto f_dm = std::ifstream(dmname); + if (!f_dm.is_open()) + { + std::cout << "Error: cannot open the reference file " << dmname << std::endl; + return false; + } + int nread = 0; + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + + f_dm >> GlobalV::nelec >> mu; + + dm.resize(nread*nread); + // T* edm = new T[nglobal*nglobal]; + for (int i = 0; i < nread; i++) + { + for (int j = 0; j < nread; j++) + { + f_dm >> dm[i*nread+j]; + } + } + return true; + } + + + bool compare_ref(std::stringstream &out_info) + { + double maxerror = 0.0; + int iindex = 0; + bool pass = true; + + auto ofs = std::ofstream("dm_local" + std::to_string(myprow) + std::to_string(mypcol) + ".dat"); + + int SENDPROW = 0, SENDPCOL = 0, tag = 0; + + // do iteration for matrix, distribute old_matrix to each process, pass a block each time + for (int row = 0; row < nlocal; row++) + { + int recv_prow = (row / nb2d) % nprows; // the row number of recive process + int nm_row = ((row / nb2d) / nprows) * nb2d + row % nb2d; // row number of block in new_matrix + for (int col = 0; col < nlocal; col += nb2d) + { + int recv_pcol = (col / nb2d) % npcols; // the column number of recive process + int nm_col = ((col / nb2d) / npcols) * nb2d + col % nb2d; + int pass_length = std::min(nlocal - col, nb2d); // nlocal may not be devided by nb2d; + // at global: nlocal * row + col + i + // at local: (nm_col 
+ i) * hmtest.nrow + nm_row + + if (myprow == recv_prow && mypcol == recv_pcol) + { + double diff = 0; + for (int i = 0; i < pass_length; i++) + { + diff = std::abs(dm_local[0][(nm_col + i) * hmtest.nrow + nm_row] - dm[nlocal * row + col + i]); + if (diff > maxerror) + { + maxerror = diff; + } + if (diff > PASSTHRESHOLD) + { + pass = false; + } + } + } + } + } + + bool pass_all = true; + double maxerror_all = 0.0; + MPI_Allreduce(&pass, &pass_all, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD); + MPI_Allreduce(&maxerror, &maxerror_all, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + if (myrank == 0) + { + std::cout << "H/S matrix are read from " << hfname << ", " << sfname << std::endl; + std::cout << "Density matrix are read from " << dmname << std::endl; + std::cout << std::endl; + out_info << "Maximum difference between ks_hsolver and ref is " << maxerror_all << ", the pass threshold is " << PASSTHRESHOLD << std::endl; + + if (DETAILINFO) + { + std::cout << out_info.str(); + out_info.str(""); + out_info.clear(); + } + } + delete dh; + return pass_all; + } +}; + +class PexsiGammaOnlyTest : public ::testing::TestWithParam> {}; + +TEST_P(PexsiGammaOnlyTest, LCAO) +{ + std::stringstream out_info; + PexsiPrepare dp = GetParam(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "nlocal: " << dp.nlocal << ", nbands: " << dp.nbands << ", nb2d: " << dp.nb2d << ", sparsity: " << dp.sparsity << std::endl; + + } + ASSERT_TRUE(dp.produce_HS()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "H/S matrix are read from " << dp.hfname << ", " << dp.sfname << std::endl; + } + ASSERT_TRUE(dp.read_ref()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Density matrix are read from " << dp.dmname << std::endl; + } + dp.diago(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Time for hsolver: " << dp.hsolver_time << "s" << std::endl; + } + + bool pass = dp.compare_ref(out_info); + EXPECT_TRUE(pass) << out_info.str(); + + MPI_Barrier(MPI_COMM_WORLD); +} + 
+INSTANTIATE_TEST_SUITE_P( + DiagoTest, + PexsiGammaOnlyTest, + ::testing::Values( //int nlocal, int nbands, int nb2d, int sparsity, std::string ks_solver_in, std::string hfname, std::string sfname + PexsiPrepare(0, 0, 2, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat"), + PexsiPrepare(0, 0, 1, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat") + + )); + + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + int mypnum, dsize; + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &mypnum); + + testing::InitGoogleTest(&argc, argv); + //Parallel_Global::split_diag_world(dsize); + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (mypnum != 0) + { + delete listeners.Release(listeners.default_result_printer()); + } + int result = RUN_ALL_TESTS(); + + if (mypnum == 0 && result != 0) + { + std::cout << "ERROR:some tests are not passed" << std::endl; + return result; + } + else + { + MPI_Finalize(); + return 0; + } +} + + +#endif // __PEXSI \ No newline at end of file From 70f9a54b197e05fa9b79cbd198a81f8b56adad72 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Tue, 9 Apr 2024 13:49:07 +0800 Subject: [PATCH 35/44] modify for changed gint interface --- source/module_elecstate/elecstate_lcao.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index c29e2d9bfc..b9c7a80a49 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -297,9 +297,9 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectoruhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint + this->gint_gamma->transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint Gint_inout inout(this->loc->DM, this->charge->rho, 
Gint_Tools::job_type::rho); - this->uhm->GG.cal_gint(&inout); + this->gint_gamma->cal_gint(&inout); if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) { for (int is = 0; is < GlobalV::NSPIN; is++) @@ -307,7 +307,7 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorcharge->kin_r[0], this->charge->nrxx); } Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau); - this->uhm->GG.cal_gint(&inout1); + this->gint_gamma->cal_gint(&inout1); } this->charge->renormalize_rho(); From 5a891f7e81ece8551a1693e29610e126a350288f Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Tue, 9 Apr 2024 14:28:43 +0800 Subject: [PATCH 36/44] correct nspin related behaviors --- source/module_elecstate/elecstate_lcao.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index b9c7a80a49..404e75f2e6 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -266,12 +266,18 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorloc->out_dm) // keep interface for old Output_DM until new one is ready { - for (int ik = 0; ik < GlobalV::NSPIN; ++ik) + for (int is = 0; is < nspin; ++is) { - this->loc->set_dm_gamma(ik, pexsi_DM[ik]); + this->loc->set_dm_gamma(is, pexsi_DM[is]); } this->loc->cal_dk_gamma_from_2D_pub(); } @@ -279,11 +285,7 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorget_DM(); this->get_DM()->pexsi_EDM.clear(); - int nspin = GlobalV::NSPIN; - if (GlobalV::NSPIN == 4) - { - nspin = 1; - } + for (int is = 0; is < nspin; is++) { this->DM->set_DMK_pointer(is, pexsi_DM[is]); From d7b402de0f983c654e693deb3617632b43f5f8ef Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Tue, 9 Apr 2024 14:41:36 +0800 Subject: [PATCH 37/44] add efermi passthrough --- source/module_hsolver/diago_pexsi.h | 2 +- source/module_hsolver/hsolver_lcao.cpp | 10 
++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index af3a175ff1..24b09367a5 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -15,7 +15,6 @@ class DiagoPexsi : public DiagH { private: using Real = typename GetTypeReal::type; - std::vector mu_buffer; public: DiagoPexsi(const Parallel_Orbitals* ParaV_in) @@ -35,6 +34,7 @@ class DiagoPexsi : public DiagH double totalEnergyS; double totalFreeEnergy; pexsi::PEXSI_Solver* ps; + std::vector mu_buffer; }; } // namespace hsolver diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 9f9460eb7c..bb53e57d28 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -232,6 +232,16 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); pes->f_en.eband = tem->totalFreeEnergy; + if (pes->eferm.two_efermi) + { + pes->eferm.ef = tem->mu_buffer[0]; + pes->eferm.ef_up = tem->mu_buffer[0]; + pes->eferm.ef_dw = tem->mu_buffer[1]; + } + else + { + pes->eferm.ef = tem->mu_buffer[0]; + } _pes->dmToRho(tem->DM, tem->EDM); } else From 76774f676426ca8c86bb26cd0efeb5d45f171b46 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Tue, 9 Apr 2024 18:59:23 +0800 Subject: [PATCH 38/44] Revert "add efermi passthrough" This reverts commit d7b402de0f983c654e693deb3617632b43f5f8ef. 
--- source/module_hsolver/diago_pexsi.h | 2 +- source/module_hsolver/hsolver_lcao.cpp | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index 24b09367a5..af3a175ff1 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -15,6 +15,7 @@ class DiagoPexsi : public DiagH { private: using Real = typename GetTypeReal::type; + std::vector mu_buffer; public: DiagoPexsi(const Parallel_Orbitals* ParaV_in) @@ -34,7 +35,6 @@ class DiagoPexsi : public DiagH double totalEnergyS; double totalFreeEnergy; pexsi::PEXSI_Solver* ps; - std::vector mu_buffer; }; } // namespace hsolver diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index bb53e57d28..9f9460eb7c 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -232,16 +232,6 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); pes->f_en.eband = tem->totalFreeEnergy; - if (pes->eferm.two_efermi) - { - pes->eferm.ef = tem->mu_buffer[0]; - pes->eferm.ef_up = tem->mu_buffer[0]; - pes->eferm.ef_dw = tem->mu_buffer[1]; - } - else - { - pes->eferm.ef = tem->mu_buffer[0]; - } _pes->dmToRho(tem->DM, tem->EDM); } else From 9e8c7241b673b5d8e26a2d4d621d3899ab6bf1d5 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sat, 13 Apr 2024 20:15:18 +0800 Subject: [PATCH 39/44] commits to resolve conversations related to codes --- source/module_elecstate/elecstate_lcao.cpp | 2 - source/module_elecstate/elecstate_lcao.h | 4 +- source/module_hsolver/diago_pexsi.cpp | 41 +++++++++++++------ source/module_hsolver/diago_pexsi.h | 13 ++---- .../module_pexsi/dist_bcd_matrix.cpp | 6 +-- .../module_pexsi/dist_ccs_matrix.cpp | 14 +++---- .../module_pexsi/dist_matrix_transformer.cpp | 4 +- 
.../module_pexsi/pexsi_solver.cpp | 16 +++++++- .../module_pexsi/pexsi_solver.h | 3 +- 9 files changed, 62 insertions(+), 41 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 404e75f2e6..89e40dfe05 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -282,9 +282,7 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorloc->cal_dk_gamma_from_2D_pub(); } - auto DM = this->get_DM(); this->get_DM()->pexsi_EDM.clear(); - for (int is = 0; is < nspin; is++) { diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index b7996036a9..c3e7ae3a2d 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -70,8 +70,8 @@ class ElecStateLCAO : public ElecState /** * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi - * @param pexsi_DM: pointers of density matrix calculated by pexsi - * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM + * @param pexsi_DM: pointers of density matrix (DMK) calculated by pexsi + * @param pexsi_EDM: pointers of energy-weighed density matrix (EDMK) calculated by pexsi, needed by MD, will be stored in DensityMatrix::pexsi_EDM */ void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); #endif diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 95c4a7433e..31b48aef5f 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,11 +1,8 @@ #include #include #ifdef __PEXSI -#include "c_pexsi_interface.h" #include "diago_pexsi.h" #include "module_base/global_variable.h" -#include "module_base/lapack_connector.h" -#include "module_base/timer.h" #include "module_base/tool_quit.h" #include "module_basis/module_ao/parallel_orbitals.h" 
#include "module_pexsi/pexsi_solver.h" @@ -15,6 +12,23 @@ typedef hamilt::MatrixBlock> matcd; namespace hsolver { +template +DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in) +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + mu_buffer.resize(nspin); + for (int i = 0; i < nspin; i++) + { + mu_buffer[i] = this->ps->pexsi_mu; + } + this->ParaV = ParaV_in; + this->ps = std::make_unique(); +} + template <> void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) { @@ -24,15 +38,15 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& std::vector eigen(GlobalV::NLOCAL, 0.0); MPI_Comm COMM_DIAG = MPI_COMM_WORLD; int ik = psi.get_current_k(); - this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + this->ps->prepare(this->ParaV->blacs_ctxt, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy); this->ps->solve(mu_buffer[ik]); this->EDM.push_back(this->ps->get_EDM()); this->DM.push_back(this->ps->get_DM()); @@ -51,5 +65,8 @@ void DiagoPexsi>::diag(hamilt::Hamilt> ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); } +template class DiagoPexsi; +template class DiagoPexsi >; + } // namespace hsolver #endif \ No newline at end of file diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index af3a175ff1..c2063a22b4 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -2,6 +2,7 @@ #define DIGAOPEXSI_H #include +#include #include "diagh.h" #include "module_base/global_variable.h" #include "module_basis/module_ao/parallel_orbitals.h" @@ -18,15 +19,7 @@ class DiagoPexsi : public DiagH std::vector mu_buffer; public: - DiagoPexsi(const Parallel_Orbitals* 
ParaV_in) - { - mu_buffer.resize(GlobalV::NSPIN); - for (int i = 0; i < GlobalV::NSPIN; i++) - { - mu_buffer[i] = this->ps->pexsi_mu; - } - this->ParaV = ParaV_in; - } + DiagoPexsi(const Parallel_Orbitals* ParaV_in); void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; const Parallel_Orbitals* ParaV; std::vector DM; @@ -34,7 +27,7 @@ class DiagoPexsi : public DiagH double totalEnergyH; double totalEnergyS; double totalFreeEnergy; - pexsi::PEXSI_Solver* ps; + std::unique_ptr ps; }; } // namespace hsolver diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp index 8e4c8e7ac7..ff3f85f32b 100644 --- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -1,6 +1,6 @@ #ifdef __PEXSI #include "dist_bcd_matrix.h" - +#include #include extern "C" { @@ -26,13 +26,13 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, this->nblk = nblk; this->nrow = nrow; this->ncol = ncol; - if (layout == 'R' || layout == 'r' || layout == 'C' || layout == 'c') + if (layout == 'r' || layout == 'c') { this->layout = layout; } else { - throw("The layout must be 'R', 'r', 'C', or 'c'"); + throw("The layout must be 'r' or 'c'"); } if (comm != MPI_COMM_NULL) diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp index ddd02aaa9a..74391f2fbe 100644 --- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -12,8 +12,8 @@ DistCCSMatrix::DistCCSMatrix(void) this->nnz = 0; this->nnzLocal = 0; this->numColLocal = 0; - this->colptrLocal = NULL; - this->rowindLocal = NULL; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; } DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) @@ -23,8 +23,8 @@ DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) this->nnz = 0; this->nnzLocal = 0; this->numColLocal = 0; - 
this->colptrLocal = NULL; - this->rowindLocal = NULL; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; } DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) @@ -65,21 +65,21 @@ DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) this->numColLocal = size / nproc_data; this->firstCol = size / nproc_data * myproc; this->colptrLocal = new int[this->numColLocal + 1]; - this->rowindLocal = NULL; + this->rowindLocal = nullptr; } else if (myproc == nproc_data - 1) { this->numColLocal = size - myproc * (size / nproc_data); this->firstCol = size / nproc_data * myproc; this->colptrLocal = new int[this->numColLocal + 1]; - this->rowindLocal = NULL; + this->rowindLocal = nullptr; } else { this->numColLocal = 0; this->firstCol = size - 1; this->colptrLocal = new int[this->numColLocal + 1]; - this->rowindLocal = NULL; + this->rowindLocal = nullptr; } } } diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp index 4b0fc23cfb..313a840e68 100644 --- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -170,7 +170,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout, nnz = 0; colidx.clear(); rowidx.clear(); - if (layout == 'C' || layout == 'c') + if (layout == 'c') { for (int i = 0; i < ncol; ++i) { @@ -186,7 +186,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout, } } } - else if (layout == 'R' || layout == 'r') + else if (layout == 'r') { for (int i = 0; i < ncol; ++i) { diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index ebb2c6d78b..914f471a59 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -40,7 +40,7 @@ double PEXSI_Solver::pexsi_mu_guard = 0.0; double PEXSI_Solver::pexsi_elec_thr = 0.0; 
double PEXSI_Solver::pexsi_zero_thr = 0.0; -PEXSI_Solver::PEXSI_Solver(const int blacs_text, +void PEXSI_Solver::prepare(const int blacs_text, const int nb, const int nrow, const int ncol, @@ -54,17 +54,29 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text, this->nb = nb; this->nrow = nrow; this->ncol = ncol; + if (this->h) { delete[] this->h;} this->h = new double[nrow * ncol]; + if (this->s) { delete[] this->s;} this->s = new double[nrow * ncol]; std::memcpy(this->h, h, nrow * ncol * sizeof(double)); std::memcpy(this->s, s, nrow * ncol * sizeof(double)); + if (this->DM) { delete[] this->DM;} this->DM = new double[nrow * ncol]; + if (this->EDM) { delete[] this->EDM;} this->EDM = new double[nrow * ncol]; this->totalEnergyH = 0.0; this->totalEnergyS = 0.0; this->totalFreeEnergy = 0.0; } +PEXSI_Solver::~PEXSI_Solver() +{ + delete[] h; + delete[] s; + delete[] DM; + delete[] EDM; +} + int PEXSI_Solver::solve(double mu0) { MPI_Group grid_group; @@ -85,7 +97,7 @@ int PEXSI_Solver::solve(double mu0) this->nb, this->nrow, this->ncol, - 'C', + 'c', this->h, this->s, GlobalV::nelec, diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index a5d52be5cf..2cb1dd13b2 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -8,7 +8,7 @@ namespace pexsi class PEXSI_Solver { public: - PEXSI_Solver(const int blacs_text, + void prepare(const int blacs_text, const int nb, const int nrow, const int ncol, @@ -17,6 +17,7 @@ class PEXSI_Solver double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy); + ~PEXSI_Solver(); int solve(double mu0); double* get_DM() const; double* get_EDM() const; From 569269b4277542e300a419946840b95d0c267223 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sat, 13 Apr 2024 22:18:57 +0800 Subject: [PATCH 40/44] DM and EDM pointers in pexsi now handled by diagopexsi, and copying h s matrices no longer needed --- 
source/module_elecstate/elecstate_lcao.cpp | 3 +- source/module_hsolver/diago_pexsi.cpp | 44 ++++++++++++++----- source/module_hsolver/diago_pexsi.h | 2 +- source/module_hsolver/hsolver_lcao.cpp | 3 +- .../module_pexsi/pexsi_solver.cpp | 37 +++------------- .../module_pexsi/pexsi_solver.h | 8 +--- .../module_pexsi/simple_pexsi.cpp | 8 ++-- 7 files changed, 48 insertions(+), 57 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 89e40dfe05..5e3f81b1e5 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -282,12 +282,11 @@ void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vectorloc->cal_dk_gamma_from_2D_pub(); } - this->get_DM()->pexsi_EDM.clear(); + this->get_DM()->pexsi_EDM = pexsi_EDM; for (int is = 0; is < nspin; is++) { this->DM->set_DMK_pointer(is, pexsi_DM[is]); - this->get_DM()->pexsi_EDM.push_back(pexsi_EDM[is]); } DM->cal_DMR(); diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index 31b48aef5f..a88349160d 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -25,8 +25,34 @@ DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in) { mu_buffer[i] = this->ps->pexsi_mu; } + this->ParaV = ParaV_in; this->ps = std::make_unique(); + + this->DM.resize(nspin); + this->EDM.resize(nspin); + for (int i = 0; i < nspin; i++) + { + this->DM[i] = new T[ParaV->nrow * ParaV->ncol]; + this->EDM[i] = new T[ParaV->nrow * ParaV->ncol]; + } + +} + +template +DiagoPexsi::~DiagoPexsi() +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + for (int i = 0; i < nspin; i++) + { + delete[] this->DM[i]; + delete[] this->EDM[i]; + } + } template <> @@ -36,20 +62,16 @@ void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& matd h_mat, s_mat; phm_in->matrix(h_mat, s_mat); std::vector eigen(GlobalV::NLOCAL, 0.0); - MPI_Comm COMM_DIAG = MPI_COMM_WORLD; 
int ik = psi.get_current_k(); this->ps->prepare(this->ParaV->blacs_ctxt, - this->ParaV->nb, - this->ParaV->nrow, - this->ParaV->ncol, - h_mat.p, - s_mat.p, - this->totalEnergyH, - this->totalEnergyS, - this->totalFreeEnergy); + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + DM[ik], + EDM[ik]); this->ps->solve(mu_buffer[ik]); - this->EDM.push_back(this->ps->get_EDM()); - this->DM.push_back(this->ps->get_DM()); this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); this->totalEnergyH = this->ps->get_totalEnergyH(); this->totalEnergyS = this->ps->get_totalEnergyS(); diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h index c2063a22b4..9802273e4b 100644 --- a/source/module_hsolver/diago_pexsi.h +++ b/source/module_hsolver/diago_pexsi.h @@ -28,7 +28,7 @@ class DiagoPexsi : public DiagH double totalEnergyS; double totalFreeEnergy; std::unique_ptr ps; - + ~DiagoPexsi(); }; } // namespace hsolver diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 9f9460eb7c..c545ed50a9 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -154,8 +154,6 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, this->pdiagh = nullptr; } auto tem = dynamic_cast*>(this->pdiagh); - tem->DM.clear(); - tem->EDM.clear(); } if (this->pdiagh == nullptr) { @@ -232,6 +230,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); pes->f_en.eband = tem->totalFreeEnergy; + // maybe eferm could be dealt with in the future _pes->dmToRho(tem->DM, tem->EDM); } else diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp index 914f471a59..7a71e6ca01 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp +++ 
b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -46,37 +46,22 @@ void PEXSI_Solver::prepare(const int blacs_text, const int ncol, const double* h, const double* s, - double& totalEnergyH, - double& totalEnergyS, - double& totalFreeEnergy) + double*& _DM, + double*& _EDM) { this->blacs_text = blacs_text; this->nb = nb; this->nrow = nrow; this->ncol = ncol; - if (this->h) { delete[] this->h;} - this->h = new double[nrow * ncol]; - if (this->s) { delete[] this->s;} - this->s = new double[nrow * ncol]; - std::memcpy(this->h, h, nrow * ncol * sizeof(double)); - std::memcpy(this->s, s, nrow * ncol * sizeof(double)); - if (this->DM) { delete[] this->DM;} - this->DM = new double[nrow * ncol]; - if (this->EDM) { delete[] this->EDM;} - this->EDM = new double[nrow * ncol]; + this->h = const_cast(h); + this->s = const_cast(s); + this->DM = _DM; + this->EDM = _EDM; this->totalEnergyH = 0.0; this->totalEnergyS = 0.0; this->totalFreeEnergy = 0.0; } -PEXSI_Solver::~PEXSI_Solver() -{ - delete[] h; - delete[] s; - delete[] DM; - delete[] EDM; -} - int PEXSI_Solver::solve(double mu0) { MPI_Group grid_group; @@ -112,16 +97,6 @@ int PEXSI_Solver::solve(double mu0) return 0; } -double* PEXSI_Solver::get_DM() const -{ - return DM; -} - -double* PEXSI_Solver::get_EDM() const -{ - return EDM; -} - const double PEXSI_Solver::get_totalFreeEnergy() const { return totalFreeEnergy; diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h index 2cb1dd13b2..b041d13656 100644 --- a/source/module_hsolver/module_pexsi/pexsi_solver.h +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -14,13 +14,9 @@ class PEXSI_Solver const int ncol, const double* h, const double* s, - double& totalEnergyH, - double& totalEnergyS, - double& totalFreeEnergy); - ~PEXSI_Solver(); + double*& DM, + double*& EDM); int solve(double mu0); - double* get_DM() const; - double* get_EDM() const; const double get_totalFreeEnergy() const; const double 
get_totalEnergyH() const; const double get_totalEnergyS() const; diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp index c52a6c8ef3..075c803182 100644 --- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -340,10 +340,10 @@ int simplePEXSI(MPI_Comm comm_PEXSI, // back to 2D block cyclic distribution if neccessary if (comm_2D != MPI_COMM_NULL) { - delete[] DM; - delete[] EDM; - DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; - EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + // delete[] DM; + // delete[] EDM; + // DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + // EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; } // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D"); From f8352cb7a8fdebd030a7ac1c36e3a2097aef2583 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sat, 13 Apr 2024 22:19:19 +0800 Subject: [PATCH 41/44] add pexsi examples --- examples/pexsi/md_Si8/INPUT | 32 ++++++++++++ examples/pexsi/md_Si8/KPT | 4 ++ examples/pexsi/md_Si8/STRU | 28 ++++++++++ examples/pexsi/scf_Si64/INPUT | 20 ++++++++ examples/pexsi/scf_Si64/KPT | 4 ++ examples/pexsi/scf_Si64/STRU | 85 +++++++++++++++++++++++++++++++ examples/pexsi/scf_spin_Fe2/INPUT | 22 ++++++++ examples/pexsi/scf_spin_Fe2/KPT | 4 ++ examples/pexsi/scf_spin_Fe2/STRU | 29 +++++++++++ 9 files changed, 228 insertions(+) create mode 100644 examples/pexsi/md_Si8/INPUT create mode 100644 examples/pexsi/md_Si8/KPT create mode 100644 examples/pexsi/md_Si8/STRU create mode 100755 examples/pexsi/scf_Si64/INPUT create mode 100644 examples/pexsi/scf_Si64/KPT create mode 100755 examples/pexsi/scf_Si64/STRU create mode 100644 examples/pexsi/scf_spin_Fe2/INPUT create mode 100644 examples/pexsi/scf_spin_Fe2/KPT create mode 100644 examples/pexsi/scf_spin_Fe2/STRU 
diff --git a/examples/pexsi/md_Si8/INPUT b/examples/pexsi/md_Si8/INPUT new file mode 100644 index 0000000000..13231579bc --- /dev/null +++ b/examples/pexsi/md_Si8/INPUT @@ -0,0 +1,32 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix Si_rescaling +calculation md +nbands 20 +symmetry 0 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +#Parameters (2.Iteration) +ecutwfc 30 +scf_thr 1e-5 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type lcao +ks_solver pexsi +gamma_only 1 + +#Parameters (5.Mixing) +mixing_type broyden +mixing_beta 0.3 +chg_extrap second-order + +#Parameters (6.MD) +md_type nvt +md_thermostat rescaling +md_tolerance 10 +md_nstep 10 +md_dt 1 +md_tfirst 300 +md_tfreq 0.025 diff --git a/examples/pexsi/md_Si8/KPT b/examples/pexsi/md_Si8/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/md_Si8/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/md_Si8/STRU b/examples/pexsi/md_Si8/STRU new file mode 100644 index 0000000000..795530e8cf --- /dev/null +++ b/examples/pexsi/md_Si8/STRU @@ -0,0 +1,28 @@ +ATOMIC_SPECIES +Si 28.085 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +1.8897270 # 1 Angstrom = 1.8897270 bohr + +LATTICE_VECTORS +5.43090 0.00000 0.00000 +0.00000 5.43090 0.00000 +0.00000 0.00000 5.43090 + +ATOMIC_POSITIONS +Direct + +Si +0.0 +8 +0.000 0.000 0.000 1 1 1 +0.000 0.500 0.500 1 1 1 +0.500 0.000 0.500 1 1 1 +0.500 0.500 0.000 1 1 1 +0.250 0.250 0.250 1 1 1 +0.250 0.750 0.750 1 1 1 +0.750 0.250 0.750 1 1 1 +0.750 0.750 0.250 1 1 1 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/INPUT b/examples/pexsi/scf_Si64/INPUT new file mode 100755 index 0000000000..ddb9813701 --- /dev/null +++ b/examples/pexsi/scf_Si64/INPUT @@ -0,0 +1,20 @@ +INPUT_PARAMETERS +suffix test +ntype 1 +nbands 200 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +calculation scf +mixing_beta 0.4 +basis_type 
lcao +gamma_only 1 +symmetry 0 + +ecutwfc 60 +lcao_dr 1e-3 +scf_nmax 20 + +ks_solver pexsi + +pexsi_npole 40 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/KPT b/examples/pexsi/scf_Si64/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_Si64/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_Si64/STRU b/examples/pexsi/scf_Si64/STRU new file mode 100755 index 0000000000..70e722c0ec --- /dev/null +++ b/examples/pexsi/scf_Si64/STRU @@ -0,0 +1,85 @@ +ATOMIC_SPECIES +Si 28 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +2 0 0 +0 2 0 +0 0 2 + +ATOMIC_POSITIONS +Cartesian //Cartesian or Direct coordinate. + +Si // Element type +0 // magnetism +64 // number of atoms +0.00 0.00 0.00 0 0 0 +0.25 0.25 0.25 0 0 0 +0.00 0.50 0.50 0 0 0 +0.25 0.75 0.75 0 0 0 +0.50 0.00 0.50 0 0 0 +0.75 0.25 0.75 0 0 0 +0.50 0.50 0.00 0 0 0 +0.75 0.75 0.25 0 0 0 +1.00 0.00 0.00 0 0 0 +1.25 0.25 0.25 0 0 0 +1.00 0.50 0.50 0 0 0 +1.25 0.75 0.75 0 0 0 +1.50 0.00 0.50 0 0 0 +1.75 0.25 0.75 0 0 0 +1.50 0.50 0.00 0 0 0 +1.75 0.75 0.25 0 0 0 +0.00 1.00 0.00 0 0 0 +0.25 1.25 0.25 0 0 0 +0.00 1.50 0.50 0 0 0 +0.25 1.75 0.75 0 0 0 +0.50 1.00 0.50 0 0 0 +0.75 1.25 0.75 0 0 0 +0.50 1.50 0.00 0 0 0 +0.75 1.75 0.25 0 0 0 +1.00 1.00 0.00 0 0 0 +1.25 1.25 0.25 0 0 0 +1.00 1.50 0.50 0 0 0 +1.25 1.75 0.75 0 0 0 +1.50 1.00 0.50 0 0 0 +1.75 1.25 0.75 0 0 0 +1.50 1.50 0.00 0 0 0 +1.75 1.75 0.25 0 0 0 +0.00 0.00 1.00 0 0 0 +0.25 0.25 1.25 0 0 0 +0.00 0.50 1.50 0 0 0 +0.25 0.75 1.75 0 0 0 +0.50 0.00 1.50 0 0 0 +0.75 0.25 1.75 0 0 0 +0.50 0.50 1.00 0 0 0 +0.75 0.75 1.25 0 0 0 +1.00 0.00 1.00 0 0 0 +1.25 0.25 1.25 0 0 0 +1.00 0.50 1.50 0 0 0 +1.25 0.75 1.75 0 0 0 +1.50 0.00 1.50 0 0 0 +1.75 0.25 1.75 0 0 0 +1.50 0.50 1.00 0 0 0 +1.75 0.75 1.25 0 0 0 +0.00 1.00 1.00 0 0 0 +0.25 1.25 1.25 0 0 0 +0.00 1.50 1.50 0 0 0 +0.25 1.75 1.75 0 0 0 
+0.50 1.00 1.50 0 0 0 +0.75 1.25 1.75 0 0 0 +0.50 1.50 1.00 0 0 0 +0.75 1.75 1.25 0 0 0 +1.00 1.00 1.00 0 0 0 +1.25 1.25 1.25 0 0 0 +1.00 1.50 1.50 0 0 0 +1.25 1.75 1.75 0 0 0 +1.50 1.00 1.50 0 0 0 +1.75 1.25 1.75 0 0 0 +1.50 1.50 1.00 0 0 0 +1.75 1.75 1.25 0 0 0 + diff --git a/examples/pexsi/scf_spin_Fe2/INPUT b/examples/pexsi/scf_spin_Fe2/INPUT new file mode 100644 index 0000000000..a6a5bcc971 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/INPUT @@ -0,0 +1,22 @@ +INPUT_PARAMETERS +suffix autotest +#nbands 40 + +calculation scf +ecutwfc 20 +scf_thr 1.0e-8 +scf_nmax 50 +out_chg 0 + +mixing_type broyden + + +ks_solver pexsi +pexsi_temp 0.1 +pexsi_npole 80 +basis_type lcao +gamma_only 1 +symmetry 0 +nspin 2 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB diff --git a/examples/pexsi/scf_spin_Fe2/KPT b/examples/pexsi/scf_spin_Fe2/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_spin_Fe2/STRU b/examples/pexsi/scf_spin_Fe2/STRU new file mode 100644 index 0000000000..b7a2039467 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/STRU @@ -0,0 +1,29 @@ +ATOMIC_SPECIES +Fe1 1.000 Fe_ONCV_PBE-1.0.upf +Fe2 1.000 Fe_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Fe_gga_9au_100Ry_4s2p2d1f.orb +Fe_gga_9au_100Ry_4s2p2d1f.orb + +LATTICE_CONSTANT +15 + +LATTICE_VECTORS + 1.00 0.50 0.50 + 0.50 1.00 0.50 + 0.50 0.50 1.00 +ATOMIC_POSITIONS +Direct + +Fe1 +5.0 +1 +0.00 0.00 0.00 1 1 1 + +Fe2 +-5.0 +1 +0.50 0.50 0.50 1 1 1 + + From b0ef9ad1965bb9012d8a00b15996d7106baa15e6 Mon Sep 17 00:00:00 2001 From: rhx's linux Date: Sat, 13 Apr 2024 22:33:29 +0800 Subject: [PATCH 42/44] fix pexsi unit test (original version shouldn't run) --- .../module_hsolver/test/diago_pexsi_test.cpp | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp 
b/source/module_hsolver/test/diago_pexsi_test.cpp index 62c915e614..4b3b8aba99 100644 --- a/source/module_hsolver/test/diago_pexsi_test.cpp +++ b/source/module_hsolver/test/diago_pexsi_test.cpp @@ -174,7 +174,32 @@ template class PexsiPrepare void set_pexsi_vars() { - pexsi::PEXSI_Solver::set_pexsi_vars(); + // pexsi::PEXSI_Solver::set_pexsi_vars(); + pexsi::PEXSI_Solver::pexsi_npole = 40; + pexsi::PEXSI_Solver::pexsi_inertia = true; + pexsi::PEXSI_Solver::pexsi_nmax = 80; + // pexsi_symbolic = 1; + pexsi::PEXSI_Solver::pexsi_comm = true; + pexsi::PEXSI_Solver::pexsi_storage = true; + pexsi::PEXSI_Solver::pexsi_ordering = 0; + pexsi::PEXSI_Solver::pexsi_row_ordering = 1; + pexsi::PEXSI_Solver::pexsi_nproc = 1; + pexsi::PEXSI_Solver::pexsi_symm = true; + pexsi::PEXSI_Solver::pexsi_trans = false; + pexsi::PEXSI_Solver::pexsi_method = 1; + pexsi::PEXSI_Solver::pexsi_nproc_pole = 1; + // pexsi_spin = 2; + pexsi::PEXSI_Solver::pexsi_temp = 0.015; + pexsi::PEXSI_Solver::pexsi_gap = 0; + pexsi::PEXSI_Solver::pexsi_delta_e = 20.0; + pexsi::PEXSI_Solver::pexsi_mu_lower = -10; + pexsi::PEXSI_Solver::pexsi_mu_upper = 10; + pexsi::PEXSI_Solver::pexsi_mu = 0.0; + pexsi::PEXSI_Solver::pexsi_mu_thr = 0.05; + pexsi::PEXSI_Solver::pexsi_mu_expand = 0.3; + pexsi::PEXSI_Solver::pexsi_mu_guard = 0.2; + pexsi::PEXSI_Solver::pexsi_elec_thr = 0.001; + pexsi::PEXSI_Solver::pexsi_zero_thr = 1e-10; pexsi::PEXSI_Solver::pexsi_mu = mu; } From 5b53d6fe78125ee45985bc1d689c5707483cc289 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Wed, 17 Apr 2024 11:10:02 +0800 Subject: [PATCH 43/44] add building docs for pexsi --- docs/advanced/install.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/advanced/install.md b/docs/advanced/install.md index d6201a060f..a5deeb5888 100644 --- a/docs/advanced/install.md +++ b/docs/advanced/install.md @@ -104,6 +104,18 @@ Currently supported math functions: cmake -B build -DUSE_ABACUS_LIBM=1 ``` +## Build with PEXSI support + +ABACUS 
supports the PEXSI library for gamma-only LCAO calculations. PEXSI version 2.0.0 has been tested to work with ABACUS; please always use the latest version of PEXSI. + +To build ABACUS with PEXSI support, you need to compile PEXSI (and its dependencies) first. Please refer to the [PEXSI Installation Guide](https://pexsi.readthedocs.io/en/latest/install.html) for more details. Note that PEXSI requires ParMETIS and SuperLU_DIST. + +After compiling PEXSI, you can set `ENABLE_PEXSI` to `ON`. If the libraries are not installed in standard paths, you can set `PEXSI_DIR`, `ParMETIS_DIR` and `SuperLU_DIST_DIR` to the corresponding directories. + +```bash +cmake -B build -DENABLE_PEXSI=ON -DPEXSI_DIR=${path to PEXSI installation directory} -DParMETIS_DIR=${path to ParMETIS installation directory} -DSuperLU_DIST_DIR=${path to SuperLU_DIST installation directory} +``` + ## Build ABACUS with make > Note: We suggest using CMake to configure and compile. From 110e5b07f5cfe5c75e7122831922003390edb0d6 Mon Sep 17 00:00:00 2001 From: Flying-dragon-boxing Date: Thu, 18 Apr 2024 14:45:35 +0800 Subject: [PATCH 44/44] set cxx standard to c++14, which is required in make_unique --- CMakeLists.txt | 1 + source/module_hsolver/diago_pexsi.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a4671d3dd..8a4b3fe79a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,6 +215,7 @@ if(ENABLE_LCAO) target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) + set(CMAKE_CXX_STANDARD 14) endif() else() set(ENABLE_DEEPKS OFF) diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp index a88349160d..4076922626 100644 --- a/source/module_hsolver/diago_pexsi.cpp +++ b/source/module_hsolver/diago_pexsi.cpp @@ -1,5 +1,6 @@ #include #include +#include #ifdef __PEXSI
#include "diago_pexsi.h" #include "module_base/global_variable.h"