From 69ad6a79043e656812e70271d79e2adae292809f Mon Sep 17 00:00:00 2001
From: zhangzhihao <1900017707@pku.edu.cn>
Date: Mon, 1 May 2023 01:27:12 +0800
Subject: [PATCH 01/44] feat pexsi

---
 source/Makefile                               |    6 +
 source/Makefile.Objects                       |    7 +
 source/Makefile.vars                          |   21 +-
 source/module_base/parallel_global.cpp        |    2 +
 source/module_base/parallel_global.h          |    1 +
 source/module_hsolver/diago_pexsi.cpp         |   48 +
 source/module_hsolver/diago_pexsi.h           |   36 +
 source/module_hsolver/hsolver_lcao.cpp        |   18 +
 source/module_hsolver/pexsi/DistBCDMatrix.cpp |  163 ++
 source/module_hsolver/pexsi/DistBCDMatrix.h   |   63 +
 source/module_hsolver/pexsi/DistCCSMatrix.cpp |  112 ++
 source/module_hsolver/pexsi/DistCCSMatrix.h   |   46 +
 .../pexsi/DistMatrixTransformer.cpp           | 1438 +++++++++++++++++
 .../pexsi/DistMatrixTransformer.h             |   20 +
 source/module_hsolver/pexsi/pexsi_solver.cpp  |   59 +
 source/module_hsolver/pexsi/pexsi_solver.h    |   30 +
 source/module_hsolver/pexsi/simplePEXSI.cpp   |  697 ++++++++
 source/module_hsolver/pexsi/simplePEXSI.h     |    8 +
 source/module_io/input.cpp                    |   14 +
 19 files changed, 2786 insertions(+), 3 deletions(-)
 create mode 100644 source/module_hsolver/diago_pexsi.cpp
 create mode 100644 source/module_hsolver/diago_pexsi.h
 create mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.cpp
 create mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.h
 create mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.cpp
 create mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.h
 create mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.cpp
 create mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.h
 create mode 100644 source/module_hsolver/pexsi/pexsi_solver.cpp
 create mode 100644 source/module_hsolver/pexsi/pexsi_solver.h
 create mode 100644 source/module_hsolver/pexsi/simplePEXSI.cpp
 create mode 100644 source/module_hsolver/pexsi/simplePEXSI.h

diff --git a/source/Makefile b/source/Makefile
index 822d8487aa..faaf6697b8 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -138,6 +138,12 @@ ifdef LIBTORCH_DIR
   endif
 endif
 
+ifdef PEXSI_DIR
+    INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE}
+    LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB}
+    HONG += -D__PEXSI
+endif
+
 ifdef DeePMD_DIR
     HONG  += -D__DPMD -DHIGH_PREC 
     OPTS  += -Wl,--no-as-needed
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 2acb7d866a..97b591d444 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -24,6 +24,7 @@ VPATH=./src_global:\
 ./module_hsolver:\
 ./module_hsolver/kernels:\
 ./module_hsolver/genelpa:\
+./module_hsolver/pexsi:\
 ./module_elecstate:\
 ./module_elecstate/kernels:\
 ./module_elecstate/potentials:\
@@ -238,6 +239,12 @@ OBJS_HSOLVER=diago_cg.o\
     diago_iter_assist.o\
     math_kernel_op.o\
     dngvd_op.o\
+    diago_pexsi.o\
+    DistBCDMatrix.o\
+    DistCCSMatrix.o\
+    DistMatrixTransformer.o\
+    pexsi_solver.o\
+    simplePEXSI.o\
 
 OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       diago_blas.o\
diff --git a/source/Makefile.vars b/source/Makefile.vars
index 7c510d4ed3..d19109fc96 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -29,10 +29,19 @@ OPENMP = OFF
 ## CEREAL_DIR        should contain an include folder.
 #----------------------------------------------------------------------
 
-ELPA_DIR      = /usr/local/include/elpa-2021.05.002
-ELPA_INCLUDE_DIR = ${ELPA_DIR}/elpa
+ELPA_DIR      = /root/lib/ELPA
+ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/
+
+CEREAL_DIR    = /root/lib/cereal
+DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0
+DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include
+DSUPERLU_LIB    = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a
+
+SCOTCH_INCLUDE  = -I/usr/local/include
+PTSCOTCH_DIR    = /root/workspace/scotch_6.0.0
+PTSCOTCH_LIB    = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a
+SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a
 
-CEREAL_DIR    = /usr/local/include/cereal
 
 
 ##-------------------  FOR GNU COMPILER  ------------------------------
@@ -59,8 +68,14 @@ CEREAL_DIR    = /usr/local/include/cereal
 ## To use LIBXC:  set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7)
 ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR
 ## To use LibRI:  set LIBRI_DIR and LIBCOMM_DIR
+## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a
 ##---------------------------------------------------------------------
 
+PEXSI_DIR = /root/workspace/pexsi_v2.0.0
+PEXSI_LIB_DIR = ${PEXSI_DIR}/src
+PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
+
+
 # LIBTORCH_DIR  = /usr/local
 # LIBNPY_DIR    = /usr/local
 
diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp
index 1542382fe1..954b0662cd 100644
--- a/source/module_base/parallel_global.cpp
+++ b/source/module_base/parallel_global.cpp
@@ -22,6 +22,7 @@ MPI_Comm STO_WORLD;
 MPI_Comm PARAPW_WORLD; // qianrui add it for sto-dft 2021-4-14
 MPI_Comm GRID_WORLD; // mohan add 2012-01-13z
 MPI_Comm DIAG_WORLD; // mohan add 2012-01-13
+MPI_Group GRID_GROUP;
 
 void Parallel_Global::myProd(std::complex<double> *in,std::complex<double> *inout,int *len,MPI_Datatype *dptr)
 {
@@ -80,6 +81,7 @@ void Parallel_Global::split_diag_world(const int &diag_np)
 	}
 
 	MPI_Comm_split(MPI_COMM_WORLD, color, key, &DIAG_WORLD);
+    MPI_Comm_group(DIAG_WORLD, &GRID_GROUP);
 	MPI_Comm_rank(DIAG_WORLD, &GlobalV::DRANK);
 	MPI_Comm_size(DIAG_WORLD, &GlobalV::DSIZE);
 	GlobalV::DCOLOR=color;
diff --git a/source/module_base/parallel_global.h b/source/module_base/parallel_global.h
index d0e3456822..2b6d59c6c0 100644
--- a/source/module_base/parallel_global.h
+++ b/source/module_base/parallel_global.h
@@ -16,6 +16,7 @@ extern MPI_Comm STO_WORLD;
 extern MPI_Comm PARAPW_WORLD;
 extern MPI_Comm GRID_WORLD; //mohan add 2012-01-13
 extern MPI_Comm DIAG_WORLD; //mohan add 2012-01-13
+extern MPI_Group GRID_GROUP;
 #endif
 
 //void myProd(std::complex<double> *in,std::complex<double> *inout,int *len,MPI_Datatype *dptr);
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
new file mode 100644
index 0000000000..8a7257d747
--- /dev/null
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -0,0 +1,48 @@
+#ifdef __PEXSI
+#include "diago_pexsi.h"
+
+#include "c_pexsi_interface.h"
+#include "module_base/global_variable.h"
+#include "module_base/lapack_connector.h"
+#include "module_base/timer.h"
+#include "module_base/tool_quit.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "pexsi/pexsi_solver.h"
+
+typedef hamilt::MatrixBlock<double> matd;
+typedef hamilt::MatrixBlock<std::complex<double>> matcd;
+
+namespace hsolver
+{
+
+void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
+{
+    ModuleBase::TITLE("DiagoPEXSI", "diag");
+    matd h_mat, s_mat;
+    phm_in->matrix(h_mat, s_mat);
+    std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
+    MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
+    this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt,
+                                this->ParaV->nb,
+                                this->ParaV->nrow,
+                                this->ParaV->ncol,
+                                h_mat.p,
+                                s_mat.p,
+                                this->DM,
+                                this->EDM,
+                                this->totalEnergyH,
+                                this->totalEnergyS,
+                                this->totalFreeEnergy);
+    this->ps->solve();
+    std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy
+              << std::endl;
+    ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed");
+}
+void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
+{
+    ModuleBase::TITLE("DiagoPEXSI", "diag");
+    ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed");
+}
+
+} // namespace hsolver
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
new file mode 100644
index 0000000000..a48b92b867
--- /dev/null
+++ b/source/module_hsolver/diago_pexsi.h
@@ -0,0 +1,36 @@
+#ifndef DIGAOPEXSI_H
+#define DIGAOPEXSI_H
+
+#ifdef  __PEXSI
+
+#define DIGAOPEXSI_H
+#endif
+
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "diagh.h"
+#include "pexsi/pexsi_solver.h"
+
+namespace hsolver
+{
+
+class DiagoPexsi : public DiagH<double>
+{
+  public:
+    DiagoPexsi(const Parallel_Orbitals* ParaV_in)
+    {
+      this->ParaV = ParaV_in;
+    }
+    void diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in) override;
+    void diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>> &psi, double *eigenvalue_in) override;
+    const Parallel_Orbitals* ParaV;
+    double* DM;
+    double* EDM;
+    double totalEnergyH;
+    double totalEnergyS;
+    double totalFreeEnergy;
+    PEXSI_Solver* ps;
+};
+
+}
+
+#endif
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 0c964ec501..e870e4f191 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -7,6 +7,7 @@
 #ifdef __ELPA
 #include "diago_elpa.h"
 #endif
+#include "diago_pexsi.h"
 
 namespace hsolver
 {
@@ -79,6 +80,23 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt<double>* pHamilt,
         */
         ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!");
     }
+    else if (this->method == "pexsi")
+    {
+        if (pdiagh != nullptr)
+        {
+            if (pdiagh->method != this->method)
+            {
+                delete[] pdiagh;
+                pdiagh = nullptr;
+            }
+        }
+        if (pdiagh == nullptr)
+        {
+            DiagoPexsi* tem = new DiagoPexsi(this->ParaV);
+            this->pdiagh = tem;
+            pdiagh->method = this->method;
+        }
+    }
     else
     {
         ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!");
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp
new file mode 100644
index 0000000000..8a3f2740e2
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistBCDMatrix.cpp
@@ -0,0 +1,163 @@
+#include <mpi.h>
+#include "DistBCDMatrix.h"
+extern "C"
+{
+    void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol);
+    int Cblacs_pnum(int blacs_ctxt, int prow, int pcol);
+};
+
+/*
+DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol)
+{
+    this->comm=comm;
+    this->group=group;
+    MPI_Comm_rank(comm, &this->myproc);
+    this->nprows=nprow;
+    this->npcols=npcol;
+    this->size=size;
+    this->nblk=nblk;
+    this->nrow=nrow;
+    this->ncol=ncol;
+    this->LAYOUT='R';
+}
+
+DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT)
+{
+    this->comm=comm;
+    this->group=group;
+    MPI_Comm_rank(comm, &this->myproc);
+    this->nprows=nprow;
+    this->npcols=npcol;
+    this->size=size;
+    this->nblk=nblk;
+    this->nrow=nrow;
+    this->ncol=ncol;
+    if(LAYOUT == 'R' ||
+       LAYOUT == 'r' ||
+       LAYOUT == 'C' ||
+       LAYOUT == 'c')
+    {
+        this->LAYOUT=LAYOUT;
+    } else
+    {
+        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
+    }
+}
+
+DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol)
+{
+    this->comm=comm;
+    this->group=group;
+    this->blacs_ctxt=blacs_ctxt;
+    this->size=size;
+    this->nblk=nblk;
+    this->nrow=nrow;
+    this->ncol=ncol;
+    this->LAYOUT='R';
+    Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol);
+    if(comm != MPI_COMM_NULL)
+    {
+        MPI_Comm_rank(comm, &this->myproc);
+        MPI_Comm_size(comm, &this->nprocs);
+    }else
+    {
+        this->myproc=-1;
+        this->nprocs=-1;
+    }
+}
+*/
+
+DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT)
+{
+    this->comm=comm;
+    this->group=group;
+    this->blacs_ctxt=blacs_ctxt;
+    this->size=size;
+    this->nblk=nblk;
+    this->nrow=nrow;
+    this->ncol=ncol;
+    if(LAYOUT == 'R' ||
+       LAYOUT == 'r' ||
+       LAYOUT == 'C' ||
+       LAYOUT == 'c')
+    {
+        this->LAYOUT=LAYOUT;
+    } else
+    {
+        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
+    }
+
+    if(comm != MPI_COMM_NULL)
+    {
+        MPI_Comm_rank(comm, &this->myproc);
+        Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol);
+    }else
+    {
+        this->myproc=-1;
+        this->myprow=-1;
+        this->mypcol=-1;
+    }
+
+    // synchronize matrix parameters to all processes, including those are not in bcd group
+    int myid_in_comm_world;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world);
+    if(myid_in_comm_world == 0)
+    {
+        MPI_Comm_size(comm, &this->nprocs);
+        int PARA_BCAST[4]={this->nblk, this->nprocs, this->nprows, this->npcols};
+        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
+    }
+    else
+    {
+        int PARA_BCAST[4];
+        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
+        this->nblk=PARA_BCAST[0];
+        this->nprocs=PARA_BCAST[1];
+        this->nprows=PARA_BCAST[2];
+        this->npcols=PARA_BCAST[3];
+    }
+    this->prowpcol2pnum=new int[this->nprocs];
+    if(myid_in_comm_world == 0)
+    {
+        for(int i=0; i<this->nprows; ++i)
+        {
+            for(int j=0; j<this->npcols; ++j)
+            {
+                this->prowpcol2pnum[i*this->npcols+j]=Cblacs_pnum(this->blacs_ctxt, i, j);
+            }
+        }
+    }
+    MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD);
+}
+
+DistBCDMatrix::~DistBCDMatrix()
+{
+    delete[] prowpcol2pnum;
+}
+
+int DistBCDMatrix::globalRow(const int localRow)
+{
+    return (localRow/nblk*nprows+myprow)*nblk+localRow%nblk;
+}
+
+int DistBCDMatrix::globalCol(const int localCol)
+{
+    return (localCol/nblk*npcols+mypcol)*nblk+localCol%nblk;
+}
+
+int DistBCDMatrix::localRow(const int globalRow, int& myprow)
+{
+    myprow=int((globalRow%(nblk*nprows))/nblk);
+    return int(globalRow/(nblk*nprows))*nblk+globalRow%nblk;
+}
+
+int DistBCDMatrix::localCol(const int globalCol, int& mypcol)
+{
+    mypcol=int((globalCol%(nblk*npcols))/nblk);
+    return int(globalCol/(nblk*npcols))*nblk+globalCol%nblk;
+}
+
+int DistBCDMatrix::pnum(const int prow, const int pcol)
+{
+    return this->prowpcol2pnum[prow*this->npcols+pcol];
+}
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h
new file mode 100644
index 0000000000..a0b8c7a907
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistBCDMatrix.h
@@ -0,0 +1,63 @@
+// a Block Cyclic Data Distribution matrix
+// http://www.netlib.org/utk/papers/factor/node3.html
+// local matrix elements is stored in column major
+// used for pexsi
+class DistBCDMatrix {
+
+        public:
+        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol);
+        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT);
+
+        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol);
+        DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT);
+        ~DistBCDMatrix();
+
+        int globalRow(const int localRow);
+        int globalCol(const int localCol);
+        int localRow(const int globalRow, int& myprow);
+        int localCol(const int globalCol, int& mypcol);
+        int pnum(const int prow, const int pcol);
+        //~DistBCDMatrix();
+
+        // MPI communicator
+        MPI_Comm comm;
+        MPI_Group group;
+
+        // blacs context
+        int blacs_ctxt;
+
+        // row and column of process grid
+        int nprows;
+        int npcols;
+
+        // total number of processes
+        int nprocs;
+
+        // Matrix size
+        int size;
+
+        // block size
+        int nblk;
+
+        // row and c0lumn of Local matrix part
+        int nrow;
+        int ncol;
+
+
+        // protected:
+
+        // private:
+
+        // current process row and column
+        int myprow;
+        int mypcol;
+
+        // current process id
+        int myproc;
+
+        int *prowpcol2pnum;
+        // the local data layout
+        // 'R' or 'r' for row-major, which is used in C/C++
+        // 'C' or 'c' for column-major, which is used in Fortran
+        char LAYOUT;
+};
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp
new file mode 100644
index 0000000000..45a14d6ac7
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistCCSMatrix.cpp
@@ -0,0 +1,112 @@
+#include <mpi.h>
+#include "DistCCSMatrix.h"
+
+DistCCSMatrix::DistCCSMatrix(void)
+{
+    this->comm=MPI_COMM_WORLD;
+    this->size=0;
+    this->nnz=0;
+    this->nnzLocal=0;
+    this->numColLocal=0;
+    this->colptrLocal=NULL;
+    this->rowindLocal=NULL;
+}
+
+DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in)
+{
+    this->comm=comm_in;
+    this->size=0;
+    this->nnz=0;
+    this->nnzLocal=0;
+    this->numColLocal=0;
+    this->colptrLocal=NULL;
+    this->rowindLocal=NULL;
+}
+
+DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in)
+{
+    this->comm=MPI_COMM_WORLD;
+    this->size=size_in;
+    this->nnzLocal=nnzLocal_in;
+    MPI_Request req;
+    MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req);
+    this->numColLocal=0;
+    this->colptrLocal=new int[size];
+    this->rowindLocal=new int[nnzLocal];
+
+    MPI_Status req_status;
+    MPI_Wait(&req, &req_status);
+}
+
+DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in)
+{
+    this->comm=comm_in;
+    this->nproc_data=nproc_data_in;
+    int nproc_data_range[3]={0, this->nproc_data-1, 1};
+    // create processes group with data: this->group_data and associated communicator
+    MPI_Comm_group(this->comm, &this->group);
+    MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data);
+    this->comm_data=MPI_COMM_NULL;
+    MPI_Comm_create(this->comm, this->group_data, &this->comm_data);
+    this->size=size_in;
+    this->nnz=0;
+    this->nnzLocal=0;
+    int myproc;
+    if(comm != MPI_COMM_NULL)
+    {
+        MPI_Comm_size(comm, &nprocs);
+        MPI_Comm_rank(comm, &myproc);
+        if(myproc<nproc_data-1)
+        {
+            this->numColLocal=size/nproc_data;
+            this->firstCol=size/nproc_data*myproc;
+            this->colptrLocal=new int[this->numColLocal+1];
+            this->rowindLocal=NULL;
+        }
+        else if(myproc==nproc_data-1)
+        {
+            this->numColLocal=size-myproc*(size/nproc_data);
+            this->firstCol=size/nproc_data*myproc;
+            this->colptrLocal=new int[this->numColLocal+1];
+            this->rowindLocal=NULL;
+        }
+        else
+        {
+            this->numColLocal=0;
+            this->firstCol=size-1;
+            this->colptrLocal=new int[this->numColLocal+1];
+            this->rowindLocal=NULL;
+        }
+    }
+}
+
+int DistCCSMatrix::globalCol(int localCol)
+{
+    return this->firstCol+localCol;
+}
+
+
+// NOTE: the process id is 0-based
+int DistCCSMatrix::localCol(int globalCol, int& mypcol)
+{
+    mypcol=int(globalCol/int(this->size/this->nproc_data));
+    if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1;
+    return mypcol>0 ? globalCol-(this->size/this->nproc_data)*mypcol : globalCol;
+}
+
+void DistCCSMatrix::setnnz(int nnzLocal_in)
+{
+    if(this->comm_data != MPI_COMM_NULL)
+    {
+        MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data);
+        this->nnzLocal=nnzLocal_in;
+        this->rowindLocal=new int[nnzLocal];
+        this->colptrLocal[this->numColLocal]=nnzLocal_in+1;
+    }
+}
+
+DistCCSMatrix::~DistCCSMatrix()
+{
+    delete[] colptrLocal;
+    delete[] rowindLocal;
+}
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h
new file mode 100644
index 0000000000..43d1126bf6
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistCCSMatrix.h
@@ -0,0 +1,46 @@
+// Distributed Compressed Column Storage Matrix format
+// used for PEXSI
+class DistCCSMatrix {
+
+        public:
+        DistCCSMatrix();
+        DistCCSMatrix(MPI_Comm comm);
+        DistCCSMatrix(int size, int nnzLocal);
+        DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal);
+        DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index);
+
+        int globalCol(int localCol);
+        int localCol(int globalCol, int& mypcol);
+        void setnnz(int nnzLocal);
+        ~DistCCSMatrix();
+
+        // MPI communicator
+        MPI_Comm comm;
+        MPI_Group group;
+
+        // total number of processes and the processes with data in
+        int nprocs;
+        int nproc_data;
+        MPI_Group group_data;
+        MPI_Comm comm_data;
+
+        // Matrix size
+        int size;
+
+        // Number of non-zero values in the matrix
+        int nnz;
+
+        // Number of non-zero values in the matrix of the local process
+        int nnzLocal;
+
+        // number of columns in current process
+        int numColLocal;
+
+        // the first column index in current process
+        int firstCol;
+
+        // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
+        int* colptrLocal;
+        int* rowindLocal;
+};
+
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
new file mode 100644
index 0000000000..285d3cdd94
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
@@ -0,0 +1,1438 @@
+#include <mpi.h>
+#include <cstdlib>
+#include <climits>
+#include <cmath>
+#include <vector>
+#include <map>
+#include "DistBCDMatrix.h"
+#include "DistCCSMatrix.h"
+
+
+// for debug
+#ifdef _DEBUG
+#include <cstring>
+#include <fstream>
+#include <unistd.h>
+#include "src_pw/global.h"
+#endif
+// end debug
+
+// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a negtive value
+// the size_process and displacement_process array will be changed after the index is found
+// isFirst: wether this function is called for the first time for a index array;
+// nprocs: total number of processes
+// size_process: the number of indices in each process
+// displacement_process: the start position in each process
+// index: the array contains the indices
+inline int MinimumIndexPosition(const bool isFirst, const int nprocs,
+                                int* size_process, int* displacement_process, const int* index)
+{
+    // usually the minimum index is continuous, so it will be a good idea to
+    // check the one next to the previous index first.
+    static int pre_position; // previous position in index array of minimum index,
+    static int pre_process; // the process contains previous index
+
+    int minimum_index=INT_MAX; // the minimum index, initial value is a large number which is larger than any other index;
+    int minimum_position=-1;
+    int minimum_process=-1;
+
+    if(isFirst)
+    {
+        for(int i=0; i<nprocs; ++i)
+        {
+            if(size_process[i]>0)
+            {
+                if(minimum_index>index[displacement_process[i]])  // find a smaller index
+                {
+                    minimum_position=displacement_process[i];
+                    minimum_index=index[minimum_position];
+                    minimum_process=i;
+                }
+            }
+        }
+        if(minimum_process>=0) // find it!
+        {
+            ++displacement_process[minimum_process];
+            --size_process[minimum_process];
+        }
+        pre_position=minimum_position;
+        pre_process=minimum_process;
+        return minimum_position;
+    }
+    else
+    {
+        // check the next one of pre_position
+        if(size_process[pre_process]>0  &&  // the previous process still has elements
+            index[pre_position+1]==index[pre_position]+1) // find it!
+        {
+            ++displacement_process[pre_process];
+            --size_process[pre_process];
+            ++pre_position;  // new pre_position is the next one
+                                      // new pre_process keeps the same
+            return pre_position; // current position is the new pre_position
+        }
+
+        // if the next one of pre_position is not the minimum one
+        for(int i=0; i<nprocs; ++i)
+        {
+            if(size_process[i]>0)
+            {
+                if(minimum_index>index[displacement_process[i]])
+                {
+                    minimum_position=displacement_process[i];
+                    minimum_index=index[minimum_position];
+                    minimum_process=i;
+                }
+            }
+        }
+        if(minimum_process>=0) // find it!
+        {
+            ++displacement_process[minimum_process];
+            --size_process[minimum_process];
+        }
+        pre_position=minimum_position;
+        pre_process=minimum_process;
+        return minimum_position;
+    }
+}
+
+inline void buildCCSParameter(const int size, const int nprocs,
+            std::vector<int> size_process, std::vector<int> displacement_process,
+            const int* position_index, DistCCSMatrix &DST_Matrix, int* buffer2ccsIndex)
+{
+    // find the minimum one from left buffer index
+    if(DST_Matrix.nnzLocal<=0) return;
+
+    int pre_col=-1;
+    int nnz_now=0;
+    int p_mini;
+    p_mini=MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index);
+    while(p_mini>=0)
+    {
+        int index_mini=position_index[p_mini];
+        int col_mini=index_mini/DST_Matrix.size; //-DST_Matrix.firstCol;
+        int row_mini=index_mini%DST_Matrix.size;
+        if(col_mini>pre_col) // a new column starts, column pointer is a 1-based array
+        {
+            pre_col=col_mini;
+            DST_Matrix.colptrLocal[col_mini]=nnz_now+1;
+        }
+        DST_Matrix.rowindLocal[nnz_now]=row_mini+1; // setup row index array, which is also 1-based
+        // copy data from buffer to M, be careful M is a 0-based array
+        buffer2ccsIndex[nnz_now]=p_mini;
+        ++nnz_now;
+        p_mini=MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
+    }
+    // The last element of colptrLocal is nnzLocal+1
+    DST_Matrix.colptrLocal[DST_Matrix.numColLocal]=nnz_now+1;
+}
+
+inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer,
+                            double* nzvalLocal)
+{
+    for(int i=0; i<nnzLocal; ++i)
+    {
+        nzvalLocal[i]=buffer[buffer2ccsIndex[i]];
+    }
+}
+inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
+{
+    for(int i=0; i<N; ++i)
+    {
+        int key;
+        if(fabs(A[i]<1e-31))
+            key=-100;
+        else
+            key=floor(log10(fabs(A[i])));
+        ++P[key];
+    }
+}
+
+// find out the index of non-zero elements
+inline int getNonZeroIndex(char LAYOUT, const int nrow, const int ncol, double* H_2d, double* S_2d, const double ZERO_Limit,
+                    int &nnz, std::vector<int> &rowidx, std::vector<int> &colidx)
+{
+    #ifdef _DEBUG
+    char f_log[80];
+    int myproc;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+    std::ofstream log;
+    if(myproc<100)
+    {
+        sprintf(f_log, "transformer_%2.2d.log", myproc);
+        log.open(f_log, std::ios::app);
+        log<<"start count nnz"<<std::endl;
+    }
+    // count nonzeros value distribution of H and S
+    static bool isCOUNTNONZERO=true;
+    if(!isCOUNTNONZERO)
+    {
+        isCOUNTNONZERO=true;
+        char plog_name[80];
+        sprintf(plog_name, "HS_Distribution_%d.log", myproc);
+        std::ofstream plog;
+        plog.open(plog_name, std::ios::app);
+        std::map<int, int> pH;
+        countMatrixDistribution(nrow*ncol, H_2d, pH);
+        std::map<int, int> pS;
+        countMatrixDistribution(nrow*ncol, H_2d, pS);
+        plog<<"Element in H distribution:\n";
+        // std::stringstream ss;
+        // ss.str("");
+        for(auto iter=pH.begin(); iter!=pH.end(); ++iter)
+        {
+            // ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+            plog<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+        }
+        //OUT(ofs_running,ss.str());
+        //OUT(ofs_running, "Element in S distribution:");
+        plog<<"Element in S distribution:\n";
+        //ss.str("");
+        for(auto iter=pS.begin(); iter!=pS.end(); ++iter)
+        {
+            //ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+            plog<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+        }
+        // OUT(ofs_running,ss.str());
+        plog.close();
+    }
+    #endif
+
+    int idx=0;
+    nnz=0;
+    colidx.clear();
+    rowidx.clear();
+    #ifdef _DEBUG
+    if(myproc<100) log<<"rowidx and colidx cleared"<<std::endl;
+    #endif
+    if(LAYOUT == 'C' || LAYOUT == 'c')
+    {
+        for(int i=0; i<ncol; ++i)
+        {
+            for(int j=0; j<nrow; ++j)
+            {
+                idx=i*nrow+j;
+                if(fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
+                {
+                    ++nnz;
+                    colidx.push_back(i);
+                    rowidx.push_back(j);
+                }
+            }
+        }
+    } else if(LAYOUT == 'R' || LAYOUT == 'r')
+    {
+        for(int i=0; i<ncol; ++i)
+        {
+            for(int j=0; j<nrow; ++j)
+            {
+                idx=j*ncol+i;
+                if(fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
+                {
+                    ++nnz;
+                    colidx.push_back(i);
+                    rowidx.push_back(j);
+                }
+            }
+        }
+    } else
+    {
+        #ifdef _DEBUG
+        if(myproc<100) log<<"unknown LAYOUT: "<<LAYOUT<<std::endl;
+        #endif
+        return 1;
+    }
+    #ifdef _DEBUG
+    if(myproc<100) 
+    {
+        log<<"nnz is counted: "<<nnz<<std::endl;
+        log.close();
+    }
+    #endif
+    return 0;
+}
+
+int buildTransformParameter(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix,
+                            const int NPROC_TRANS, MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS,
+                            const int nnz, std::vector<int> &rowidx, std::vector<int> &colidx,
+                            int &sender_size, std::vector<int> &sender_size_process, std::vector<int> &sender_displacement_process,
+                            int &receiver_size, std::vector<int> &receiver_size_process, std::vector<int> &receiver_displacement_process,
+                            std::vector<int> &buffer2ccsIndex)
+{
+    // debug
+    int myproc;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+    #ifdef _DEBUG
+    std::ofstream log;
+    if(myproc<100)
+    {
+        char f_log[80];
+        sprintf(f_log, "transformer_%2.2d.log", myproc);
+        log.open(f_log, std::ios::app);
+        log<<"enter buildTransformParameter"<<std::endl;
+    }
+    #endif
+    //end debug
+    //count sender non-zeros elements
+    sender_size=nnz;
+    std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100)
+    {
+        log<<"start translate ranks between group_data and group_trans"<<std::endl;
+        log<<"sender_size (in BCD) = "<<sender_size<<std::endl;
+    }
+    #endif
+    // end debug
+    // create process id map from group_data to group_trans
+    int nproc_data;
+    std::vector<int> proc_map_data_trans;
+    if(myproc == 0)
+    {
+        MPI_Group_size(DST_Matrix.group_data, &nproc_data);
+        MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
+        proc_map_data_trans.resize(nproc_data, 0);
+        for(int i=0; i<nproc_data; ++i)
+        {
+            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i,
+                                      GROUP_TRANS, &proc_map_data_trans[i]);
+        }
+        MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
+    }
+    else
+    {
+        MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
+        proc_map_data_trans.resize(nproc_data, 0);
+        MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
+    }
+
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100)
+    {
+        log<<"rank_data        rank_trans"<<std::endl;
+        for(int i=0; i<nproc_data; ++i)
+            log<<i<<"\t\t\t"<<proc_map_data_trans[i]<<std::endl;
+    }
+    #endif
+    // end debug
+
+    for(int i=0; i<nnz; ++i)
+    {
+        int l_col=colidx[i];
+        int g_col=SRC_Matrix.globalCol(l_col);
+        int dst_process;
+        int dst_col=DST_Matrix.localCol(g_col, dst_process);
+        int dst_process_trans=proc_map_data_trans[dst_process];
+        /*
+        // debug
+        #ifdef _DEBUG
+        log<<dst_process<<"\t\t";
+        #endif
+        // end debug
+         MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &dst_process,
+                                   GROUP_TRANS, &dst_process_trans);
+        // debug
+        #ifdef _DEBUG
+        log<<dst_process_trans<<std::endl;
+        #endif
+        // end debug
+        */
+        ++sender_size_process[dst_process_trans];
+    }
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) log<<"sender_size_process is creaated"<<std::endl;
+    #endif
+    // end debug
+
+    // transfer sender index size to receiver index size
+    MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) log<<"receiver_size_process is got"<<std::endl;
+    #endif
+    // end debug
+
+    // setup all2all parameters
+    sender_displacement_process[0]=0;
+    for(int i=1; i<NPROC_TRANS; ++i)
+    {
+        sender_displacement_process[i]=sender_displacement_process[i-1]+sender_size_process[i-1];
+    }
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) log<<"sender_displacement_process is creaated"<<std::endl;
+    #endif
+    // end debug
+
+    receiver_displacement_process[0]=0;
+    receiver_size=receiver_size_process[0];
+    for(int i=1; i<NPROC_TRANS; ++i)
+    {
+        receiver_displacement_process[i]=receiver_displacement_process[i-1]+receiver_size_process[i-1];
+        receiver_size+=receiver_size_process[i];
+    }
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100)
+    {
+        log<<"sender_size and receiver_displacement_process are creaated"<<std::endl;
+        log<<"receiver_size (in CCS) = "<<receiver_size<<std::endl;
+    }
+    #endif
+    // end debug
+
+    // setup receiver index
+    // setup sender_index
+    std::vector<int> sender_index(sender_size);
+    for(int i=0; i<nnz; ++i)
+    {
+        int l_col=colidx[i];
+        int g_col=SRC_Matrix.globalCol(l_col);
+        int dst_process;
+        int dst_col=DST_Matrix.localCol(g_col, dst_process);
+        int l_row=rowidx[i];
+        int dst_row=SRC_Matrix.globalRow(l_row);
+        sender_index[i]=dst_col*DST_Matrix.size+dst_row;
+    }
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) log<<"sender_index is got"<<std::endl;
+    #endif
+    // end debug
+
+    // transfer index to receiver
+    std::vector<int> receiver_index(receiver_size);
+    MPI_Alltoallv(&sender_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT,
+                  &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS);
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) log<<"receiver_index is got"<<std::endl;
+    #endif
+    // end debug
+
+    // setup buffer2ccsIndex based on receiver_index
+    buffer2ccsIndex.resize(receiver_size);
+    DST_Matrix.setnnz(receiver_size);
+    buildCCSParameter(receiver_size, NPROC_TRANS,
+            receiver_size_process, receiver_displacement_process,
+            &receiver_index[0], DST_Matrix, &buffer2ccsIndex[0]);
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100) 
+    {
+        log<<"ccs parameter is built"<<std::endl;
+        log.close();
+    }
+    #endif
+    // end debug
+    return 0;
+}
+
+int newGroupCommTrans(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix,
+                      MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS)
+{
+    // debug
+    #ifdef _DEBUG
+    char f_log[80];
+    int myproc;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+    std::ofstream log;
+    if(myproc<100)
+    {
+        sprintf(f_log, "transformer_%2.2d.log", myproc);
+        log.open(f_log, std::ios::app);
+        //log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
+        log<<"enter newGroupCommTrans"<<std::endl;
+    }
+    #endif
+    // build transfortram communicator which contains both processes of BCD processors and
+    // CCS processors with nonzero elements
+    MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS);
+    MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
+    // debug
+    #ifdef _DEBUG
+    if(myproc<100)
+    {
+        int trans_myid, trans_nproc;
+        int trans_gid, trans_gproc;
+        if(COMM_TRANS != MPI_COMM_NULL)
+        {
+            MPI_Comm_rank(COMM_TRANS, &trans_myid);
+            MPI_Comm_size(COMM_TRANS, &trans_nproc);
+        }
+        else
+        {
+            trans_myid=-1;
+            trans_nproc=-1;
+            // trans_gid=-1;
+            // trans_gproc=-1;
+        }
+        MPI_Group_rank(GROUP_TRANS, &trans_gid);
+        MPI_Group_size(GROUP_TRANS, &trans_gproc);
+        int BCD_myid, BCD_nproc;
+        BCD_myid=SRC_Matrix.myproc;
+        BCD_nproc=SRC_Matrix.nprocs;
+        int BCD_gid, BCD_gproc;
+        MPI_Group_rank(SRC_Matrix.group, &BCD_gid);
+        MPI_Group_size(SRC_Matrix.group, &BCD_gproc);
+        int CCS_myid, CCS_nproc;
+        int CCS_gid, CCS_gproc;
+        if(DST_Matrix.comm_data != MPI_COMM_NULL)
+        {
+            MPI_Comm_rank(DST_Matrix.comm_data, &CCS_myid);
+            MPI_Comm_size(DST_Matrix.comm_data, &CCS_nproc);
+        }
+        else
+        {
+            CCS_myid = -1;
+            CCS_nproc = -1;
+            // CCS_gid=-1;
+            // CCS_gproc=-1;
+        }
+        MPI_Group_rank(DST_Matrix.group_data, &CCS_gid);
+        MPI_Group_size(DST_Matrix.group_data, &CCS_gproc);
+        log<<"myid in BCD:\t"<< BCD_myid <<"\tin CCS:\t"<< CCS_myid <<"\tin TRANS:\t"<< trans_myid
+        <<"\tBCD_gid:\t"<<BCD_gid<<"\tCCS_gid:\t"<<CCS_gid<<"\ttrans_gid:\t"<<trans_gid<<std::endl;
+        log<<"nproc in BCD:\t"<< BCD_nproc << "\tin CCS:\t" << CCS_nproc << "\tin TRANS:\t"<< trans_nproc
+        <<"\tBCD_gproc:\t"<<BCD_gproc<<"\tCCS_gproc:\t"<<CCS_gproc<<"\ttrans_gproc:\t"<<trans_gproc<<std::endl;
+
+        log<<"COMM_TRANS is created"<<std::endl;
+        log.close();
+    }
+    #endif
+    // end debug
+    return 0;
+}
+
+int deleteGroupCommTrans(MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS)
+{
+    MPI_Group_free(&GROUP_TRANS);
+    if(COMM_TRANS != MPI_COMM_NULL)
+    {
+        MPI_Comm_free(&COMM_TRANS);
+    }
+    return 0;
+}
+
+
+// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
+// two destination matrices share the same non-zero elements positions
+// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, even if
+// one of them is acturely zero
+// All matrices must have same MPI communicator
+int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, const double ZERO_Limit,
+                   DistCCSMatrix &DST_Matrix,  double*& H_ccs, double*& S_ccs)
+{
+    // debug
+    #ifdef _DEBUG
+    char f_log[80];
+    int myproc;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+    std::ofstream log;
+    if(myproc<100)
+    {
+        sprintf(f_log, "transformer_%2.2d.log", myproc);
+        log.open(f_log, std::ios::app);
+        log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
+        log<<"enter transformBCDtoCCS for H and S"<<std::endl;
+    }
+    #endif
+    // end debug
+    MPI_Group GROUP_TRANS;
+    MPI_Comm COMM_TRANS=MPI_COMM_NULL;
+    newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS);
+    if(COMM_TRANS != MPI_COMM_NULL)
+    {
+        //set up sender and receiver
+        int NPROC_TRANS;
+        MPI_Comm_size(COMM_TRANS, &NPROC_TRANS);
+        int sender_size;
+        std::vector<int> sender_size_process(NPROC_TRANS);
+        std::vector<int> sender_displacement_process(NPROC_TRANS);
+        int receiver_size;
+        std::vector<int> receiver_size_process(NPROC_TRANS);
+        std::vector<int> receiver_displacement_process(NPROC_TRANS);
+
+        #ifdef _DEBUG
+        if(myproc<100)
+        {
+            log<<"nprocs: "<<SRC_Matrix.nprocs<<" ; myprow: "<<SRC_Matrix.myprow<<" ; mypcol: "<<SRC_Matrix.mypcol<<std::endl;
+            log<<"nblk:"<<SRC_Matrix.nblk<<" ; nrow: "<<SRC_Matrix.nrow<<" ; ncol: "<<SRC_Matrix.ncol<<std::endl;
+            log<<"layout:"<<SRC_Matrix.LAYOUT<<std::endl;
+            log<<"ZERO = "<<ZERO_Limit<<std::endl;
+            log<<"DST_Matrix parameters:"<<std::endl;
+            log<<"size: "<<DST_Matrix.size<<" ;nproc_data: "<<DST_Matrix.nproc_data<<std::endl;
+            log<<"start transforming H and S to CCS format"<<std::endl;
+        }
+        #endif
+        // end debug
+
+        // find out the non-zeros elements' positions
+        std::vector<int> rowidx;
+        std::vector<int> colidx;
+        int nnz=0;
+        #ifdef _DEBUG
+        if(myproc<100) log<<"start counting nnz..."<<std::endl;
+        #endif
+        if(SRC_Matrix.comm != MPI_COMM_NULL)
+        {
+            getNonZeroIndex(SRC_Matrix.LAYOUT, SRC_Matrix.nrow, SRC_Matrix.ncol, H_2d, S_2d, ZERO_Limit,
+                            nnz, rowidx, colidx);
+        }
+        #ifdef _DEBUG
+        if(myproc<100)
+        {
+            log<<"NonZeroIndex is got, nnz is "<<nnz<<std::endl;
+            log<<"rowidx size: "<<rowidx.size()<<"; colidx size: "<<colidx.size()<<std::endl;
+            /*
+            if(SRC_Matrix.comm != MPI_COMM_NULL)
+            {
+                log<<"NonZeroIndex :"<<std::endl;
+                if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+                {
+                    for(int i=0; i<nnz; ++i)
+                    {
+                        int HS_idx=rowidx[i]*SRC_Matrix.ncol+colidx[i];
+                        log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
+                        log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
+                    }
+                }
+                else
+                {
+                    for(int i=0; i<nnz; ++i)
+                    {
+                        int HS_idx=colidx[i]*SRC_Matrix.nrow+rowidx[i];
+                        log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
+                        log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
+                    }
+                }
+                log<<"nonzero index is output"<<std::endl;
+            }
+            else
+            {
+                log<<"no src_matrix elements in current process"<<std::endl;
+            }
+            */
+        }
+        #endif
+
+        // build all2all transformation parameters and the map index of receiver buffer
+        std::vector<int> buffer2ccsIndex;
+        buildTransformParameter(SRC_Matrix, DST_Matrix,
+                                NPROC_TRANS, GROUP_TRANS, COMM_TRANS,
+                                nnz, rowidx, colidx,
+                                sender_size, sender_size_process, sender_displacement_process,
+                                receiver_size, receiver_size_process, receiver_displacement_process, buffer2ccsIndex);
+        // Do transformation
+        #ifdef _DEBUG
+        if(myproc<100) log<<"Parameters are built"<<std::endl;
+        #endif
+        std::vector<double> sender_buffer(sender_size);
+        std::vector<double> receiver_buffer(receiver_size);
+        // put H to sender buffer
+        if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        {
+            for(int i=0; i<sender_size; ++i)
+            {
+                sender_buffer[i]=H_2d[rowidx[i]*SRC_Matrix.ncol+colidx[i]];
+            }
+        }
+        else
+        {
+            for(int i=0; i<sender_size; ++i)
+            {
+                sender_buffer[i]=H_2d[colidx[i]*SRC_Matrix.nrow+rowidx[i]];
+            }
+        }
+        #ifdef _DEBUG
+        if(myproc<100) log<<"H sender_buffer is filled"<<std::endl;
+        #endif
+        // do all2all transformation
+        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
+                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
+        // collect H from receiver buffer
+        #ifdef _DEBUG
+        if(myproc<100) log<<"H receiver_buffer is received"<<std::endl;
+        #endif
+        delete[] H_ccs;
+        H_ccs=new double[receiver_size];
+        buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs);
+        #ifdef _DEBUG
+        if(myproc<100) log<<"H_ccs is received"<<std::endl;
+        #endif
+
+        // put S to sender buffer
+        if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        {
+            for(int i=0; i<sender_size; ++i)
+            {
+                sender_buffer[i]=S_2d[rowidx[i]*SRC_Matrix.ncol+colidx[i]];
+            }
+        }
+        else
+        {
+            for(int i=0; i<sender_size; ++i)
+            {
+                sender_buffer[i]=S_2d[colidx[i]*SRC_Matrix.nrow+rowidx[i]];
+            }
+        }
+        #ifdef _DEBUG
+        if(myproc<100) log<<"S sender_buffer is filled"<<std::endl;
+        #endif
+        // do all2all transformation
+        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
+                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
+        // collect S from receiver buffer
+        #ifdef _DEBUG
+        if(myproc<100) log<<"S receiver_buffer is received"<<std::endl;
+        #endif
+        delete[] S_ccs;
+        S_ccs=new double[receiver_size];
+        buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs);
+        #ifdef _DEBUG
+        if(myproc<100) log<<"S_ccs is received"<<std::endl;
+        #endif
+    }
+    // clear and return
+    deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
+    #ifdef _DEBUG
+    if(myproc<100)
+    {
+        log<<"COMM_TRANS is deleted"<<std::endl;
+        log.close();
+    }
+    #endif
+    return 0;
+}
+
+// transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution
+// two source matrices share the same non-zero elements positions
+int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* EDMnzvalLocal,
+                    DistBCDMatrix& DST_Matrix, double* DM, double* EDM)
+{
+    // debug
+    #ifdef _DEBUG
+    OUT(ofs_running, "transformCCStoBCD: start");
+    MPI_Barrier(MPI_COMM_WORLD);
+    #endif
+    // end debug
+    int myproc;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
+    // debug
+    #ifdef _DEBUG
+    std::ofstream log;
+    if(myproc<100)
+    {
+        char f_log[80];
+        sprintf(f_log, "transformer_%2.2d.log", myproc);
+        //MPI_Barrier(MPI_COMM_WORLD);
+        log.open(f_log, std::ios::app);
+        //MPI_Barrier(MPI_COMM_WORLD);
+        log<<"\nstart transform DMnzval to DM"<<std::endl;
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    #endif
+    // end debug
+    MPI_Group GROUP_TRANS;
+    MPI_Comm COMM_TRANS=MPI_COMM_NULL;
+    newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS);
+    if(COMM_TRANS != MPI_COMM_NULL)
+    {
+        // init DM and EDM with 0
+        for(int i=0; i<DST_Matrix.nrow*DST_Matrix.ncol; ++i)
+        {
+            DM[i]=0;
+            EDM[i]=0;
+        }
+        #ifdef _DEBUG
+        // MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"DM and EDM filled by 0"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: DM and EDM filled by 0");
+        #endif
+        // setup number of local elements to be transfered to each remote processes
+        int NPROC_TRANS;
+        MPI_Comm_size(COMM_TRANS, &NPROC_TRANS);
+        // std::vector<int> sender_size_process(NPROC_TRANS);
+        // std::vector<int> sender_displacement_process(NPROC_TRANS);
+        // std::vector<int> receiver_size_process(NPROC_TRANS);
+        // std::vector<int> receiver_displacement_process(NPROC_TRANS);
+        int sender_size_process[NPROC_TRANS];
+        int sender_displacement_process[NPROC_TRANS];
+        int receiver_size_process[NPROC_TRANS];
+        int receiver_displacement_process[NPROC_TRANS];
+        #ifdef _DEBUG
+        if(myproc<100) log<<"NPROC_TRANS = "<<NPROC_TRANS<<std::endl;
+        // MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"build process rank map from BCD to TRANS"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: build process rank map from BCD to TRANS");
+        // MPI_Barrier(COMM_TRANS);
+        #endif
+        int nproc_bcd;
+        std::vector<int> proc_map_bcd_trans;
+        int myproc_trans;
+        MPI_Comm_rank(COMM_TRANS, &myproc_trans);
+        if(myproc_trans == 0)
+        {
+            MPI_Group_size(DST_Matrix.group, &nproc_bcd);
+            MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
+            proc_map_bcd_trans.resize(nproc_bcd, 0);
+            for(int i=0; i<nproc_bcd; ++i)
+            {
+                MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
+            }
+            MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
+        }
+        else
+        {
+            MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
+            proc_map_bcd_trans.resize(nproc_bcd, 0);
+            MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
+        }
+
+        #ifdef _DEBUG
+        // check process map from BCD comm to TRANS comm
+        if(myproc<100) 
+        {
+            log<<"check process map:\n";
+            log<<"pid in bcd\tpid in trans\n";
+            for(int i=0; i<nproc_bcd; ++i)
+            {
+                log<<i<<"\t\t"<<proc_map_bcd_trans[i]<<std::endl;
+            }
+            log<<"check pid from prow and pcol int bcd to pid in trans\n";
+            log<<"p_row  p_col  p_bcd  p_trans\n";
+            for(int i=0; i<DST_Matrix.nprows; ++i)
+            {
+                for(int j=0; j<DST_Matrix.npcols; ++j)
+                {
+                    int pid_bcd=DST_Matrix.pnum(i, j);
+                    int pid_trans=proc_map_bcd_trans[pid_bcd];
+                    log<<i<<"\t"<<j<<"\t"<<pid_bcd<<"\t"<<pid_trans<<std::endl;
+                }
+            }
+            log<<"setup alltoall parameters"<<std::endl;
+        }
+        // OUT(ofs_running, "transformCCStoBCD: setup alltoall parameters");
+        MPI_Barrier(COMM_TRANS);
+        #endif
+        // setup sender_size_process
+        // std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
+        for(int i=0; i<NPROC_TRANS; ++i) sender_size_process[i]=0;
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"sender_size_process is inited by 0"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: sender_size_process is inited by 0, size ", NPROC_TRANS);
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"display all columns and rows of nonzeros values:\n";
+        int log_nnz=0;
+        #endif
+        for(int icol=0; icol<SRC_Matrix.numColLocal; ++icol)
+        {
+            int g_col=SRC_Matrix.globalCol(icol);
+            int recv_pcol_bcd;
+            int recv_col=DST_Matrix.localCol(g_col, recv_pcol_bcd);
+            // #ifdef _DEBUG
+            // log<<g_col<<"\n ";
+            // #endif
+            //OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
+            for(int rowidx=SRC_Matrix.colptrLocal[icol]-1;rowidx<SRC_Matrix.colptrLocal[icol+1]-1; ++rowidx)
+            {
+                int g_row=SRC_Matrix.rowindLocal[rowidx]-1;
+                int recv_prow_bcd;
+                int recv_row=DST_Matrix.localRow(g_row, recv_prow_bcd);
+                int recv_proc_bcd=DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
+                int recv_proc=proc_map_bcd_trans[recv_proc_bcd];
+                ++sender_size_process[recv_proc];
+                // #ifdef _DEBUG
+                // log<<" "<<g_row;
+                // ++log_nnz;
+                // #endif
+            }
+            // log<<"\n";
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"sender_size_process is counted, total nonzeros are: "<<log_nnz<<std::endl;
+            log<<"target pid\tsize\n";
+            for (int i = 0; i < NPROC_TRANS; i++)
+            {
+                log<<i<<"\t\t"<<sender_size_process[i]<<std::endl;
+            }
+        }
+        //OUT(ofs_running, "transformCCStoBCD: sender_size_process is counted");
+        MPI_Barrier(COMM_TRANS);
+        #endif
+
+        // setup receiver_size_process
+        //std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0);
+        for(int i=0; i<NPROC_TRANS; ++i) receiver_size_process[i]=0;
+        MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"receiver_size_process is got"<<std::endl;
+            log<<"target pid\tsize\n";
+            for (int i = 0; i < NPROC_TRANS; i++)
+            {
+                log<<i<<"\t\t"<<receiver_size_process[i]<<std::endl;
+            }
+        }
+        // OUT(ofs_running, "transformCCStoBCD: receiver_size_process is got");
+        #endif
+
+        // setup sender_displacement and receiver_displacement
+        sender_displacement_process[0]=0;
+        receiver_displacement_process[0]=0;
+        int receiver_size=receiver_size_process[0];
+        for(int i=1; i<NPROC_TRANS; ++i)
+        {
+            sender_displacement_process[i]=sender_displacement_process[i-1]+sender_size_process[i-1];
+            receiver_displacement_process[i]=receiver_displacement_process[i-1]+receiver_size_process[i-1];
+            receiver_size+=receiver_size_process[i];
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"displacements are built"<<std::endl;
+            log<<"check alltoallv parameters"<<std::endl;
+            for(int i=0; i<NPROC_TRANS; ++i)
+            {
+                log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
+                log<<i<<"\t"<<sender_size_process[i]<<"\t\t\t"<<sender_displacement_process[i]<<"\t\t\t\t"<<
+                                receiver_size_process[i]<<"\t\t\t"<<receiver_displacement_process[i]<<std::endl;
+            }
+        }
+        // OUT(ofs_running, "transformCCStoBCD: displacements are built");
+        #endif
+
+        // setup up sender index and receiver index
+        int sender_size=SRC_Matrix.nnzLocal;
+        int* sender_index;
+        double* sender_buffer;
+        int* dst_index;
+        int* receiver_index;
+        double* receiver_buffer;
+        #ifdef _DEBUG
+        if(myproc<100)
+        {
+            log<<"sender_size = "<<sender_size<<"; receiver_size = "<<receiver_size<<std::endl;
+            log.flush();
+            log<<"start allocating sender_index, dst_index and receiver_index..."<<std::endl;
+            log.flush();
+        }
+        #endif
+        if(sender_size > 0)
+        {
+            sender_index=new int[sender_size];
+            for(int i=0; i<sender_size; ++i)
+            {
+                sender_index[i]=-1;
+            }
+            sender_buffer=new double[sender_size];
+            dst_index=new int[2*sender_size];
+            for(int i=0; i<2*sender_size; ++i)
+            {
+                dst_index[i]=-1;
+            }
+        }
+        else
+        {
+            sender_index=new int[1];
+            sender_index[0]=-1;
+            sender_buffer=new double[1];
+            dst_index=new int[2];
+            dst_index[0]=-1;
+            dst_index[1]=-1;
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"; receiver_index size: ";
+        #endif
+        if(receiver_size > 0)
+        {
+            receiver_index=new int[2*receiver_size];
+            receiver_buffer=new double[receiver_size];
+            for(int i=0; i<2*receiver_size; ++i)
+            {
+                receiver_index[i]=-1;
+            }
+            for(int i=0; i<receiver_size; ++i)
+            {
+                receiver_buffer[i]=-1;
+            }
+        }
+        else
+        {
+            receiver_index=new int[2];
+            receiver_buffer=new double[1];
+            receiver_index[0]=-1;
+            receiver_index[1]=-1;
+            receiver_buffer[0]=-1;
+        }
+
+        // pointer to the first empty slot of each process
+        // std::vector<int> p(sender_displacement_process);
+        int p[NPROC_TRANS];
+        for(int i=0; i<NPROC_TRANS; ++i)
+        {
+            p[i]=sender_displacement_process[i];
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"check BCD pnum"<<std::endl;
+            log.flush();
+            for(int i=0; i<DST_Matrix.nprows; ++i)
+            {
+                for(int j=0; j<DST_Matrix.npcols; ++j)
+                {
+                    log<<i<<"\t"<<j<<"\t"<<DST_Matrix.pnum(i, j)<<std::endl;
+                }
+            }
+            log<<"source CCS matrix parameters:\n";
+            log<<"numColLocal: "<<SRC_Matrix.numColLocal<<std::endl;
+            log<<"pointer to beginning of each process is inited by sender_displacement_process"<<std::endl;
+            //log<<"icol"<<"\t"<<"g_col"<<"\t"<<"col(bcd)"<<"\t"<<"pcol(bcd)"<<std::endl;
+            //log.flush();
+        }
+        // MPI_Barrier(COMM_TRANS);
+        #endif
+
+        int idx=0;
+        #ifdef _DEBUG
+        if(myproc<100) log<<"idx start at "<<idx<<std::endl;
+        #endif
+        for(int icol=0; icol<SRC_Matrix.numColLocal; ++icol)
+        {
+            int g_col=SRC_Matrix.globalCol(icol);
+            int recv_pcol_bcd;
+            int recv_col=DST_Matrix.localCol(g_col, recv_pcol_bcd);
+            for(int rowidx=SRC_Matrix.colptrLocal[icol]-1; rowidx<SRC_Matrix.colptrLocal[icol+1]-1; ++rowidx)
+            {
+                int g_row=SRC_Matrix.rowindLocal[rowidx]-1;
+                int recv_prow_bcd;
+                int recv_row=DST_Matrix.localRow(g_row, recv_prow_bcd);
+                #ifdef _DEBUG
+                if(myproc<100)
+                {
+                    if(recv_prow_bcd >= DST_Matrix.nprows || recv_prow_bcd < 0)
+                    {
+                        log<<"ERROR: recv_prow_bcd error! recv_prow_bcd is "<<recv_prow_bcd<<"; max is "<<DST_Matrix.nprows<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                int recv_proc_bcd=DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
+                #ifdef _DEBUG
+                // MPI_Barrier(COMM_TRANS);
+                if(myproc<100)
+                {
+                    if(recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0)
+                    {
+                        log<<"ERROR: recv_proc_bcd outbound! recv_proc_bcd is "<<recv_proc_bcd<<"; max is "<<NPROC_TRANS<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                int recv_proc=proc_map_bcd_trans[recv_proc_bcd];
+                #ifdef _DEBUG
+                // MPI_Barrier(COMM_TRANS);
+                if(myproc<100)
+                {
+                    if(p[recv_proc] >= sender_size || p[recv_proc] < 0)
+                    {
+                        log<<"ERROR: sender_index's index outbound! "<<std::endl;
+                        log<<recv_prow_bcd<<" "<<recv_pcol_bcd<<recv_proc_bcd<<" "<<recv_proc<<std::endl;
+                        log<<p[recv_proc]<<" "<<sender_size<<std::endl;
+                        log.flush();
+                    }
+                }
+                // MPI_Barrier(COMM_TRANS);
+                #endif
+                sender_index[p[recv_proc]]=idx;
+                #ifdef _DEBUG
+                // MPI_Barrier(COMM_TRANS);
+                if(myproc<100)
+                {
+                    if((p[recv_proc]*2+1) >= (2*sender_size)|| (p[recv_proc]*2+1) < 0)
+                    {
+                        log<<"ERROR: dst_index's index outbound! recv_proc:"<<recv_proc<<"; p:"<<p[recv_proc]*2+1<<"; max is "<<2*sender_size<<std::endl;
+                        log.flush();
+                    }
+                }
+                // MPI_Barrier(COMM_TRANS);
+                #endif
+                dst_index[p[recv_proc]*2]=recv_row;
+                dst_index[p[recv_proc]*2+1]=recv_col;
+                ++p[recv_proc];
+                ++idx;
+            }
+        }
+
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        // check sender_index and dst_index
+        if(myproc<100)
+        {
+            for(int i=0; i<sender_size; ++i)
+            {
+                if(sender_index[i]<0 || sender_index[i]>SRC_Matrix.nnzLocal)
+                {
+                    log<<"ERROR! sender_index outbound: "<<i<<" "<<sender_index[i]<<std::endl;
+                    log.flush();
+                }
+            }
+            for(int i=0; i<2*sender_size; ++i)
+            {
+                if(dst_index[i]<0 || dst_index[i]>DST_Matrix.size)
+                {
+                    log<<"ERROR! dst_index outbound: "<<i<<" "<<dst_index[i]<<" "<<DST_Matrix.size<<std::endl;
+                    log.flush();
+                }
+            }
+            log<<"sender_index is built"<<std::endl;
+            log<<"sender_size = "<<sender_size<<std::endl;
+            // for(int i=0; i<sender_size; i+=sender_size/100)
+            //     log<<i<<"\t"<<dst_index[2*i]<<"\t"<<dst_index[2*i+1]<<std::endl;
+            // OUT(ofs_running, "transformCCStoBCD: sender_index is built");
+
+            // save sender_index to file for debug
+            /*std::ofstream log_sender_index;
+            for(int i=0; i<NPROC_TRANS; ++i)
+            {
+                if(sender_size_process[i] > 0)
+                {
+                    sprintf(f_log, "sender_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
+                    log_sender_index.open(f_log, std::ios::app);
+                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i]; ++j)
+                        log_sender_index<<sender_index[j]<<std::endl;
+                    log_sender_index.close();
+                }
+            }
+            */
+        }
+        #endif
+
+        for(int i=0; i<NPROC_TRANS; ++i)
+        {
+            sender_size_process[i]*=2;
+            sender_displacement_process[i]*=2;
+            receiver_size_process[i]*=2;
+            receiver_displacement_process[i]*=2;
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"Alltoall parameters for index array"<<std::endl;
+            log<<"dst_index size:"<<2*sender_size<<"\t receiver_index size: "<<2*receiver_size<<std::endl;
+            log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
+            for(int i=0; i<NPROC_TRANS; ++i)
+            {
+                log<<i<<"\t"<<sender_size_process[i]<<"\t\t"<<sender_displacement_process[i]<<"\t\t"
+                   <<receiver_size_process[i]<<"\t\t"<<receiver_displacement_process[i]<<std::endl;
+            }
+            // save dst_index to file for debug
+            /*std::ofstream log_dst_index;
+            for(int i=0; i<NPROC_TRANS; ++i)
+            {
+                if(sender_size_process[i] > 0)
+                {
+                    sprintf(f_log, "dst_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
+                    log_dst_index.open(f_log, std::ios::app);
+                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i]; ++j)
+                        log_dst_index<<dst_index[j]<<std::endl;
+                    log_dst_index.close();
+                }
+            }
+            */
+            log<<"start alltoallv for index"<<std::endl;
+        }
+        MPI_Barrier(COMM_TRANS);
+        // OUT(ofs_running, "transformCCStoBCD: sender_index is built");
+        #endif
+        MPI_Alltoallv(&dst_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT,
+                      &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS);
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100)
+        {
+            log<<"receiver_index is got"<<std::endl;
+            log<<"receiver_size is: "<<receiver_size<<std::endl;
+            log.flush();
+        }
+        /*
+        // save receiver_index to file for debug
+        std::ofstream log_rcv_index;
+        for(int i=0; i<NPROC_TRANS; ++i)
+        {
+            log<<"receive index (from proc_trans "<<i<<") is from "<<receiver_displacement_process[i]<<" to "<<receiver_displacement_process[i]+receiver_size_process[i]<<std::endl;
+            if(receiver_size_process[i] > 0)
+            {
+                sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans);
+                log_rcv_index.open(f_log, std::ios::app);
+                for(int j=receiver_displacement_process[i]; j<receiver_displacement_process[i]+receiver_size_process[i]; ++j)
+                    log_rcv_index<<receiver_index[j]<<std::endl;
+                log_rcv_index.close();
+            }
+        }
+        log<<"receiver_index values are saved"<<std::endl;
+        log.flush();
+        // MPI_Barrier(COMM_TRANS);
+
+        for(int i=0; i<receiver_size; ++i)
+        {
+            if(receiver_index[i*2]<0)
+            {
+                log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
+                log.flush();
+            }
+            else if(receiver_index[i*2]>DST_Matrix.nrow)
+            {
+                log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+                log.flush();
+            }
+            if(receiver_index[i*2+1]<0)
+            {
+                log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
+                log.flush();
+            }
+            else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+            {
+                log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+                log.flush();
+            }
+        }
+        log<<"receiver_index values are checked"<<std::endl;
+        log.flush();
+        MPI_Barrier(COMM_TRANS);
+        // OUT(ofs_running, "transformCCStoBCD: receiver_index is got");
+        */
+        #endif
+        // reset size and displacement for transfering matrix value by alltoall
+        for(int i=0; i<NPROC_TRANS; ++i)
+        {
+            sender_size_process[i]/=2;
+            sender_displacement_process[i]/=2;
+            receiver_size_process[i]/=2;
+            receiver_displacement_process[i]/=2;
+        }
+        #ifdef _DEBUG
+        if(myproc<100)
+        {
+            log<<"size_process and displacement_process are reset for buffer transform"<<std::endl;
+            log.flush();
+        }
+        MPI_Barrier(COMM_TRANS);
+        #endif
+
+        // transfer DM
+        // set up DM sender buffer
+        for(int i=0; i<sender_size; ++i)
+        {
+            sender_buffer[i]=DMnzvalLocal[sender_index[i]];
+        }
+        #ifdef _DEBUG
+        if(myproc<100)
+        {
+            log<<"DM(CCS) is put to sender_buffer"<<std::endl;
+            log.flush();
+            // OUT(ofs_running, "transformCCStoBCD: DM(CCS) is put to sender_buffer");
+
+            // check receiver_index, which may be changed after alltoall for buffer
+            for(int i=0; i<receiver_size; ++i)
+            {
+                if(receiver_index[i*2]<0)
+                {
+                    log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
+                    log.flush();
+                }
+                else if(receiver_index[i*2]>DST_Matrix.nrow)
+                {
+                    log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+                    log.flush();
+                }
+                if(receiver_index[i*2+1]<0)
+                {
+                    log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
+                    log.flush();
+                }
+                else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+                {
+                    log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+                    log.flush();
+                }
+            }
+            log<<"receiver_index values are checked"<<std::endl;
+            log.flush();
+            // check parameters for alltoall for buffer
+            log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
+            for(int i=0; i<NPROC_TRANS; ++i)
+            {
+                log<<i<<"\t"<<sender_size_process[i]<<"\t\t"<<sender_displacement_process[i]<<"\t\t"
+                   <<receiver_size_process[i]<<"\t\t"<<receiver_displacement_process[i]<<std::endl;
+            }
+            log.flush();
+        }
+        MPI_Barrier(COMM_TRANS);
+        #endif
+        // transfer sender buffer to receiver buffer
+        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
+                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
+
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"receiver_buffer is got from DM"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
+        #endif
+        // transform receiver_buffer to DM
+        if(DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        {
+            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
+            for(int i=0; i<receiver_size; ++i)
+            {
+                int ix=receiver_index[2*i];
+                int iy=receiver_index[2*i+1];
+                int idx=ix*DST_Matrix.ncol+iy;
+                #ifdef _DEBUG
+                if(myproc<100)
+                {
+                    if(idx<0 || idx>=DST_Matrix_elem)
+                    {
+                        log<<"idx for DM ERROR: idx is "<<idx<<"; DM total size is "<<DST_Matrix_elem<<std::endl;
+                        log<<"index number is "<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" ncol = "<<DST_Matrix.ncol<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                DM[idx]=receiver_buffer[i];
+            }
+        } else
+        {
+            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
+            for(int i=0; i<receiver_size; ++i)
+            {
+                int ix=receiver_index[2*i];
+                int iy=receiver_index[2*i+1];
+                int idx=iy*DST_Matrix.nrow+ix;
+                #ifdef _DEBUG
+                if(myproc<100)
+                {
+                    if(idx<0 || idx>=DST_Matrix_elem)
+                    {
+                        log<<"idx for DM ERROR: idx is "<<idx<<"; DM total size is "<<DST_Matrix_elem<<std::endl;
+                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" nrow = "<<DST_Matrix.nrow<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                DM[idx]=receiver_buffer[i];
+            }
+        }
+
+        #ifdef _DEBUG
+        if(myproc<100) log<<"DM(BCD) is got from receiver_buffer"<<std::endl;
+        MPI_Barrier(COMM_TRANS);
+        // OUT(ofs_running, "transformCCStoBCD: DM(BCD) is got from receiver_buffer");
+        #endif
+        // setup up sender buffer of EDM
+        for(int i=0; i<sender_size; ++i)
+        {
+            sender_buffer[i]=EDMnzvalLocal[sender_index[i]];
+        }
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"EDM(CCS) is put to sender_buffer"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: EDM(CCS) is put to sender_buffer");
+        #endif
+
+        // transfer sender buffer to receiver buffer
+        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
+                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
+        #ifdef _DEBUG
+        MPI_Barrier(COMM_TRANS);
+        if(myproc<100) log<<"receiver_buffer is got from EDM"<<std::endl;
+        // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
+        #endif
+        // transform receiver_buffer to EDM
+        if(DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        {
+            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
+            for(int i=0; i<receiver_size; ++i)
+            {
+                int ix=receiver_index[2*i];
+                int iy=receiver_index[2*i+1];
+                int idx=ix*DST_Matrix.ncol+iy;
+                #ifdef _DEBUG
+                if(myproc<100)
+                {
+                    if(idx<0 || idx>=DST_Matrix_elem)
+                    {
+                        log<<"idx for EDM ERROR: idx is "<<idx<<"; EDM total size is "<<DST_Matrix_elem<<std::endl;
+                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" ncol = "<<DST_Matrix.ncol<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                EDM[idx]=receiver_buffer[i];
+            }
+        } else
+        {
+            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
+            for(int i=0; i<receiver_size; ++i)
+            {
+                int ix=receiver_index[2*i];
+                int iy=receiver_index[2*i+1];
+                int idx=iy*DST_Matrix.nrow+ix;
+                #ifdef _DEBUG
+                if(myproc<100)
+                {
+                    if(idx<0 || idx>=DST_Matrix_elem)
+                    {
+                        log<<"idx for EDM ERROR: idx is "<<idx<<"; EDM total size is "<<DST_Matrix_elem<<std::endl;
+                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" nrow = "<<DST_Matrix.nrow<<std::endl;
+                        log.flush();
+                    }
+                }
+                #endif
+                EDM[idx]=receiver_buffer[i];
+            }
+        }
+        #ifdef _DEBUG
+        if(myproc<100) log<<"EDM(BCD) is got from receiver_buffer"<<std::endl;
+        MPI_Barrier(COMM_TRANS);
+        #endif
+        delete[] sender_index;
+        delete[] sender_buffer;
+        delete[] dst_index;
+        delete[] receiver_index;
+        delete[] receiver_buffer;
+        #ifdef _DEBUG
+        if(myproc<100) log<<"work arrays are deleted"<<std::endl;
+        #endif
+    }
+    #ifdef _DEBUG
+    if(myproc<100) log<<"OUT COMM_TRANS"<<std::endl;
+    if(myproc<100) log<<"before deleteGroupCommTrans"<<std::endl;
+    #endif
+    deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
+    #ifdef _DEBUG
+    MPI_Barrier(MPI_COMM_WORLD);
+    if(myproc<100)
+    {
+        log<<"COMM_TRANS is deleted"<<std::endl;
+        log.close();
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    OUT(ofs_running, "transformCCStoBCD: finish job, COMM_TRANS is deleted");
+    #endif
+    return 0;
+}
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.h b/source/module_hsolver/pexsi/DistMatrixTransformer.h
new file mode 100644
index 0000000000..be92935642
--- /dev/null
+++ b/source/module_hsolver/pexsi/DistMatrixTransformer.h
@@ -0,0 +1,20 @@
+// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
+// they should have same MPI communicator
+// The local matrix of BCD is column-major order
+// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, 
+//                    DistCCSMatrix &DST_Matrix, double*& H_ccs);
+
+// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
+// two destination matrices share the same non-zero elements positions
+// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, even if
+// one of them is acturely zero
+// All matrices must have same MPI communicator
+int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, const double ZERO_Limit,
+                    DistCCSMatrix &DST_Matrix,  double*& H_ccs, double*& S_ccs);
+
+// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, 
+                    // DistBCDMatrix& DST_Matrix, double* DM_2d);
+
+int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal,
+                    DistBCDMatrix& DST_Matrix, double* DM_2d, double* END_2d);
+
diff --git a/source/module_hsolver/pexsi/pexsi_solver.cpp b/source/module_hsolver/pexsi/pexsi_solver.cpp
new file mode 100644
index 0000000000..03929098e0
--- /dev/null
+++ b/source/module_hsolver/pexsi/pexsi_solver.cpp
@@ -0,0 +1,59 @@
+#include "pexsi_solver.h"
+
+#include "module_base/global_variable.h"
+#include "simplePEXSI.h"
+
+#include <cstring>
+#include <mpi.h>
+
+PEXSI_Solver::PEXSI_Solver(const int blacs_text,
+                           const int nb,
+                           const int nrow,
+                           const int ncol,
+                           const double* h,
+                           const double* s,
+                           double* DM,
+                           double* EDM,
+                           double& totalEnergyH,
+                           double& totalEnergyS,
+                           double& totalFreeEnergy)
+{
+    this->blacs_text = blacs_text;
+    this->nb = nb;
+    this->nrow = nrow;
+    this->ncol = ncol;
+    this->h = new double[nrow * ncol];
+    this->s = new double[nrow * ncol];
+    std::memcpy(this->h, h, nrow * ncol * sizeof(double));
+    std::memcpy(this->s, s, nrow * ncol * sizeof(double));
+    this->DM = new double[nrow * ncol];
+    this->EDM = new double[nrow * ncol];
+    this->totalEnergyH = 0.0;
+    this->totalEnergyS = 0.0;
+    this->totalFreeEnergy = 0.0;
+}
+
+int PEXSI_Solver::solve()
+{
+    extern MPI_Comm DIAG_WORLD;
+    extern MPI_Comm GRID_WORLD;
+    extern MPI_Group GRID_GROUP;
+    return simplePEXSI(MPI_COMM_WORLD,
+                       MPI_COMM_WORLD,
+                       GRID_GROUP,
+                       this->blacs_text,
+                       GlobalV::NLOCAL,
+                       this->nb,
+                       this->nrow,
+                       this->ncol,
+                       'C',
+                       this->h,
+                       this->s,
+                       GlobalV::nelec,
+                       "PEXSIOPTION",
+                       this->DM,
+                       this->EDM,
+                       this->totalEnergyH,
+                       this->totalEnergyS,
+                       this->totalFreeEnergy);
+}
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/pexsi/pexsi_solver.h
new file mode 100644
index 0000000000..95ade7c15f
--- /dev/null
+++ b/source/module_hsolver/pexsi/pexsi_solver.h
@@ -0,0 +1,30 @@
+#ifndef PEXSI_Solver_H
+#define PEXSI_Solver_H
+class PEXSI_Solver
+{
+  public:
+    PEXSI_Solver(const int blacs_text,
+                 const int nb,
+                 const int nrow,
+                 const int ncol,
+                 const double* h,
+                 const double* s,
+                 double* DM,
+                 double* EDM,
+                 double& totalEnergyH,
+                 double& totalEnergyS,
+                 double& totalFreeEnergy);
+    int solve();
+    int blacs_text;
+    int nb;
+    int nrow;
+    int ncol;
+    double* h;
+    double* s;
+    double* DM;
+    double* EDM;
+    double totalEnergyH;
+    double totalEnergyS;
+    double totalFreeEnergy;
+};
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp
new file mode 100644
index 0000000000..6f1d2d1afe
--- /dev/null
+++ b/source/module_hsolver/pexsi/simplePEXSI.cpp
@@ -0,0 +1,697 @@
+// use PEXSI to solve a Kohn-Sham equation
+// the H and S matrices are given by 2D block cyclic distribution
+// the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution
+// #include "mpi.h"
+#include <iostream>
+#include <fstream>
+#include <cstring>
+#include <cmath>
+#include <cfloat>
+#include <memory>
+#include <mpi.h>
+#include "c_pexsi_interface.h"
+#include "module_base/lapack_connector.h"
+#include "module_base/timer.h"
+#include "module_base/tool_quit.h"
+#include "DistCCSMatrix.h"
+#include "DistBCDMatrix.h"
+#include "DistMatrixTransformer.h"
+
+inline void strtolower(char *sa, char *sb)
+{
+    char c;
+    int len = strlen(sa);
+    for (int i = 0; i < len; i++)
+    {
+        c = sa[i];
+        sb[i] = tolower(c);
+    }
+    sb[len] = '\0';
+}
+
+inline void setDefaultOption(int* int_para, double* double_para)
+{
+    // options.spin=2;
+    double_para[0]=2;
+    // options.gap=0;
+    double_para[2]=0;
+    // ZERO_Limit=DBL_MIN;
+    double_para[11]=DBL_MIN;
+    // options.matrixType=0;
+    int_para[3]=0;
+    // options.solver=1;
+    int_para[6]=1;
+    // options.ordering=0;
+    int_para[8]=0;
+    // options.rowOrdering=0;
+    int_para[9]=0;
+    // options.symmetric=0;
+    int_para[11]=0;
+    // options.transpose=0;
+    int_para[12]=0;
+    // options.nPoints=2;
+    int_para[14]=2;
+    // options.verbosity=1;
+    int_para[15]=1;
+}
+
+int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOptions& options, int& numProcessPerPole, double& ZERO_Limit)
+{
+
+    // temp variable arrays read from conf file and will be bcast to all processors
+
+    // parameter array of type int,
+    //  0: numPole
+    //  1: isInertiaCount
+    //  2: maxPEXSIIter
+    //  3: matrixType
+    //  4: isSymbolicFactorize
+    //  5: isConstructCommPattern
+    //  6: solver
+    //  7: symmetricStorage
+    //  8: ordering
+    //  9: rowOrdering
+    // 10: npSymbFact
+    // 11: symmetric
+    // 12: transpose
+    // 13: method
+    // 14: nPoints
+    // 15: verbosity
+    // 16: numProcessPerPole
+    int int_para[17];
+
+    // parameter array of type double
+    //  0: spin
+    //  1: temperature
+    //  2: gap
+    //  3: deltaE
+    //  4: muMin0
+    //  5: muMax0
+    //  6: mu0
+    //  7: muInertiaTolerance
+    //  8: muInertiaExpansion
+    //  9: muPEXSISafeGuard
+    // 10: numElectronPEXSITolerance
+    // 11: ZERO_Limit
+    double double_para[12];
+    int myid;
+    MPI_Comm_rank(comm, &myid);
+    if(myid==0)
+    {
+        std::ifstream ifs(PexsiOptionFile.c_str());
+        if(! ifs)
+        {
+            return 1;
+        }
+        setDefaultOption(int_para, double_para);
+
+        ifs.clear();
+        ifs.seekg(0);
+
+        char key[128];
+        char lowercase_key[128];
+        const int LINE_LINGTH=1024;
+        char unused_string[LINE_LINGTH];
+
+        while(ifs.good())
+        {
+            ifs >> key;
+            //~ cout<<"readin word is: "<<key<<endl;
+            strtolower(key, lowercase_key);
+            if(strcmp("spin", lowercase_key)==0)
+            {
+                //~ ifs>>options.spin;
+                ifs>>double_para[0];
+                //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
+            }
+            else if(strcmp("temperature", lowercase_key)==0)
+            {
+                //~ ifs>>options.temperature;
+                ifs>>double_para[1];
+                //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
+            }
+            else if(strcmp("gap", lowercase_key)==0)
+            {
+                //~ ifs>>options.gap;
+                ifs>>double_para[2];
+                //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
+            }
+            else if(strcmp("deltae", lowercase_key)==0)
+            {
+                //~ ifs>>options.deltaE;
+                ifs>>double_para[3];
+                //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
+            }
+            else if(strcmp("numpole", lowercase_key)==0)
+            {
+                //~ ifs>>options.numPole;
+                ifs>>int_para[0];
+                //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
+            }
+            else if(strcmp("isinertiacount", lowercase_key)==0)
+            {
+                //~ ifs>>options.isInertiaCount;
+                ifs>>int_para[1];
+                //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
+            }
+            else if(strcmp("maxpexsiiter", lowercase_key)==0)
+            {
+                //~ ifs>>options.maxPEXSIIter;
+                ifs>>int_para[2];
+                //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
+            }
+            else if(strcmp("mumin0", lowercase_key)==0)
+            {
+                //~ ifs>>options.muMin0;
+                ifs>>double_para[4];
+                //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
+            }
+            else if(strcmp("mumax0", lowercase_key)==0)
+            {
+                //~ ifs>>options.muMax0;
+                ifs>>double_para[5];
+                //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
+            }
+            else if(strcmp("mu0", lowercase_key)==0)
+            {
+                //~ ifs>>options.mu0;
+                ifs>>double_para[6];
+                //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
+            }
+            else if(strcmp("muinertiatolerance", lowercase_key)==0)
+            {
+                //~ ifs>>options.muInertiaTolerance;
+                ifs>>double_para[7];
+                //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
+            }
+            else if(strcmp("muinertiaexpansion", lowercase_key)==0)
+            {
+                //~ ifs>>options.muInertiaExpansion;
+                ifs>>double_para[8];
+                //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
+            }
+            else if(strcmp("mupexsisafeguard", lowercase_key)==0)
+            {
+                //~ ifs>>options.muPEXSISafeGuard;
+                ifs>>double_para[9];
+                //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
+            }
+            else if(strcmp("numelectronpexsitolerance", lowercase_key)==0)
+            {
+                //~ ifs>>options.numElectronPEXSITolerance;
+                ifs>>double_para[10];
+                //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
+            }
+            else if(strcmp("zero_limit", lowercase_key)==0)
+            {
+                ifs>>double_para[11];
+            }
+            else if(strcmp("matrixtype", lowercase_key)==0)
+            {
+                //~ ifs>>options.matrixType;
+                ifs>>int_para[3];
+                //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
+            }
+            else if(strcmp("issymbolicfactorize", lowercase_key)==0)
+            {
+                //~ ifs>>options.isSymbolicFactorize;
+                ifs>>int_para[4];
+                //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
+            }
+            else if(strcmp("isconstructcommpattern", lowercase_key)==0)
+            {
+                //~ ifs>>options.isConstructCommPattern;
+                ifs>>int_para[5];
+                //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
+            }
+            else if(strcmp("solver", lowercase_key)==0)
+            {
+                //~ ifs>>options.solver;
+                ifs>>int_para[6];
+                //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
+            }
+            else if(strcmp("symmetricstorage", lowercase_key)==0)
+            {
+                //~ ifs>>options.symmetricStorage;
+                ifs>>int_para[7];
+                //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
+            }
+            else if(strcmp("ordering", lowercase_key)==0)
+            {
+                //~ ifs>>options.ordering;
+                ifs>>int_para[8];
+                //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
+            }
+            else if(strcmp("rowordering", lowercase_key)==0)
+            {
+                //~ ifs>>options.rowOrdering;
+                ifs>>int_para[9];
+                //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
+            }
+            else if(strcmp("npsymbfact", lowercase_key)==0)
+            {
+                //~ ifs>>options.npSymbFact;
+                ifs>>int_para[10];
+                //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
+            }
+            else if(strcmp("symmetric", lowercase_key)==0)
+            {
+                //~ ifs>>options.symmetric;
+                ifs>>int_para[11];
+                //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
+            }
+            else if(strcmp("transpose", lowercase_key)==0)
+            {
+                //~ ifs>>options.transpose;
+                ifs>>int_para[12];
+                //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
+            }
+            else if(strcmp("method", lowercase_key)==0)
+            {
+                //~ ifs>>options.method;
+                ifs>>int_para[13];
+                //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
+            }
+            else if(strcmp("npoints", lowercase_key)==0)
+            {
+                //~ ifs>>options.nPoints;
+                ifs>>int_para[14];
+                //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
+            }
+            else if(strcmp("verbosity", lowercase_key)==0)
+            {
+                //~ ifs>>options.verbosity;
+                ifs>>int_para[15];
+                //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
+            }
+            else if(strcmp("numprocessperpole", lowercase_key)==0)
+            {
+                //~ ifs>>options.verbosity;
+                ifs>>int_para[16];
+                //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
+            }
+            else
+            {
+                if(key[0] == '#' || key[0] == '/')
+                {
+                    ifs.getline(unused_string, LINE_LINGTH);
+                }
+                else
+                {
+                    std::cout<<" THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
+                    return 1;
+                }
+            }
+        }
+    }
+
+    // broadcast all options
+    MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
+    MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
+
+    // setup PEXSI options from int_para and double_para
+    options.numPole=int_para[0];
+    options.isInertiaCount=int_para[1];
+    options.maxPEXSIIter=int_para[2];
+    options.matrixType=int_para[3];
+    options.isSymbolicFactorize=int_para[4];
+    options.isConstructCommPattern=int_para[5];
+    options.solver=int_para[6];
+    options.symmetricStorage=int_para[7];
+    options.ordering=int_para[8];
+    options.rowOrdering=int_para[9];
+    options.npSymbFact=int_para[10];
+    options.symmetric=int_para[11];
+    options.transpose=int_para[12];
+    options.method=int_para[13];
+    options.nPoints=int_para[14];
+    options.verbosity=int_para[15];
+    numProcessPerPole=int_para[16];
+
+    options.spin=double_para[0];
+    options.temperature=double_para[1];
+    options.gap=double_para[2];
+    options.deltaE=double_para[3];
+    options.muMin0=double_para[4];
+    options.muMax0=double_para[5];
+    options.mu0=double_para[6];
+    options.muInertiaTolerance=double_para[7];
+    options.muInertiaExpansion=double_para[8];
+    options.muPEXSISafeGuard=double_para[9];
+    options.numElectronPEXSITolerance=double_para[10];
+    ZERO_Limit=double_para[11];
+
+    return 0;
+}
+
+void splitNProc2NProwNPcol(const int NPROC, int& nprow, int& npcol)
+{
+    int integral_part = (int)sqrt(NPROC);
+    if(NPROC%integral_part == 0)
+    {
+        nprow=integral_part;
+        npcol=NPROC/integral_part;
+    }
+    else
+    {
+        int flag;
+        int i;
+        int low=pow(integral_part,2);
+        int high=pow(integral_part+1,2);
+        if( (NPROC-low) >= (high-NPROC))
+        {
+            flag=integral_part+1;
+        }
+        else
+        {
+            flag=integral_part;
+        }
+        for(i=flag; i>0; ++i)
+        {
+            if(NPROC%i == 0) break;
+        }
+        nprow=i;
+        npcol=NPROC/i;
+    }
+}
+
+int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt,  // communicator parameters
+                const int size, const int nblk, const int nrow, const int ncol, char LAYOUT,  // matrix parameters
+                double* H, double* S,                 // input matrices
+                const double numElectronExact, const std::string PexsiOptionFile,         // pexsi parameters file
+                double*& DM, double*& EDM,      // output matrices
+                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy)      // output energy
+{
+    int out_log=0;
+    if(out_log == 1)
+    {
+        std::stringstream ss;
+        int nproc_2D, nproc_PEXSI;
+        int myid_2D, myid_PEXSI;
+        if(comm_2D != MPI_COMM_NULL)
+        {
+            MPI_Comm_size(comm_2D, &nproc_2D);
+            MPI_Comm_rank(comm_2D, &myid_2D);
+            ss.str("");
+            ss<<"\tIn 2D comm, myid = "<<myid_2D<<"; nproc = "<<nproc_2D<<std::endl;
+            ss<<"H[0] = "<<H[0]<<", H["<<nrow*ncol-1<<"] = "<<H[nrow*ncol-1]<<std::endl;
+            ss<<"S[0] = "<<S[0]<<", S["<<nrow*ncol-1<<"] = "<<S[nrow*ncol-1]<<std::endl;
+            ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss.str());
+        }
+        if(comm_PEXSI != MPI_COMM_NULL)
+        {
+            MPI_Comm_size(comm_PEXSI, &nproc_PEXSI);
+            MPI_Comm_rank(comm_PEXSI, &myid_PEXSI);
+            ss.str("");
+            ss<<"\tIn PEXSI comm, myid = "<<myid_PEXSI<<"; nproc = "<<nproc_PEXSI;
+            ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss.str());
+        }
+    }
+    if(comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) return 0;
+    int myid;
+    std::ofstream f_log;
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        MPI_Comm_rank(comm_PEXSI, &myid);
+        // for log
+        #ifdef _DEBUG
+        if(myid<100) log_openfile(myid, f_log);
+        #endif
+    }
+
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"set up PEXSI parameter, begin");
+    // set up PEXSI parameter
+    PPEXSIOptions options;
+    PPEXSISetDefaultOptions(&options);
+    int numProcessPerPole;
+    double ZERO_Limit;
+    loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit);
+    //OUT(ofs_running, "checkpoint01");
+    // debug
+    #ifdef _DEBUG
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        if(myid<100) log_PEXSIOption(numElectronExact, f_log);
+    }
+    #endif
+    // end debug
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"set up PEXSI parameter, finish");
+
+    // set up PEXSI plan
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "set_up_PEXSI_plan");
+    //OUT(ofs_running, "checkpoint02");
+    ModuleBase::timer::tick("DiagoPexsi","setup_PEXSI_plan");
+    PPEXSIPlan plan;
+    int info;
+    int outputFileIndex;
+    int pexsi_prow, pexsi_pcol;
+    ModuleBase::timer::tick("DiagoPexsi","splitNProc2NProwNPcol");
+    splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol);
+    ModuleBase::timer::tick("DiagoPexsi","splitNProc2NProwNPcol");
+    //OUT(ofs_running, "checkpoint03");
+    #ifdef _DEBUG
+    //if(comm_PEXSI != MPI_COMM_NULL)
+    //{
+        if(myid<100) log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
+    //}
+    #endif
+    if(myid % (pexsi_prow * pexsi_pcol) == 0)
+    {
+        outputFileIndex=myid/(pexsi_prow*pexsi_pcol);
+    }
+    else
+    {
+        outputFileIndex=-1;
+    }
+    //OUT(ofs_running, "checkpoint04");
+    ModuleBase::timer::tick("DiagoPexsi","PEXSIPlanInit");
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        //OUT(ofs_running, "checkpoint05");
+        plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info);
+        #ifdef _DEBUG
+        //OUT(ofs_running, "checkpoint06");
+        if(myid<100) log_PEXSIinit(info, f_log);
+        //OUT(ofs_running, "checkpoint07");
+        #endif
+    }
+    ModuleBase::timer::tick("DiagoPexsi","PEXSIPlanInit");
+    //OUT(ofs_running, "checkpoint08");
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    ModuleBase::timer::tick("DiagoPexsi","setup_PEXSI_plan");
+	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"set_up_PEXSI_plan finish");
+
+    // create compressed column storage distribution matrix parameter
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin");
+    DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size);
+    //OUT(ofs_running, "checkpoint09");
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"create compressed column storage distribution matrix parameter, finish");
+
+    #ifdef _DEBUG
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        if(myid<100) log_DSTMatrix(DST_Matrix, f_log);
+    }
+    #endif
+
+    // create block cyclic distribution matrix parameter
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"create block cyclic distribution matrix parameter, begin");
+    //OUT(ofs_running, "checkpoint10");
+    DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, LAYOUT);
+    //OUT(ofs_running, "checkpoint11");
+    #ifdef _DEBUG
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        if(myid<100) log_SRCMatrix(SRC_Matrix, f_log);
+    }
+    #endif
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	//DONE(ofs_running,"create block cyclic distribution matrix parameter, finish");
+
+    double *HnzvalLocal=new double[1];
+    double *SnzvalLocal=new double[1];
+    double *DMnzvalLocal=new double[1];
+    double *EDMnzvalLocal=new double[1];
+    double *FDMnzvalLocal=new double[1];
+    // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformBCDtoCCS, begin");
+    //OUT(ofs_running, "checkpoint12");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMat2PEXSI");
+    transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
+    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMat2PEXSI");
+    //MPI_Barrier(MPI_COMM_WORLD);
+    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformBCDtoCCS, finish");
+    //OUT(ofs_running, "checkpoint13");
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        // debug
+        #ifdef _DEBUG
+        if(myid<100) log_DSTparameter(DST_Matrix, HnzvalLocal, f_log);
+        #endif
+        // end debug
+
+        // Load H and S to PEXSI
+        int isSIdentity=0;
+        //OUT(ofs_running, "checkpoint14");
+        PPEXSILoadRealHSMatrix(plan, options,
+                                size,
+                                DST_Matrix.nnz, DST_Matrix.nnzLocal,
+                                DST_Matrix.numColLocal, DST_Matrix.colptrLocal, DST_Matrix.rowindLocal,
+                                HnzvalLocal,
+                                isSIdentity,
+                                SnzvalLocal,
+                                &info);
+        //OUT(ofs_running, "checkpoint15");
+        #ifdef _DEBUG
+        if(myid<100) log_HSload(f_log);
+        #endif
+        // call PEXSI to solve Kohn-Sham equation
+        // PPEXSIDFTDriver2(plan, &options,
+                         // numElectronExact,
+                         // &muPEXSI,
+                         // &numElectronPEXSI,
+                         // &numTotalInertiaIter,
+                         // &info);
+        double mu;
+        double nelec;
+        double muMinInertia;
+        double muMaxInertia;
+        int numTotalPEXSIIter;
+        int numTotalInertiaIter; // Number of total inertia[out]
+        //OUT(ofs_running, "checkpoint16");
+        //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+	    ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"PPEXSIDFTDriver, begin");
+        ModuleBase::timer::tick("Diago_LCAO_Matrix","PEXSIDFT");
+        PPEXSIDFTDriver(
+        plan, // PEXSI plan[in]
+        options, // PEXSI Options[in]
+        numElectronExact, // exact electron number[in]
+        &mu, // chemical potential[out]
+        &nelec, // number of electrons[out]
+        &muMinInertia, // Lower bound for mu after the last inertia[out]
+        &muMaxInertia, // Upper bound for mu after the last inertia[out]
+        &numTotalInertiaIter, // Number of total inertia[out]
+        &numTotalPEXSIIter, // number of total pexsi evaluation procedure[out]
+        &info ); // 0: successful; otherwise: unsuccessful
+        //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+        ModuleBase::timer::tick("Diago_LCAO_Matrix","PEXSIDFT");
+	    ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"PPEXSIDFTDriver, finish");
+        //OUT(ofs_running, "checkpoint17");
+
+        // debug
+        #ifdef _DEBUG
+        if(myid<100) log_PEXSIcalled(mu, nelec, muMinInertia, muMaxInertia, numTotalPEXSIIter, f_log);
+        #endif
+        // end debug
+
+        // retrieve the results from the plan
+        delete[] DMnzvalLocal;
+        delete[] EDMnzvalLocal;
+        delete[] FDMnzvalLocal;
+        DMnzvalLocal=new double[DST_Matrix.nnzLocal];
+        EDMnzvalLocal=new double[DST_Matrix.nnzLocal];
+        FDMnzvalLocal=new double[DST_Matrix.nnzLocal];
+        if(myid < numProcessPerPole)
+        {
+            PPEXSIRetrieveRealDFTMatrix(
+                plan,
+                DMnzvalLocal,
+                EDMnzvalLocal,
+                FDMnzvalLocal,
+                &totalEnergyH,
+                &totalEnergyS,
+                &totalFreeEnergy,
+                &info);
+            #ifdef _DEBUG
+            if(myid<100) log_DM(DST_Matrix, DMnzvalLocal, f_log);
+            #endif
+        }
+        // clean PEXSI
+        PPEXSIPlanFinalize(plan, &info);
+        #ifdef _DEBUG
+        if(myid<100) log_PEXSIFinalized(f_log);
+        #endif
+    }
+    //OUT(ofs_running, "checkpoint18");
+
+    // transform Density Matrix and Energy Density Matrix from compressed column sparse matrix
+    // back to 2D block cyclic distribution if neccessary
+    if(comm_2D != MPI_COMM_NULL)
+    {
+		delete[] DM;
+		delete[] EDM;
+		DM=new double[SRC_Matrix.nrow*SRC_Matrix.ncol];
+		EDM=new double[SRC_Matrix.nrow*SRC_Matrix.ncol];
+	}
+    #ifdef _DEBUG
+    //OUT(ofs_running, "checkpoint19");
+    if(myid<100) log_DMEDM_in_BCD_allocated(f_log);
+    MPI_Barrier(MPI_COMM_WORLD);
+    #endif
+    //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformCCStoBCD, begin");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMAT22D");
+    transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
+    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMAT22D");
+    //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformCCStoBCD, finish");
+    #ifdef _DEBUG
+	MPI_Barrier(MPI_COMM_WORLD);
+    //OUT(ofs_running, "checkpoint20");
+    if(comm_PEXSI != MPI_COMM_NULL)
+    {
+        if(myid<100) log_DMtransformed(f_log);
+        if(myid<100) log_closefile(f_log);
+        // output result
+        // save local data of DMnzvalLocal
+        /*
+        ofstream f_DM;
+        sprintf(fname,"DM_%2.2d.dat", myid);
+        f_DM.open(fname, ios::out);
+        for(int i=0; i<SRC_Matrix.nrow; ++i)
+        {
+            for(int j=0; j<SRC_Matrix.ncol; ++j)
+            {
+                f_DM<<DM[i*SRC_Matrix.ncol+j]<<"\t";
+            }
+            f_DM<<"\n";
+        }
+        f_DM.close();
+
+        // save local data of EDMnzvalLocal
+        ofstream f_EDM;
+        sprintf(fname,"EDM_%2.2d.dat", myid);
+        f_EDM.open(fname, ios::out);
+        for(int i=0; i<SRC_Matrix.nrow; ++i)
+        {
+            for(int j=0; j<SRC_Matrix.ncol; ++j)
+            {
+                f_EDM<<EDM[i*SRC_Matrix.ncol+j]<<"\t";
+            }
+            f_EDM<<"\n";
+        }
+        f_EDM.close();
+        */
+    }
+    #endif
+	MPI_Barrier(MPI_COMM_WORLD);
+    //OUT(ofs_running, "checkpoint21");
+	MPI_Barrier(MPI_COMM_WORLD);
+    delete[] DMnzvalLocal;
+    delete[] EDMnzvalLocal;
+    delete[] FDMnzvalLocal;
+    delete[] HnzvalLocal;
+    delete[] SnzvalLocal;
+	MPI_Barrier(MPI_COMM_WORLD);
+    //OUT(ofs_running, "checkpoint22");
+	//MPI_Barrier(MPI_COMM_WORLD);
+    return 0;
+}
diff --git a/source/module_hsolver/pexsi/simplePEXSI.h b/source/module_hsolver/pexsi/simplePEXSI.h
new file mode 100644
index 0000000000..5bdf8d8bbb
--- /dev/null
+++ b/source/module_hsolver/pexsi/simplePEXSI.h
@@ -0,0 +1,8 @@
+#include <mpi.h>
+// a simple interface for calling pexsi with 2D block cyclic distributed matrix
+int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt,  // communicator parameters
+                const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // input matrix parameters
+                double* H, double* S,                 // input matrices
+                const double nElectronExact, const std::string PexsiOptionFile,        // pexsi parameters file
+                double*& DM, double*& EDM,      // output matrices
+                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy);
\ No newline at end of file
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 6cb48947d7..34807f17cc 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -3356,6 +3356,10 @@ void Input::Check(void)
         {
             ModuleBase::WARNING_QUIT("Input", "lapack can not be used with plane wave basis.");
         }
+        else if (ks_solver == "pexsi")
+        {
+            ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis.");
+        }
         else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav")
         {
             ModuleBase::WARNING_QUIT("Input", "please check the ks_solver parameter!");
@@ -3416,6 +3420,16 @@ void Input::Check(void)
 #ifndef __MPI
             ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version.");
 #endif
+        }
+        else if (ks_solver == "pexsi")
+        {
+#ifndef __MPI
+            ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version.");
+#else
+            GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
+#endif
+
+
         }
         else if (ks_solver != "default")
         {

From 3c57992b1143f044b599a3c04a62fe2494cfe96d Mon Sep 17 00:00:00 2001
From: zhangzhihao <1900017707@pku.edu.cn>
Date: Mon, 1 May 2023 02:29:35 +0800
Subject: [PATCH 02/44] fix : diag not completed

---
 source/module_base/global_function.h          |  2 +-
 source/module_basis/module_ao/ORB_control.cpp |  2 +-
 .../module_ao/parallel_orbitals.cpp           |  4 ++--
 .../module_esolver/esolver_ks_lcao_elec.cpp   |  2 +-
 .../hamilt_lcaodft/DM_gamma.cpp               |  4 ++--
 .../module_deepks/LCAO_deepks_odelta.cpp      |  2 +-
 source/module_hsolver/diago_pexsi.cpp         | 20 +++++++++++++++++--
 7 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/source/module_base/global_function.h b/source/module_base/global_function.h
index fa77c76b26..c9bcef7914 100644
--- a/source/module_base/global_function.h
+++ b/source/module_base/global_function.h
@@ -352,7 +352,7 @@ double ddot_real(
 //==========================================================
 static inline bool IS_COLUMN_MAJOR_KS_SOLVER()
 {
-    return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver";
+    return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="pexsi";
 }
 
 }//namespace GlobalFunc
diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp
index f3fbec9518..0439c57de1 100644
--- a/source/module_basis/module_ao/ORB_control.cpp
+++ b/source/module_basis/module_ao/ORB_control.cpp
@@ -183,7 +183,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running,
     ofs_running << "\n SETUP THE DIVISION OF H/S MATRIX" << std::endl;
 
     // (1) calculate nrow, ncol, nloc.
-    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver")
+    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi")
     {
         ofs_running << " divide the H&S matrix using 2D block algorithms." << std::endl;
 #ifdef __MPI
diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp
index 1c02dc4bb6..c1d530cb84 100644
--- a/source/module_basis/module_ao/parallel_orbitals.cpp
+++ b/source/module_basis/module_ao/parallel_orbitals.cpp
@@ -80,7 +80,7 @@ void ORB_control::set_trace(std::ofstream& ofs_running)
         pv->ncol = nlocal;
     }
 #ifdef __MPI
-    else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") // xiaohui add 2013-09-02
+    else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") // xiaohui add 2013-09-02
     {
         // ofs_running << " nrow=" << nrow << std::endl;
         for (int irow = 0; irow < pv->nrow; irow++)
@@ -245,7 +245,7 @@ void ORB_control::divide_HS_2d(
     pv->nloc = pv->MatrixInfo.col_num * pv->MatrixInfo.row_num;
 
     // init blacs context for genelpa
-    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver")
+    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi")
     {
         pv->blacs_ctxt = cart2blacs(pv->comm_2D,
                                     pv->dim0,
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index f016d778c9..f6b6545b96 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -100,7 +100,7 @@ namespace ModuleESolver
             if(this->psid==nullptr)
             {
                 int ncol = this->LOWF.ParaV->ncol_bands;
-                if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx"
+                if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER=="pexsi"
 #ifdef __CUSOLVER_LCAO
                 ||GlobalV::KS_SOLVER=="cusolver"
 #endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
index c57e463fc7..079d0600a7 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
@@ -29,7 +29,7 @@ int Local_Orbital_Charge::setAlltoallvParameter(MPI_Comm comm_2D, int blacs_ctxt
 	int myproc=0;
 
     Cblacs_gridinfo(blacs_ctxt, &nprows, &npcols, &myprow, &mypcol);
-
+    std::cout << "testing" << "npcols:" << npcols << " nprows:" << nprows << endl;
     Cblacs_pinfo(&myproc, &nprocs);
     // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nprocs",nprocs);
 
@@ -288,7 +288,7 @@ void Local_Orbital_Charge::gamma_file(psi::Psi<double>* psid, Local_Orbital_wfc
 
     //allocate psi
     int ncol = this->ParaV->ncol_bands;
-    if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx"
+    if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="lapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi"
 #ifdef __CUSOLVER_LCAO
     ||GlobalV::KS_SOLVER=="cusolver"
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
index ecdbfb6dab..351681edf9 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
@@ -64,7 +64,7 @@ void LCAO_Deepks::cal_o_delta_k(const std::vector<std::vector<ModuleBase::Comple
                     if (mu >= 0 && nu >= 0)
                     {                
                         int iic;
-                        if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx")  // save the matrix as column major format
+                        if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi")  // save the matrix as column major format
                         {
                             iic = mu + nu*ParaO.nrow;
                         }
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 8a7257d747..cf37092a73 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -36,12 +36,28 @@ void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, dou
     this->ps->solve();
     std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy
               << std::endl;
-    ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed");
 }
 void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
-    ModuleBase::WARNING_QUIT("DiagoPexsi", "Pexsi is not completed");
+    matd h_mat, s_mat;
+    phm_in->matrix(h_mat, s_mat);
+    std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
+    MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
+    this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt,
+                                this->ParaV->nb,
+                                this->ParaV->nrow,
+                                this->ParaV->ncol,
+                                h_mat.p,
+                                s_mat.p,
+                                this->DM,
+                                this->EDM,
+                                this->totalEnergyH,
+                                this->totalEnergyS,
+                                this->totalFreeEnergy);
+    this->ps->solve();
+    std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy
+              << std::endl;
 }
 
 } // namespace hsolver

From c3babb7948453edd014a817cc983d7645db0b66a Mon Sep 17 00:00:00 2001
From: zhangzhihao <1900017707@pku.edu.cn>
Date: Thu, 4 May 2023 00:19:34 +0800
Subject: [PATCH 03/44] feat

---
 source/module_elecstate/elecstate_lcao.cpp    | 23 +++++++++------
 source/module_elecstate/elecstate_lcao.h      |  3 ++
 .../module_elecstate/elecstate_lcao_tddft.cpp |  2 ++
 source/module_elecstate/energy.cpp            |  4 +++
 .../module_elecstate/module_charge/charge.cpp |  5 +---
 source/module_esolver/esolver_ks.cpp          |  9 ++----
 source/module_esolver/esolver_ks_lcao.cpp     |  5 ++--
 .../hamilt_lcaodft/local_orbital_charge.h     |  1 +
 source/module_hsolver/diago_pexsi.cpp         | 28 ++++++-------------
 source/module_hsolver/hsolver_lcao.cpp        | 11 +++++++-
 source/module_hsolver/pexsi/simplePEXSI.cpp   |  3 ++
 source/module_relax/relax_driver.cpp          |  5 +---
 12 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 3134c248c7..d985fb53e8 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -25,13 +25,13 @@ void ElecStateLCAO::psiToRho(const psi::Psi<std::complex<double>>& psi)
     // this part for calculating dm_k in 2d-block format, not used for charge now
     //    psi::Psi<std::complex<double>> dm_k_2d();
 
-    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx"
+    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" ||  GlobalV::KS_SOLVER == "pexsi"
         || GlobalV::KS_SOLVER == "lapack") // Peize Lin test 2019-05-15
     {
         cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_k);
     }
 
-    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack")
+    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack"  ||  GlobalV::KS_SOLVER == "pexsi")
     {
         for (int ik = 0; ik < psi.get_nk(); ik++)
         {
@@ -60,7 +60,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi<std::complex<double>>& psi)
         Gint_inout inout1(this->loc->DM_R, this->charge, Gint_Tools::job_type::tau);
         this->uhm->GK.cal_gint(&inout1);
     }
-
     this->charge->renormalize_rho();
 
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
@@ -73,23 +72,26 @@ void ElecStateLCAO::psiToRho(const psi::Psi<double>& psi)
     ModuleBase::TITLE("ElecStateLCAO", "psiToRho");
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
 
-    this->calculate_weights();
-    this->calEBand();
+    if (GlobalV::KS_SOLVER != "pexsi") // pexsi useless
+    {
+        this->calculate_weights();
+        this->calEBand();
+    }
 
-    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack")
+    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi")
     {
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
 
         // psi::Psi<double> dm_gamma_2d;
         //  caution:wfc and dm
-        cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma);
+        if (GlobalV::KS_SOLVER != "pexsi") cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma); // pexsi has done this
 
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
 
         for (int ik = 0; ik < psi.get_nk(); ++ik)
         {
             // for gamma_only case, no convertion occured, just for print.
-            if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx")
+            if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "scalapack_gvx")
             {
                 psi.fix_k(ik);
                 this->print_psi(psi);
@@ -183,4 +185,9 @@ void ElecStateLCAO::print_psi(const psi::Psi<std::complex<double>>& psi_in)
     return;
 }
 
+void ElecStateLCAO::get_DM_from_pexsi(double* DM)
+{
+    this->loc->dm_gamma[0].c = DM;
+}
+
 } // namespace elecstate
\ No newline at end of file
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index c606e8b5fe..356d437a40 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -43,6 +43,9 @@ class ElecStateLCAO : public ElecState
     static int out_wfc_flag;
     static bool need_psi_grid;
 
+    //use for pexsi
+    void get_DM_from_pexsi(double* DM);
+
   protected:
     // calculate electronic charge density on grid points or density matrix in real space
     // the consequence charge density rho saved into rho_out, preparing for charge mixing.
diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp
index 86f27b0fa3..e53137fdef 100644
--- a/source/module_elecstate/elecstate_lcao_tddft.cpp
+++ b/source/module_elecstate/elecstate_lcao_tddft.cpp
@@ -51,7 +51,9 @@ void ElecStateLCAO_TDDFT::psiToRho_td(const psi::Psi<std::complex<double>>& psi)
     Gint_inout inout(this->loc->DM_R, this->charge, Gint_Tools::job_type::rho);
     this->uhm->GK.cal_gint(&inout);
 
+    std::cout << "this->charge->renormalize_rho(); 1" << std::endl; 
     this->charge->renormalize_rho();
+    std::cout << "this->charge->renormalize_rho(); 1 done" << std::endl;
 
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
     return;
diff --git a/source/module_elecstate/energy.cpp b/source/module_elecstate/energy.cpp
index 56aa7cac47..188418946c 100644
--- a/source/module_elecstate/energy.cpp
+++ b/source/module_elecstate/energy.cpp
@@ -253,6 +253,10 @@ void energy::print_etot(
 	{
         label = "CU";
 	}
+	else if(GlobalV::KS_SOLVER=="pexsi")
+	{
+		label = "PE";
+	}
 	else
 	{
 		ModuleBase::WARNING_QUIT("Energy","print_etot");
diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp
index 7d06b656db..52a09b0664 100644
--- a/source/module_elecstate/module_charge/charge.cpp
+++ b/source/module_elecstate/module_charge/charge.cpp
@@ -293,7 +293,7 @@ double Charge::sum_rho(void) const
 			sum_rho += this->rho[is][ir];
 		}
 	}
-
+	std::cout << "sum_rho: " << sum_rho <<std::endl;
 	// multiply the sum of charge density by a factor
     sum_rho *= GlobalC::ucell.omega / static_cast<double>( GlobalC::rhopw->nxyz );
     Parallel_Reduce::reduce_double_pool( sum_rho );
@@ -313,7 +313,6 @@ double Charge::sum_rho(void) const
 void Charge::renormalize_rho(void)
 {
     ModuleBase::TITLE("Charge","renormalize_rho");
-
     const double sr = this->sum_rho();
 	GlobalV::ofs_warning << std::setprecision(15);
 	ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning,"charge before normalized",sr);
@@ -326,9 +325,7 @@ void Charge::renormalize_rho(void)
 			rho[is][ir] *= normalize_factor;
 		}
 	}
-
 	ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning,"charge after normalized",this->sum_rho());
-
 	GlobalV::ofs_running << std::setprecision(6);
     return;
 }
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index f094a2162a..284ed06603 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -183,7 +183,6 @@ namespace ModuleESolver
         else
         {
             ModuleBase::timer::tick(this->classname, "Run");
-
             this->beforescf(istep); //Something else to do before the iter loop
             ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF");
             if(this->maxniter > 0)  this->printhead(); //print the headline on the screen.
@@ -199,10 +198,10 @@ namespace ModuleESolver
 #else
                 auto iterstart = std::chrono::system_clock::now();
 #endif
+                std::cout << "drho: " << drho << " iter: " << iter << std::endl;
                 FPTYPE diag_ethr = this->phsol->set_diagethr(istep, iter, drho);
                 eachiterinit(istep, iter);
                 this->hamilt2density(istep, iter, diag_ethr);
-                
                 //<Temporary> It may be changed when more clever parallel algorithm is put forward.
                 //When parallel algorithm for bands are adopted. Density will only be treated in the first group.
                 //(Different ranks should have abtained the same, but small differences always exist in practice.)
@@ -212,7 +211,6 @@ namespace ModuleESolver
                 {
                     // FPTYPE drho = this->estate.caldr2(); 
                     // EState should be used after it is constructed.
-
                     drho = GlobalC::CHR_MIX.get_drho(pelec->charge, GlobalV::nelec);
                     FPTYPE hsolver_error = 0.0;
                     if (firstscf)
@@ -278,16 +276,15 @@ namespace ModuleESolver
                 printiter(iter, drho, duration, diag_ethr);
                 if (this->conv_elec)
                 {
+                    std::cout << "this->conv_elec" << std::endl;
                     this->niter = iter;
                     bool stop = this->do_after_converge(iter);
-                    if(stop) break;
+                    if(stop) {std::cout << "break\n"; break;}
                 }
             }
             afterscf(istep);
-
             ModuleBase::timer::tick(this->classname, "Run");
         }       
-
         return;
     };
 
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 89e867ec8b..86055d7bb5 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -519,7 +519,6 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr)
 {
     // save input rho
     pelec->charge->save_rho_before_sum_band();
-
     // using HSolverLCAO::solve()
     if (this->phsol != nullptr)
     {
@@ -537,7 +536,7 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr)
         {
             this->phsol->solve(this->p_hamilt, this->psid[0], this->pelec, GlobalV::KS_SOLVER);
         }
-
+        
         // transform energy for print
         GlobalC::en.eband = this->pelec->eband;
         GlobalC::en.demet = this->pelec->demet;
@@ -1242,7 +1241,7 @@ void ESolver_KS_LCAO::afterscf(const int istep)
 bool ESolver_KS_LCAO::do_after_converge(int& iter)
 {
 #ifdef __EXX
-
+    std::cout << "test exx" << std::endl;
     // Add EXX operator
     auto add_exx_operator = [&]() {
         if (GlobalV::GAMMA_ONLY_LOCAL)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
index 60ce6f3e43..5160af4803 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
@@ -81,6 +81,7 @@ class Local_Orbital_Charge
 
     std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> DMR_sparse;
 
+
 private:
 
 	// whether the DM array has been allocated
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index cf37092a73..cca6de9c1b 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -34,30 +34,18 @@ void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, dou
                                 this->totalEnergyS,
                                 this->totalFreeEnergy);
     this->ps->solve();
-    std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy
-              << std::endl;
+    this->EDM = this->ps->EDM;
+    this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->totalFreeEnergy = this->ps->totalFreeEnergy;
+    this->totalEnergyH = this->ps->totalEnergyH;
+    this->totalEnergyS = this->ps->totalEnergyS;
+    std::cout << "this->totalEnergyH: " << this->ps->totalEnergyH << std::endl << "this->totalEnergyS: " << this->ps->totalEnergyS << std::endl << "this->totalFreeEnergy" << this->ps->totalFreeEnergy << std::endl;
 }
 void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
-    matd h_mat, s_mat;
-    phm_in->matrix(h_mat, s_mat);
-    std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
-    MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
-    this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt,
-                                this->ParaV->nb,
-                                this->ParaV->nrow,
-                                this->ParaV->ncol,
-                                h_mat.p,
-                                s_mat.p,
-                                this->DM,
-                                this->EDM,
-                                this->totalEnergyH,
-                                this->totalEnergyS,
-                                this->totalFreeEnergy);
-    this->ps->solve();
-    std::cout << this->ps->totalEnergyH << "xxxxxx" << this->ps->totalEnergyS << "xxxxxx" << this->ps->totalFreeEnergy
-              << std::endl;
+    ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
+    
 }
 
 } // namespace hsolver
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index e870e4f191..2a61637dbe 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -8,6 +8,7 @@
 #include "diago_elpa.h"
 #endif
 #include "diago_pexsi.h"
+#include "module_elecstate/elecstate_lcao.h"
 
 namespace hsolver
 {
@@ -122,7 +123,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt<double>* pHamilt,
         }
     }
 
-    if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack")
+    if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack" && this->method != "pexsi")
     {
         delete pdiagh;
         pdiagh = nullptr;
@@ -137,6 +138,14 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt<double>* pHamilt,
 
     // calculate charge by psi
     // called in scf calculation
+    if (this->method == "pexsi")
+    {
+        DiagoPexsi* tem = dynamic_cast<DiagoPexsi*>(this->pdiagh);
+        if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
+        elecstate::ElecStateLCAO* _pes = dynamic_cast<elecstate::ElecStateLCAO*>(pes);
+        pes->eband = tem->totalFreeEnergy;
+        _pes->get_DM_from_pexsi(tem->DM);
+    }
     pes->psiToRho(psi);
     ModuleBase::timer::tick("HSolverLCAO", "solve");
 }
diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp
index 6f1d2d1afe..438936280f 100644
--- a/source/module_hsolver/pexsi/simplePEXSI.cpp
+++ b/source/module_hsolver/pexsi/simplePEXSI.cpp
@@ -383,6 +383,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const
                 double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy)      // output energy
 {
     int out_log=0;
+    std::cout << "nrow: " << nrow << std::endl;
+    std::cout << "ncol: " << ncol << std::endl;
     if(out_log == 1)
     {
         std::stringstream ss;
@@ -614,6 +616,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const
             if(myid<100) log_DM(DST_Matrix, DMnzvalLocal, f_log);
             #endif
         }
+        std::cout << "totalEnergyH:" << totalEnergyH << "\ntotalEnergyS:" << totalEnergyS << "\ntotalFreeEnergy:" << totalFreeEnergy << std::endl; 
         // clean PEXSI
         PPEXSIPlanFinalize(plan, &info);
         #ifdef _DEBUG
diff --git a/source/module_relax/relax_driver.cpp b/source/module_relax/relax_driver.cpp
index 173101036e..b821ba8bcf 100644
--- a/source/module_relax/relax_driver.cpp
+++ b/source/module_relax/relax_driver.cpp
@@ -29,7 +29,6 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver)
     while (istep <= GlobalV::RELAX_NMAX && !stop)
     {
         time_t estart = time(NULL);
-
         if (GlobalV::OUT_LEVEL == "ie"
             && (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax" || GlobalV::CALCULATION == "scf"
                 || GlobalV::CALCULATION == "nscf"))
@@ -38,6 +37,7 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver)
         }
 
         // mohan added eiter to count for the electron iteration number, 2021-01-28
+        
         p_esolver->Run(istep - 1, GlobalC::ucell);
 
         time_t eend = time(NULL);
@@ -80,7 +80,6 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver)
                                          force_step,
                                          stress_step); // pengfei Li 2018-05-14
             }
-
             if (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax")
             {
                 // print structure
@@ -99,10 +98,8 @@ void Relax_Driver::relax_driver(ModuleESolver::ESolver *p_esolver)
             }
         }
         time_t fend = time(NULL);
-
         ++istep;
     }
-
     if (GlobalV::OUT_LEVEL == "i")
     {
         std::cout << " ION DYNAMICS FINISHED :)" << std::endl;

From 43d743d7995cc20596c8ddf0c487b647e99f5a32 Mon Sep 17 00:00:00 2001
From: zhangzhihao <1900017707@pku.edu.cn>
Date: Thu, 11 May 2023 14:37:03 +0800
Subject: [PATCH 04/44] feat: pexsi hsolver

---
 source/Makefile                               |    6 +-
 source/Makefile.vars                          |    2 +-
 source/module_base/parallel_global.cpp        |   63 +-
 source/module_elecstate/elecstate_lcao.cpp    |   20 +-
 source/module_elecstate/elecstate_lcao.h      |    2 +-
 .../module_elecstate/module_charge/charge.cpp |    1 -
 source/module_esolver/esolver_ks.cpp          |    1 -
 .../hamilt_lcaodft/DM_gamma.cpp               |    2 -
 source/module_hsolver/diago_elpa.cpp          |   32 +
 source/module_hsolver/diago_pexsi.cpp         |    1 -
 source/module_hsolver/hsolver_lcao.cpp        |    2 +-
 source/module_hsolver/pexsi/DistBCDMatrix.cpp |    1 +
 source/module_hsolver/pexsi/DistCCSMatrix.cpp |    1 +
 .../pexsi/DistMatrixTransformer.cpp           | 1538 +++++++++--------
 source/module_hsolver/pexsi/pexsi_solver.cpp  |   44 +-
 source/module_hsolver/pexsi/simplePEXSI.cpp   |  680 ++++----
 16 files changed, 1292 insertions(+), 1104 deletions(-)

diff --git a/source/Makefile b/source/Makefile
index faaf6697b8..8f3db96941 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -5,7 +5,7 @@ include Makefile.vars
 #==========================
 INCLUDES = -I. -Icommands -I../
 LIBS = -lm -lpthread
-OPTS = -std=c++14 -pedantic -m64 ${INCLUDES}
+OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic 
 HONG = -D__LCAO
 HONG += -D__ELPA
 ifeq ($(OPENMP), ON)
@@ -43,7 +43,7 @@ ifeq ($(DEBUG), ON)
     endif
     OPTS += -O0 -fsanitize=address -fno-omit-frame-pointer -Wall -g #It can check segmental defaults
 else
-    HONG += -Ofast -march=native -DNDEBUG
+    HONG += -O0 -march=native -DNDEBUG
 endif
 
 ifeq ($(INTEL), ON)
@@ -73,7 +73,7 @@ else
     FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
     FFTW_LIB_DIR     = ${FFTW_DIR}/lib
     HONG  += -D__FFTW3
-    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
+    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl
     INCLUDES += -I${FFTW_INCLUDE_DIR}
     
     #==========================
diff --git a/source/Makefile.vars b/source/Makefile.vars
index d19109fc96..fb01878d5f 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -53,7 +53,7 @@ SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc
 ## CEREAL_DIR        should contain an include folder.
 ##---------------------------------------------------------------------
 
-# FFTW_DIR = /public/soft/fftw_3.3.8
+# FFTW_DIR = /root/lib/FFTW3
 # OPENBLAS_LIB_DIR   = /public/soft/openblas/lib
 # SCALAPACK_LIB_DIR  = /public/soft/openblas/lib
 
diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp
index 954b0662cd..9245cc72ff 100644
--- a/source/module_base/parallel_global.cpp
+++ b/source/module_base/parallel_global.cpp
@@ -98,49 +98,30 @@ void Parallel_Global::split_diag_world(const int &diag_np)
 
 
 
-void Parallel_Global::split_grid_world(const int &diag_np)
+void Parallel_Global::split_grid_world(const int &grid_np)
 {
 #ifdef __MPI
-	assert(diag_np>0);
-	// number of processors in each 'grid group'.
-	int* group_grid_np = new int[diag_np];
-	ModuleBase::GlobalFunc::ZEROS(group_grid_np, diag_np);
-	// average processors in each 'grid group'
-	int ave = GlobalV::NPROC/diag_np;
-	// remain processors.
-	int remain = GlobalV::NPROC - ave * diag_np;
-
-	for(int i=0; i<diag_np; ++i)
-	{
-		group_grid_np[i] = ave;
-		if(i<remain)
-		{
-			++group_grid_np[i];
-		}
-	}
-
-	// color: same color will stay in same group.
-	// key: rank in each fragment group.
-	int color = -1;		// Peize Lin add initialization for compiler warning at 2020.01.31
-	int key = -1;		// Peize Lin add initialization for compiler warning at 2020.01.31
-
-	int np_now = 0;
-	for(int i=0; i<diag_np; ++i)
-	{
-		np_now += group_grid_np[i];
-		if(GlobalV::MY_RANK < np_now)
-		{
-			color = i;
-			key = group_grid_np[i] - (np_now - GlobalV::MY_RANK);
-			break;
-		}
-	}
-
-	MPI_Comm_split(MPI_COMM_WORLD, color, key, &GRID_WORLD);
-	MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK);
-	MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE);
-
-	delete[] group_grid_np;
+	assert(grid_np>0); //LiuXh, 2020-12-14, diag_np --> grid_np
+    int myid;
+    MPI_Group WORLD_GROUP;
+    //MPI_Comm_rank(MPI_COMM_WORLD, &key);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myid); //LiuXh, 2020-12-14, key --> myid
+    MPI_Comm_group(MPI_COMM_WORLD, &WORLD_GROUP);
+
+    int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np};
+    MPI_Group_range_incl(WORLD_GROUP, 1, &grid_proc_range, &GRID_GROUP);
+
+    GRID_WORLD=MPI_COMM_NULL;
+    MPI_Comm_create(MPI_COMM_WORLD, GRID_GROUP, &GRID_WORLD);
+    if(GRID_WORLD != MPI_COMM_NULL)
+    {
+        MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD
+        MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD
+    }else
+    {
+        GlobalV::GRANK=-1;
+        GlobalV::GSIZE=-1;
+    }
 #else
 	GlobalV::GRANK=0;  //mohan fix bug 2012-02-04
 	GlobalV::GSIZE=1;
diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index d985fb53e8..d9fcc93648 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -108,6 +108,23 @@ void ElecStateLCAO::psiToRho(const psi::Psi<double>& psi)
     //------------------------------------------------------------
     // calculate the charge density on real space grid.
     //------------------------------------------------------------
+    // print matrix zzh
+    // GlobalV::ofs_running << "dm_gamma print\n";
+    // for(int i=0; i< this->loc->dm_gamma[0].nc; i++)
+    // {
+    //     for(int j=0; j<this->loc->dm_gamma[0].nr; j++)
+    //     {
+    //         if (std::abs(this->loc->dm_gamma[0](i, j)) < 0.00000001)
+    //         {
+    //             GlobalV::ofs_running << "0 ";
+    //         }
+    //         else
+    //         {
+    //             GlobalV::ofs_running << this->loc->dm_gamma[0](i, j) << " ";
+    //         }
+    //     }
+    //     GlobalV::ofs_running << std::endl;
+    // }
     ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!");
     Gint_inout inout(this->loc->DM, this->charge, Gint_Tools::job_type::rho);
     this->uhm->GG.cal_gint(&inout);
@@ -185,8 +202,9 @@ void ElecStateLCAO::print_psi(const psi::Psi<std::complex<double>>& psi_in)
     return;
 }
 
-void ElecStateLCAO::get_DM_from_pexsi(double* DM)
+void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
 {
+    this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow);
     this->loc->dm_gamma[0].c = DM;
 }
 
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 356d437a40..51cb164187 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -44,7 +44,7 @@ class ElecStateLCAO : public ElecState
     static bool need_psi_grid;
 
     //use for pexsi
-    void get_DM_from_pexsi(double* DM);
+    void get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV);
 
   protected:
     // calculate electronic charge density on grid points or density matrix in real space
diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp
index 52a09b0664..cec15dbc01 100644
--- a/source/module_elecstate/module_charge/charge.cpp
+++ b/source/module_elecstate/module_charge/charge.cpp
@@ -293,7 +293,6 @@ double Charge::sum_rho(void) const
 			sum_rho += this->rho[is][ir];
 		}
 	}
-	std::cout << "sum_rho: " << sum_rho <<std::endl;
 	// multiply the sum of charge density by a factor
     sum_rho *= GlobalC::ucell.omega / static_cast<double>( GlobalC::rhopw->nxyz );
     Parallel_Reduce::reduce_double_pool( sum_rho );
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 284ed06603..eb82ccba8d 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -198,7 +198,6 @@ namespace ModuleESolver
 #else
                 auto iterstart = std::chrono::system_clock::now();
 #endif
-                std::cout << "drho: " << drho << " iter: " << iter << std::endl;
                 FPTYPE diag_ethr = this->phsol->set_diagethr(istep, iter, drho);
                 eachiterinit(istep, iter);
                 this->hamilt2density(istep, iter, diag_ethr);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
index 079d0600a7..2e96ed2bf8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/DM_gamma.cpp
@@ -29,7 +29,6 @@ int Local_Orbital_Charge::setAlltoallvParameter(MPI_Comm comm_2D, int blacs_ctxt
 	int myproc=0;
 
     Cblacs_gridinfo(blacs_ctxt, &nprows, &npcols, &myprow, &mypcol);
-    std::cout << "testing" << "npcols:" << npcols << " nprows:" << nprows << endl;
     Cblacs_pinfo(&myproc, &nprocs);
     // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nprocs",nprocs);
 
@@ -338,7 +337,6 @@ void Local_Orbital_Charge::gamma_file(psi::Psi<double>* psid, Local_Orbital_wfc
 void Local_Orbital_Charge::cal_dk_gamma_from_2D_pub(void)
 {
     ModuleBase::TITLE("Local_Orbital_Charge","cal_dk_gamma_from_2D_pub");
-
 	cal_dk_gamma_from_2D();
 }
 // calculate the grid distributed DM matrix from 2D block-cyclic distributed DM matrix
diff --git a/source/module_hsolver/diago_elpa.cpp b/source/module_hsolver/diago_elpa.cpp
index a47c363fe2..19b641e353 100644
--- a/source/module_hsolver/diago_elpa.cpp
+++ b/source/module_hsolver/diago_elpa.cpp
@@ -31,7 +31,24 @@ void DiagoElpa::diag(hamilt::Hamilt<double> *phm_in, psi::Psi<std::complex<doubl
     ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc);
     this->DecomposedState=0; // for k pointer, the decomposed s_mat can not be reused
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
+    std::cout << "???" << std::endl;
+    GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n";
+    GlobalV::ofs_running << "print H" << std::endl;
+    for (int i = 0; i < h_mat.col; i++)
+    {
+        for (int j = 0; j < h_mat.row; j++)
+        {
+            if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001)
+            {
+                GlobalV::ofs_running << "0 ";
+            }
+            else
+                GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " ";
+        }
+        GlobalV::ofs_running << std::endl;
+    }
     es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer());
+
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
     es.exit();
 
@@ -56,6 +73,21 @@ void DiagoElpa::diag(hamilt::Hamilt<double> *phm_in, psi::Psi<double> &psi, doub
     //ELPA_Solver es(isReal, COMM_DIAG, GlobalV::NBANDS, h_mat.row, h_mat.col, h_mat.desc);
     ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc);
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
+    GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n";
+    GlobalV::ofs_running << "print H" << std::endl;
+    for (int i = 0; i < h_mat.col; i++)
+    {
+        for (int j = 0; j < h_mat.row; j++)
+        {
+            if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001)
+            {
+                GlobalV::ofs_running << "0 ";
+            }
+            else
+                GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " ";
+        }
+        GlobalV::ofs_running << std::endl;
+    }
     es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer());
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
     es.exit();
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index cca6de9c1b..c0b4e3e24c 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -39,7 +39,6 @@ void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, dou
     this->totalFreeEnergy = this->ps->totalFreeEnergy;
     this->totalEnergyH = this->ps->totalEnergyH;
     this->totalEnergyS = this->ps->totalEnergyS;
-    std::cout << "this->totalEnergyH: " << this->ps->totalEnergyH << std::endl << "this->totalEnergyS: " << this->ps->totalEnergyS << std::endl << "this->totalFreeEnergy" << this->ps->totalFreeEnergy << std::endl;
 }
 void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
 {
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 2a61637dbe..242bd5d5a8 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -144,7 +144,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt<double>* pHamilt,
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO* _pes = dynamic_cast<elecstate::ElecStateLCAO*>(pes);
         pes->eband = tem->totalFreeEnergy;
-        _pes->get_DM_from_pexsi(tem->DM);
+        _pes->get_DM_from_pexsi(tem->DM, tem->ParaV);
     }
     pes->psiToRho(psi);
     ModuleBase::timer::tick("HSolverLCAO", "solve");
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp
index 8a3f2740e2..383875dc87 100644
--- a/source/module_hsolver/pexsi/DistBCDMatrix.cpp
+++ b/source/module_hsolver/pexsi/DistBCDMatrix.cpp
@@ -142,6 +142,7 @@ int DistBCDMatrix::globalRow(const int localRow)
 
 int DistBCDMatrix::globalCol(const int localCol)
 {
+    
     return (localCol/nblk*npcols+mypcol)*nblk+localCol%nblk;
 }
 
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp
index 45a14d6ac7..9e3fc728fa 100644
--- a/source/module_hsolver/pexsi/DistCCSMatrix.cpp
+++ b/source/module_hsolver/pexsi/DistCCSMatrix.cpp
@@ -91,6 +91,7 @@ int DistCCSMatrix::localCol(int globalCol, int& mypcol)
 {
     mypcol=int(globalCol/int(this->size/this->nproc_data));
     if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1;
+    
     return mypcol>0 ? globalCol-(this->size/this->nproc_data)*mypcol : globalCol;
 }
 
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
index 285d3cdd94..1eec8dca12 100644
--- a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
+++ b/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
@@ -1,233 +1,252 @@
 #include <mpi.h>
-#include <cstdlib>
+
 #include <climits>
 #include <cmath>
-#include <vector>
+#include <cstdlib>
+#include <iostream>
 #include <map>
+#include <vector>
+
 #include "DistBCDMatrix.h"
 #include "DistCCSMatrix.h"
 
-
 // for debug
 #ifdef _DEBUG
+#include <unistd.h>
+
 #include <cstring>
 #include <fstream>
-#include <unistd.h>
+
 #include "src_pw/global.h"
 #endif
 // end debug
 
-// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a negtive value
-// the size_process and displacement_process array will be changed after the index is found
-// isFirst: wether this function is called for the first time for a index array;
-// nprocs: total number of processes
-// size_process: the number of indices in each process
-// displacement_process: the start position in each process
-// index: the array contains the indices
-inline int MinimumIndexPosition(const bool isFirst, const int nprocs,
-                                int* size_process, int* displacement_process, const int* index)
+// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a
+// negtive value the size_process and displacement_process array will be changed after the index is found isFirst:
+// wether this function is called for the first time for a index array; nprocs: total number of processes size_process:
+// the number of indices in each process displacement_process: the start position in each process index: the array
+// contains the indices
+inline int MinimumIndexPosition(const bool isFirst,
+                                const int nprocs,
+                                int* size_process,
+                                int* displacement_process,
+                                const int* index)
 {
     // usually the minimum index is continuous, so it will be a good idea to
     // check the one next to the previous index first.
     static int pre_position; // previous position in index array of minimum index,
-    static int pre_process; // the process contains previous index
+    static int pre_process;  // the process contains previous index
 
-    int minimum_index=INT_MAX; // the minimum index, initial value is a large number which is larger than any other index;
-    int minimum_position=-1;
-    int minimum_process=-1;
+    int minimum_index
+        = INT_MAX; // the minimum index, initial value is a large number which is larger than any other index;
+    int minimum_position = -1;
+    int minimum_process = -1;
 
-    if(isFirst)
+    if (isFirst)
     {
-        for(int i=0; i<nprocs; ++i)
+        for (int i = 0; i < nprocs; ++i)
         {
-            if(size_process[i]>0)
+            if (size_process[i] > 0)
             {
-                if(minimum_index>index[displacement_process[i]])  // find a smaller index
+                if (minimum_index > index[displacement_process[i]]) // find a smaller index
                 {
-                    minimum_position=displacement_process[i];
-                    minimum_index=index[minimum_position];
-                    minimum_process=i;
+                    minimum_position = displacement_process[i];
+                    minimum_index = index[minimum_position];
+                    minimum_process = i;
                 }
             }
         }
-        if(minimum_process>=0) // find it!
+        if (minimum_process >= 0) // find it!
         {
             ++displacement_process[minimum_process];
             --size_process[minimum_process];
         }
-        pre_position=minimum_position;
-        pre_process=minimum_process;
+        pre_position = minimum_position;
+        pre_process = minimum_process;
         return minimum_position;
     }
     else
     {
         // check the next one of pre_position
-        if(size_process[pre_process]>0  &&  // the previous process still has elements
-            index[pre_position+1]==index[pre_position]+1) // find it!
+        if (size_process[pre_process] > 0 &&                    // the previous process still has elements
+            index[pre_position + 1] == index[pre_position] + 1) // find it!
         {
             ++displacement_process[pre_process];
             --size_process[pre_process];
-            ++pre_position;  // new pre_position is the next one
-                                      // new pre_process keeps the same
+            ++pre_position;      // new pre_position is the next one
+                                 // new pre_process keeps the same
             return pre_position; // current position is the new pre_position
         }
 
         // if the next one of pre_position is not the minimum one
-        for(int i=0; i<nprocs; ++i)
+        for (int i = 0; i < nprocs; ++i)
         {
-            if(size_process[i]>0)
+            if (size_process[i] > 0)
             {
-                if(minimum_index>index[displacement_process[i]])
+                if (minimum_index > index[displacement_process[i]])
                 {
-                    minimum_position=displacement_process[i];
-                    minimum_index=index[minimum_position];
-                    minimum_process=i;
+                    minimum_position = displacement_process[i];
+                    minimum_index = index[minimum_position];
+                    minimum_process = i;
                 }
             }
         }
-        if(minimum_process>=0) // find it!
+        if (minimum_process >= 0) // find it!
         {
             ++displacement_process[minimum_process];
             --size_process[minimum_process];
         }
-        pre_position=minimum_position;
-        pre_process=minimum_process;
+        pre_position = minimum_position;
+        pre_process = minimum_process;
         return minimum_position;
     }
 }
 
-inline void buildCCSParameter(const int size, const int nprocs,
-            std::vector<int> size_process, std::vector<int> displacement_process,
-            const int* position_index, DistCCSMatrix &DST_Matrix, int* buffer2ccsIndex)
+inline void buildCCSParameter(const int size,
+                              const int nprocs,
+                              std::vector<int> size_process,
+                              std::vector<int> displacement_process,
+                              const int* position_index,
+                              DistCCSMatrix& DST_Matrix,
+                              int* buffer2ccsIndex)
 {
     // find the minimum one from left buffer index
-    if(DST_Matrix.nnzLocal<=0) return;
+    if (DST_Matrix.nnzLocal <= 0)
+        return;
 
-    int pre_col=-1;
-    int nnz_now=0;
+    int pre_col = -1;
+    int nnz_now = 0;
     int p_mini;
-    p_mini=MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index);
-    while(p_mini>=0)
+    p_mini = MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index);
+    while (p_mini >= 0)
     {
-        int index_mini=position_index[p_mini];
-        int col_mini=index_mini/DST_Matrix.size; //-DST_Matrix.firstCol;
-        int row_mini=index_mini%DST_Matrix.size;
-        if(col_mini>pre_col) // a new column starts, column pointer is a 1-based array
+        int index_mini = position_index[p_mini];
+        int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol;
+        int row_mini = index_mini % DST_Matrix.size;
+        if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array
         {
-            pre_col=col_mini;
-            DST_Matrix.colptrLocal[col_mini]=nnz_now+1;
+            pre_col = col_mini;
+            DST_Matrix.colptrLocal[col_mini] = nnz_now + 1;
         }
-        DST_Matrix.rowindLocal[nnz_now]=row_mini+1; // setup row index array, which is also 1-based
+        DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
         // copy data from buffer to M, be careful M is a 0-based array
-        buffer2ccsIndex[nnz_now]=p_mini;
+        buffer2ccsIndex[nnz_now] = p_mini;
         ++nnz_now;
-        p_mini=MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
+        p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
     }
     // The last element of colptrLocal is nnzLocal+1
-    DST_Matrix.colptrLocal[DST_Matrix.numColLocal]=nnz_now+1;
+    DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1;
 }
 
-inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer,
-                            double* nzvalLocal)
+inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal)
 {
-    for(int i=0; i<nnzLocal; ++i)
+    for (int i = 0; i < nnzLocal; ++i)
     {
-        nzvalLocal[i]=buffer[buffer2ccsIndex[i]];
+        nzvalLocal[i] = buffer[buffer2ccsIndex[i]];
     }
 }
 inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 {
-    for(int i=0; i<N; ++i)
+    for (int i = 0; i < N; ++i)
     {
         int key;
-        if(fabs(A[i]<1e-31))
-            key=-100;
+        if (fabs(A[i] < 1e-31))
+            key = -100;
         else
-            key=floor(log10(fabs(A[i])));
+            key = floor(log10(fabs(A[i])));
         ++P[key];
     }
 }
 
 // find out the index of non-zero elements
-inline int getNonZeroIndex(char LAYOUT, const int nrow, const int ncol, double* H_2d, double* S_2d, const double ZERO_Limit,
-                    int &nnz, std::vector<int> &rowidx, std::vector<int> &colidx)
+inline int getNonZeroIndex(char LAYOUT,
+                           const int nrow,
+                           const int ncol,
+                           double* H_2d,
+                           double* S_2d,
+                           const double ZERO_Limit,
+                           int& nnz,
+                           std::vector<int>& rowidx,
+                           std::vector<int>& colidx)
 {
-    #ifdef _DEBUG
+#ifdef _DEBUG
     char f_log[80];
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
     std::ofstream log;
-    if(myproc<100)
+    if (myproc < 100)
     {
         sprintf(f_log, "transformer_%2.2d.log", myproc);
         log.open(f_log, std::ios::app);
-        log<<"start count nnz"<<std::endl;
+        log << "start count nnz" << std::endl;
     }
     // count nonzeros value distribution of H and S
-    static bool isCOUNTNONZERO=true;
-    if(!isCOUNTNONZERO)
+    static bool isCOUNTNONZERO = true;
+    if (!isCOUNTNONZERO)
     {
-        isCOUNTNONZERO=true;
+        isCOUNTNONZERO = true;
         char plog_name[80];
         sprintf(plog_name, "HS_Distribution_%d.log", myproc);
         std::ofstream plog;
         plog.open(plog_name, std::ios::app);
         std::map<int, int> pH;
-        countMatrixDistribution(nrow*ncol, H_2d, pH);
+        countMatrixDistribution(nrow * ncol, H_2d, pH);
         std::map<int, int> pS;
-        countMatrixDistribution(nrow*ncol, H_2d, pS);
-        plog<<"Element in H distribution:\n";
+        countMatrixDistribution(nrow * ncol, H_2d, pS);
+        plog << "Element in H distribution:\n";
         // std::stringstream ss;
         // ss.str("");
-        for(auto iter=pH.begin(); iter!=pH.end(); ++iter)
+        for (auto iter = pH.begin(); iter != pH.end(); ++iter)
         {
             // ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
-            plog<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+            plog << "p[" << iter->first << "] : " << iter->second << std::endl;
         }
-        //OUT(ofs_running,ss.str());
-        //OUT(ofs_running, "Element in S distribution:");
-        plog<<"Element in S distribution:\n";
-        //ss.str("");
-        for(auto iter=pS.begin(); iter!=pS.end(); ++iter)
+        // OUT(ofs_running,ss.str());
+        // OUT(ofs_running, "Element in S distribution:");
+        plog << "Element in S distribution:\n";
+        // ss.str("");
+        for (auto iter = pS.begin(); iter != pS.end(); ++iter)
         {
-            //ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
-            plog<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+            // ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
+            plog << "p[" << iter->first << "] : " << iter->second << std::endl;
         }
         // OUT(ofs_running,ss.str());
         plog.close();
     }
-    #endif
+#endif
 
-    int idx=0;
-    nnz=0;
+    int idx = 0;
+    nnz = 0;
     colidx.clear();
     rowidx.clear();
-    #ifdef _DEBUG
-    if(myproc<100) log<<"rowidx and colidx cleared"<<std::endl;
-    #endif
-    if(LAYOUT == 'C' || LAYOUT == 'c')
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "rowidx and colidx cleared" << std::endl;
+#endif
+    if (LAYOUT == 'C' || LAYOUT == 'c')
     {
-        for(int i=0; i<ncol; ++i)
+        for (int i = 0; i < ncol; ++i)
         {
-            for(int j=0; j<nrow; ++j)
+            for (int j = 0; j < nrow; ++j)
             {
-                idx=i*nrow+j;
-                if(fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
+                idx = i * nrow + j;
+                if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
                 {
-                    ++nnz;
+                     ++nnz;
                     colidx.push_back(i);
                     rowidx.push_back(j);
                 }
             }
         }
-    } else if(LAYOUT == 'R' || LAYOUT == 'r')
+    }
+    else if (LAYOUT == 'R' || LAYOUT == 'r')
     {
-        for(int i=0; i<ncol; ++i)
+        for (int i = 0; i < ncol; ++i)
         {
-            for(int j=0; j<nrow; ++j)
+            for (int j = 0; j < nrow; ++j)
             {
-                idx=j*ncol+i;
-                if(fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
+                idx = j * ncol + i;
+                if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
                 {
                     ++nnz;
                     colidx.push_back(i);
@@ -235,68 +254,78 @@ inline int getNonZeroIndex(char LAYOUT, const int nrow, const int ncol, double*
                 }
             }
         }
-    } else
+    }
+    else
     {
-        #ifdef _DEBUG
-        if(myproc<100) log<<"unknown LAYOUT: "<<LAYOUT<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "unknown LAYOUT: " << LAYOUT << std::endl;
+#endif
         return 1;
     }
-    #ifdef _DEBUG
-    if(myproc<100) 
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"nnz is counted: "<<nnz<<std::endl;
+        log << "nnz is counted: " << nnz << std::endl;
         log.close();
     }
-    #endif
+#endif
     return 0;
 }
 
-int buildTransformParameter(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix,
-                            const int NPROC_TRANS, MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS,
-                            const int nnz, std::vector<int> &rowidx, std::vector<int> &colidx,
-                            int &sender_size, std::vector<int> &sender_size_process, std::vector<int> &sender_displacement_process,
-                            int &receiver_size, std::vector<int> &receiver_size_process, std::vector<int> &receiver_displacement_process,
-                            std::vector<int> &buffer2ccsIndex)
+int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                            DistCCSMatrix& DST_Matrix,
+                            const int NPROC_TRANS,
+                            MPI_Group& GROUP_TRANS,
+                            MPI_Comm& COMM_TRANS,
+                            const int nnz,
+                            std::vector<int>& rowidx,
+                            std::vector<int>& colidx,
+                            int& sender_size,
+                            std::vector<int>& sender_size_process,
+                            std::vector<int>& sender_displacement_process,
+                            int& receiver_size,
+                            std::vector<int>& receiver_size_process,
+                            std::vector<int>& receiver_displacement_process,
+                            std::vector<int>& buffer2ccsIndex)
 {
     // debug
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-    #ifdef _DEBUG
+#ifdef _DEBUG
     std::ofstream log;
-    if(myproc<100)
+    if (myproc < 100)
     {
         char f_log[80];
         sprintf(f_log, "transformer_%2.2d.log", myproc);
         log.open(f_log, std::ios::app);
-        log<<"enter buildTransformParameter"<<std::endl;
+        log << "enter buildTransformParameter" << std::endl;
     }
-    #endif
-    //end debug
-    //count sender non-zeros elements
-    sender_size=nnz;
+#endif
+    // end debug
+    // count sender non-zeros elements
+    sender_size = nnz;
     std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100)
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"start translate ranks between group_data and group_trans"<<std::endl;
-        log<<"sender_size (in BCD) = "<<sender_size<<std::endl;
+        log << "start translate ranks between group_data and group_trans" << std::endl;
+        log << "sender_size (in BCD) = " << sender_size << std::endl;
     }
-    #endif
+#endif
     // end debug
     // create process id map from group_data to group_trans
     int nproc_data;
     std::vector<int> proc_map_data_trans;
-    if(myproc == 0)
+    if (myproc == 0)
     {
         MPI_Group_size(DST_Matrix.group_data, &nproc_data);
         MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
         proc_map_data_trans.resize(nproc_data, 0);
-        for(int i=0; i<nproc_data; ++i)
+        for (int i = 0; i < nproc_data; ++i)
         {
-            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i,
-                                      GROUP_TRANS, &proc_map_data_trans[i]);
+            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
         }
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
@@ -307,24 +336,24 @@ int buildTransformParameter(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
 
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100)
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"rank_data        rank_trans"<<std::endl;
-        for(int i=0; i<nproc_data; ++i)
-            log<<i<<"\t\t\t"<<proc_map_data_trans[i]<<std::endl;
+        log << "rank_data        rank_trans" << std::endl;
+        for (int i = 0; i < nproc_data; ++i)
+            log << i << "\t\t\t" << proc_map_data_trans[i] << std::endl;
     }
-    #endif
+#endif
     // end debug
 
-    for(int i=0; i<nnz; ++i)
+    for (int i = 0; i < nnz; ++i)
     {
-        int l_col=colidx[i];
-        int g_col=SRC_Matrix.globalCol(l_col);
+        int l_col = colidx[i];
+        int g_col = SRC_Matrix.globalCol(l_col);
         int dst_process;
-        int dst_col=DST_Matrix.localCol(g_col, dst_process);
-        int dst_process_trans=proc_map_data_trans[dst_process];
+        int dst_col = DST_Matrix.localCol(g_col, dst_process);
+        int dst_process_trans = proc_map_data_trans[dst_process];
         /*
         // debug
         #ifdef _DEBUG
@@ -341,146 +370,164 @@ int buildTransformParameter(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix
         */
         ++sender_size_process[dst_process_trans];
     }
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) log<<"sender_size_process is creaated"<<std::endl;
-    #endif
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "sender_size_process is creaated" << std::endl;
+#endif
     // end debug
 
     // transfer sender index size to receiver index size
     MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) log<<"receiver_size_process is got"<<std::endl;
-    #endif
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "receiver_size_process is got" << std::endl;
+#endif
     // end debug
 
     // setup all2all parameters
-    sender_displacement_process[0]=0;
-    for(int i=1; i<NPROC_TRANS; ++i)
+    sender_displacement_process[0] = 0;
+    for (int i = 1; i < NPROC_TRANS; ++i)
     {
-        sender_displacement_process[i]=sender_displacement_process[i-1]+sender_size_process[i-1];
+        sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1];
     }
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) log<<"sender_displacement_process is creaated"<<std::endl;
-    #endif
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "sender_displacement_process is creaated" << std::endl;
+#endif
     // end debug
 
-    receiver_displacement_process[0]=0;
-    receiver_size=receiver_size_process[0];
-    for(int i=1; i<NPROC_TRANS; ++i)
+    receiver_displacement_process[0] = 0;
+    receiver_size = receiver_size_process[0];
+    for (int i = 1; i < NPROC_TRANS; ++i)
     {
-        receiver_displacement_process[i]=receiver_displacement_process[i-1]+receiver_size_process[i-1];
-        receiver_size+=receiver_size_process[i];
+        receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1];
+        receiver_size += receiver_size_process[i];
     }
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100)
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"sender_size and receiver_displacement_process are creaated"<<std::endl;
-        log<<"receiver_size (in CCS) = "<<receiver_size<<std::endl;
+        log << "sender_size and receiver_displacement_process are creaated" << std::endl;
+        log << "receiver_size (in CCS) = " << receiver_size << std::endl;
     }
-    #endif
+#endif
     // end debug
 
     // setup receiver index
     // setup sender_index
     std::vector<int> sender_index(sender_size);
-    for(int i=0; i<nnz; ++i)
+    for (int i = 0; i < nnz; ++i)
     {
-        int l_col=colidx[i];
-        int g_col=SRC_Matrix.globalCol(l_col);
+        int l_col = colidx[i];
+        int g_col = SRC_Matrix.globalCol(l_col);
         int dst_process;
-        int dst_col=DST_Matrix.localCol(g_col, dst_process);
-        int l_row=rowidx[i];
-        int dst_row=SRC_Matrix.globalRow(l_row);
-        sender_index[i]=dst_col*DST_Matrix.size+dst_row;
+        int dst_col = DST_Matrix.localCol(g_col, dst_process);
+        int l_row = rowidx[i];
+        int dst_row = SRC_Matrix.globalRow(l_row);
+        sender_index[i] = dst_col * DST_Matrix.size + dst_row;
     }
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) log<<"sender_index is got"<<std::endl;
-    #endif
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "sender_index is got" << std::endl;
+#endif
     // end debug
 
     // transfer index to receiver
     std::vector<int> receiver_index(receiver_size);
-    MPI_Alltoallv(&sender_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT,
-                  &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS);
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) log<<"receiver_index is got"<<std::endl;
-    #endif
+    MPI_Alltoallv(&sender_index[0],
+                  &sender_size_process[0],
+                  &sender_displacement_process[0],
+                  MPI_INT,
+                  &receiver_index[0],
+                  &receiver_size_process[0],
+                  &receiver_displacement_process[0],
+                  MPI_INT,
+                  COMM_TRANS);
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "receiver_index is got" << std::endl;
+#endif
     // end debug
 
     // setup buffer2ccsIndex based on receiver_index
     buffer2ccsIndex.resize(receiver_size);
     DST_Matrix.setnnz(receiver_size);
-    buildCCSParameter(receiver_size, NPROC_TRANS,
-            receiver_size_process, receiver_displacement_process,
-            &receiver_index[0], DST_Matrix, &buffer2ccsIndex[0]);
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100) 
+    buildCCSParameter(receiver_size,
+                      NPROC_TRANS,
+                      receiver_size_process,
+                      receiver_displacement_process,
+                      &receiver_index[0],
+                      DST_Matrix,
+                      &buffer2ccsIndex[0]);
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"ccs parameter is built"<<std::endl;
+        log << "ccs parameter is built" << std::endl;
         log.close();
     }
-    #endif
+#endif
     // end debug
     return 0;
 }
 
-int newGroupCommTrans(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix,
-                      MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS)
+int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                      DistCCSMatrix& DST_Matrix,
+                      MPI_Group& GROUP_TRANS,
+                      MPI_Comm& COMM_TRANS)
 {
-    // debug
-    #ifdef _DEBUG
+// debug
+#ifdef _DEBUG
     char f_log[80];
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
     std::ofstream log;
-    if(myproc<100)
+    if (myproc < 100)
     {
         sprintf(f_log, "transformer_%2.2d.log", myproc);
         log.open(f_log, std::ios::app);
-        //log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
-        log<<"enter newGroupCommTrans"<<std::endl;
+        // log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
+        log << "enter newGroupCommTrans" << std::endl;
     }
-    #endif
+#endif
     // build transfortram communicator which contains both processes of BCD processors and
     // CCS processors with nonzero elements
     MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS);
     MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
-    // debug
-    #ifdef _DEBUG
-    if(myproc<100)
+// debug
+#ifdef _DEBUG
+    if (myproc < 100)
     {
         int trans_myid, trans_nproc;
         int trans_gid, trans_gproc;
-        if(COMM_TRANS != MPI_COMM_NULL)
+        if (COMM_TRANS != MPI_COMM_NULL)
         {
             MPI_Comm_rank(COMM_TRANS, &trans_myid);
             MPI_Comm_size(COMM_TRANS, &trans_nproc);
         }
         else
         {
-            trans_myid=-1;
-            trans_nproc=-1;
+            trans_myid = -1;
+            trans_nproc = -1;
             // trans_gid=-1;
             // trans_gproc=-1;
         }
         MPI_Group_rank(GROUP_TRANS, &trans_gid);
         MPI_Group_size(GROUP_TRANS, &trans_gproc);
         int BCD_myid, BCD_nproc;
-        BCD_myid=SRC_Matrix.myproc;
-        BCD_nproc=SRC_Matrix.nprocs;
+        BCD_myid = SRC_Matrix.myproc;
+        BCD_nproc = SRC_Matrix.nprocs;
         int BCD_gid, BCD_gproc;
         MPI_Group_rank(SRC_Matrix.group, &BCD_gid);
         MPI_Group_size(SRC_Matrix.group, &BCD_gproc);
         int CCS_myid, CCS_nproc;
         int CCS_gid, CCS_gproc;
-        if(DST_Matrix.comm_data != MPI_COMM_NULL)
+        if (DST_Matrix.comm_data != MPI_COMM_NULL)
         {
             MPI_Comm_rank(DST_Matrix.comm_data, &CCS_myid);
             MPI_Comm_size(DST_Matrix.comm_data, &CCS_nproc);
@@ -494,59 +541,63 @@ int newGroupCommTrans(DistBCDMatrix &SRC_Matrix, DistCCSMatrix &DST_Matrix,
         }
         MPI_Group_rank(DST_Matrix.group_data, &CCS_gid);
         MPI_Group_size(DST_Matrix.group_data, &CCS_gproc);
-        log<<"myid in BCD:\t"<< BCD_myid <<"\tin CCS:\t"<< CCS_myid <<"\tin TRANS:\t"<< trans_myid
-        <<"\tBCD_gid:\t"<<BCD_gid<<"\tCCS_gid:\t"<<CCS_gid<<"\ttrans_gid:\t"<<trans_gid<<std::endl;
-        log<<"nproc in BCD:\t"<< BCD_nproc << "\tin CCS:\t" << CCS_nproc << "\tin TRANS:\t"<< trans_nproc
-        <<"\tBCD_gproc:\t"<<BCD_gproc<<"\tCCS_gproc:\t"<<CCS_gproc<<"\ttrans_gproc:\t"<<trans_gproc<<std::endl;
+        log << "myid in BCD:\t" << BCD_myid << "\tin CCS:\t" << CCS_myid << "\tin TRANS:\t" << trans_myid
+            << "\tBCD_gid:\t" << BCD_gid << "\tCCS_gid:\t" << CCS_gid << "\ttrans_gid:\t" << trans_gid << std::endl;
+        log << "nproc in BCD:\t" << BCD_nproc << "\tin CCS:\t" << CCS_nproc << "\tin TRANS:\t" << trans_nproc
+            << "\tBCD_gproc:\t" << BCD_gproc << "\tCCS_gproc:\t" << CCS_gproc << "\ttrans_gproc:\t" << trans_gproc
+            << std::endl;
 
-        log<<"COMM_TRANS is created"<<std::endl;
+        log << "COMM_TRANS is created" << std::endl;
         log.close();
     }
-    #endif
+#endif
     // end debug
     return 0;
 }
 
-int deleteGroupCommTrans(MPI_Group &GROUP_TRANS, MPI_Comm &COMM_TRANS)
+int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 {
     MPI_Group_free(&GROUP_TRANS);
-    if(COMM_TRANS != MPI_COMM_NULL)
+    if (COMM_TRANS != MPI_COMM_NULL)
     {
         MPI_Comm_free(&COMM_TRANS);
     }
     return 0;
 }
 
-
 // transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // two destination matrices share the same non-zero elements positions
-// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, even if
-// one of them is acturely zero
-// All matrices must have same MPI communicator
-int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, const double ZERO_Limit,
-                   DistCCSMatrix &DST_Matrix,  double*& H_ccs, double*& S_ccs)
+// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero,
+// even if one of them is acturely zero All matrices must have same MPI communicator
+int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
+                      double* H_2d,
+                      double* S_2d,
+                      const double ZERO_Limit,
+                      DistCCSMatrix& DST_Matrix,
+                      double*& H_ccs,
+                      double*& S_ccs)
 {
-    // debug
-    #ifdef _DEBUG
+// debug
+#ifdef _DEBUG
     char f_log[80];
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
     std::ofstream log;
-    if(myproc<100)
+    if (myproc < 100)
     {
         sprintf(f_log, "transformer_%2.2d.log", myproc);
         log.open(f_log, std::ios::app);
-        log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
-        log<<"enter transformBCDtoCCS for H and S"<<std::endl;
+        log << std::endl << "LOG of process: " << myproc << std::endl;
+        log << "enter transformBCDtoCCS for H and S" << std::endl;
     }
-    #endif
+#endif
     // end debug
     MPI_Group GROUP_TRANS;
-    MPI_Comm COMM_TRANS=MPI_COMM_NULL;
+    MPI_Comm COMM_TRANS = MPI_COMM_NULL;
     newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS);
-    if(COMM_TRANS != MPI_COMM_NULL)
+    if (COMM_TRANS != MPI_COMM_NULL)
     {
-        //set up sender and receiver
+        // set up sender and receiver=
         int NPROC_TRANS;
         MPI_Comm_size(COMM_TRANS, &NPROC_TRANS);
         int sender_size;
@@ -556,37 +607,47 @@ int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, con
         std::vector<int> receiver_size_process(NPROC_TRANS);
         std::vector<int> receiver_displacement_process(NPROC_TRANS);
 
-        #ifdef _DEBUG
-        if(myproc<100)
+#ifdef _DEBUG
+        if (myproc < 100)
         {
-            log<<"nprocs: "<<SRC_Matrix.nprocs<<" ; myprow: "<<SRC_Matrix.myprow<<" ; mypcol: "<<SRC_Matrix.mypcol<<std::endl;
-            log<<"nblk:"<<SRC_Matrix.nblk<<" ; nrow: "<<SRC_Matrix.nrow<<" ; ncol: "<<SRC_Matrix.ncol<<std::endl;
-            log<<"layout:"<<SRC_Matrix.LAYOUT<<std::endl;
-            log<<"ZERO = "<<ZERO_Limit<<std::endl;
-            log<<"DST_Matrix parameters:"<<std::endl;
-            log<<"size: "<<DST_Matrix.size<<" ;nproc_data: "<<DST_Matrix.nproc_data<<std::endl;
-            log<<"start transforming H and S to CCS format"<<std::endl;
+            log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow
+                << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
+            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol
+                << std::endl;
+            log << "layout:" << SRC_Matrix.LAYOUT << std::endl;
+            log << "ZERO = " << ZERO_Limit << std::endl;
+            log << "DST_Matrix parameters:" << std::endl;
+            log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
+            log << "start transforming H and S to CCS format" << std::endl;
         }
-        #endif
+#endif
         // end debug
 
         // find out the non-zeros elements' positions
         std::vector<int> rowidx;
         std::vector<int> colidx;
-        int nnz=0;
-        #ifdef _DEBUG
-        if(myproc<100) log<<"start counting nnz..."<<std::endl;
-        #endif
-        if(SRC_Matrix.comm != MPI_COMM_NULL)
+        int nnz = 0;
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "start counting nnz..." << std::endl;
+#endif
+        if (SRC_Matrix.comm != MPI_COMM_NULL)
         {
-            getNonZeroIndex(SRC_Matrix.LAYOUT, SRC_Matrix.nrow, SRC_Matrix.ncol, H_2d, S_2d, ZERO_Limit,
-                            nnz, rowidx, colidx);
+            getNonZeroIndex(SRC_Matrix.LAYOUT,
+                            SRC_Matrix.nrow,
+                            SRC_Matrix.ncol,
+                            H_2d,
+                            S_2d,
+                            ZERO_Limit,
+                            nnz,
+                            rowidx,
+                            colidx);
         }
-        #ifdef _DEBUG
-        if(myproc<100)
+#ifdef _DEBUG
+        if (myproc < 100)
         {
-            log<<"NonZeroIndex is got, nnz is "<<nnz<<std::endl;
-            log<<"rowidx size: "<<rowidx.size()<<"; colidx size: "<<colidx.size()<<std::endl;
+            log << "NonZeroIndex is got, nnz is " << nnz << std::endl;
+            log << "rowidx size: " << rowidx.size() << "; colidx size: " << colidx.size() << std::endl;
             /*
             if(SRC_Matrix.comm != MPI_COMM_NULL)
             {
@@ -617,141 +678,177 @@ int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, con
             }
             */
         }
-        #endif
+#endif
 
         // build all2all transformation parameters and the map index of receiver buffer
         std::vector<int> buffer2ccsIndex;
-        buildTransformParameter(SRC_Matrix, DST_Matrix,
-                                NPROC_TRANS, GROUP_TRANS, COMM_TRANS,
-                                nnz, rowidx, colidx,
-                                sender_size, sender_size_process, sender_displacement_process,
-                                receiver_size, receiver_size_process, receiver_displacement_process, buffer2ccsIndex);
-        // Do transformation
-        #ifdef _DEBUG
-        if(myproc<100) log<<"Parameters are built"<<std::endl;
-        #endif
+        buildTransformParameter(SRC_Matrix,
+                                DST_Matrix,
+                                NPROC_TRANS,
+                                GROUP_TRANS,
+                                COMM_TRANS,
+                                nnz,
+                                rowidx,
+                                colidx,
+                                sender_size,
+                                sender_size_process,
+                                sender_displacement_process,
+                                receiver_size,
+                                receiver_size_process,
+                                receiver_displacement_process,
+                                buffer2ccsIndex);
+// Do transformation
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "Parameters are built" << std::endl;
+#endif
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
-        if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
         {
-            for(int i=0; i<sender_size; ++i)
+            for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i]=H_2d[rowidx[i]*SRC_Matrix.ncol+colidx[i]];
+                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
             }
         }
         else
         {
-            for(int i=0; i<sender_size; ++i)
+            for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i]=H_2d[colidx[i]*SRC_Matrix.nrow+rowidx[i]];
+                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
             }
         }
-        #ifdef _DEBUG
-        if(myproc<100) log<<"H sender_buffer is filled"<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "H sender_buffer is filled" << std::endl;
+#endif
         // do all2all transformation
-        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
-                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
-        // collect H from receiver buffer
-        #ifdef _DEBUG
-        if(myproc<100) log<<"H receiver_buffer is received"<<std::endl;
-        #endif
+        MPI_Alltoallv(&sender_buffer[0],
+                      &sender_size_process[0],
+                      &sender_displacement_process[0],
+                      MPI_DOUBLE,
+                      &receiver_buffer[0],
+                      &receiver_size_process[0],
+                      &receiver_displacement_process[0],
+                      MPI_DOUBLE,
+                      COMM_TRANS);
+// collect H from receiver buffer
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "H receiver_buffer is received" << std::endl;
+#endif
         delete[] H_ccs;
-        H_ccs=new double[receiver_size];
+        H_ccs = new double[receiver_size];
         buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs);
-        #ifdef _DEBUG
-        if(myproc<100) log<<"H_ccs is received"<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "H_ccs is received" << std::endl;
+#endif
 
         // put S to sender buffer
-        if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
         {
-            for(int i=0; i<sender_size; ++i)
+            for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i]=S_2d[rowidx[i]*SRC_Matrix.ncol+colidx[i]];
+                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
             }
         }
         else
         {
-            for(int i=0; i<sender_size; ++i)
+            for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i]=S_2d[colidx[i]*SRC_Matrix.nrow+rowidx[i]];
+                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
             }
         }
-        #ifdef _DEBUG
-        if(myproc<100) log<<"S sender_buffer is filled"<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "S sender_buffer is filled" << std::endl;
+#endif
         // do all2all transformation
-        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
-                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
-        // collect S from receiver buffer
-        #ifdef _DEBUG
-        if(myproc<100) log<<"S receiver_buffer is received"<<std::endl;
-        #endif
+        MPI_Alltoallv(&sender_buffer[0],
+                      &sender_size_process[0],
+                      &sender_displacement_process[0],
+                      MPI_DOUBLE,
+                      &receiver_buffer[0],
+                      &receiver_size_process[0],
+                      &receiver_displacement_process[0],
+                      MPI_DOUBLE,
+                      COMM_TRANS);
+// collect S from receiver buffer
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "S receiver_buffer is received" << std::endl;
+#endif
         delete[] S_ccs;
-        S_ccs=new double[receiver_size];
+        S_ccs = new double[receiver_size];
         buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs);
-        #ifdef _DEBUG
-        if(myproc<100) log<<"S_ccs is received"<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "S_ccs is received" << std::endl;
+#endif
     }
     // clear and return
     deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
-    #ifdef _DEBUG
-    if(myproc<100)
+#ifdef _DEBUG
+    if (myproc < 100)
     {
-        log<<"COMM_TRANS is deleted"<<std::endl;
+        log << "COMM_TRANS is deleted" << std::endl;
         log.close();
     }
-    #endif
+#endif
     return 0;
 }
 
 // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution
 // two source matrices share the same non-zero elements positions
-int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* EDMnzvalLocal,
-                    DistBCDMatrix& DST_Matrix, double* DM, double* EDM)
+int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
+                      double* DMnzvalLocal,
+                      double* EDMnzvalLocal,
+                      DistBCDMatrix& DST_Matrix,
+                      double* DM,
+                      double* EDM)
 {
-    // debug
-    #ifdef _DEBUG
+// debug
+#ifdef _DEBUG
     OUT(ofs_running, "transformCCStoBCD: start");
     MPI_Barrier(MPI_COMM_WORLD);
-    #endif
+#endif
     // end debug
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-    // debug
-    #ifdef _DEBUG
+// debug
+#ifdef _DEBUG
     std::ofstream log;
-    if(myproc<100)
+    if (myproc < 100)
     {
         char f_log[80];
         sprintf(f_log, "transformer_%2.2d.log", myproc);
-        //MPI_Barrier(MPI_COMM_WORLD);
+        // MPI_Barrier(MPI_COMM_WORLD);
         log.open(f_log, std::ios::app);
-        //MPI_Barrier(MPI_COMM_WORLD);
-        log<<"\nstart transform DMnzval to DM"<<std::endl;
+        // MPI_Barrier(MPI_COMM_WORLD);
+        log << "\nstart transform DMnzval to DM" << std::endl;
     }
     MPI_Barrier(MPI_COMM_WORLD);
-    #endif
+#endif
     // end debug
     MPI_Group GROUP_TRANS;
-    MPI_Comm COMM_TRANS=MPI_COMM_NULL;
+    MPI_Comm COMM_TRANS = MPI_COMM_NULL;
     newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS);
-    if(COMM_TRANS != MPI_COMM_NULL)
+    if (COMM_TRANS != MPI_COMM_NULL)
     {
         // init DM and EDM with 0
-        for(int i=0; i<DST_Matrix.nrow*DST_Matrix.ncol; ++i)
+        for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i)
         {
-            DM[i]=0;
-            EDM[i]=0;
+            DM[i] = 0;
+            EDM[i] = 0;
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         // MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"DM and EDM filled by 0"<<std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: DM and EDM filled by 0");
-        #endif
+        if (myproc < 100)
+            log << "DM and EDM filled by 0" << std::endl;
+// OUT(ofs_running, "transformCCStoBCD: DM and EDM filled by 0");
+#endif
         // setup number of local elements to be transfered to each remote processes
         int NPROC_TRANS;
         MPI_Comm_size(COMM_TRANS, &NPROC_TRANS);
@@ -763,23 +860,25 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* E
         int sender_displacement_process[NPROC_TRANS];
         int receiver_size_process[NPROC_TRANS];
         int receiver_displacement_process[NPROC_TRANS];
-        #ifdef _DEBUG
-        if(myproc<100) log<<"NPROC_TRANS = "<<NPROC_TRANS<<std::endl;
-        // MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"build process rank map from BCD to TRANS"<<std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: build process rank map from BCD to TRANS");
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "NPROC_TRANS = " << NPROC_TRANS << std::endl;
         // MPI_Barrier(COMM_TRANS);
-        #endif
+        if (myproc < 100)
+            log << "build process rank map from BCD to TRANS" << std::endl;
+// OUT(ofs_running, "transformCCStoBCD: build process rank map from BCD to TRANS");
+// MPI_Barrier(COMM_TRANS);
+#endif
         int nproc_bcd;
         std::vector<int> proc_map_bcd_trans;
         int myproc_trans;
         MPI_Comm_rank(COMM_TRANS, &myproc_trans);
-        if(myproc_trans == 0)
+        if (myproc_trans == 0)
         {
             MPI_Group_size(DST_Matrix.group, &nproc_bcd);
             MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
             proc_map_bcd_trans.resize(nproc_bcd, 0);
-            for(int i=0; i<nproc_bcd; ++i)
+            for (int i = 0; i < nproc_bcd; ++i)
             {
                 MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
             }
@@ -792,59 +891,62 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* E
             MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
         }
 
-        #ifdef _DEBUG
+#ifdef _DEBUG
         // check process map from BCD comm to TRANS comm
-        if(myproc<100) 
+        if (myproc < 100)
         {
-            log<<"check process map:\n";
-            log<<"pid in bcd\tpid in trans\n";
-            for(int i=0; i<nproc_bcd; ++i)
+            log << "check process map:\n";
+            log << "pid in bcd\tpid in trans\n";
+            for (int i = 0; i < nproc_bcd; ++i)
             {
-                log<<i<<"\t\t"<<proc_map_bcd_trans[i]<<std::endl;
+                log << i << "\t\t" << proc_map_bcd_trans[i] << std::endl;
             }
-            log<<"check pid from prow and pcol int bcd to pid in trans\n";
-            log<<"p_row  p_col  p_bcd  p_trans\n";
-            for(int i=0; i<DST_Matrix.nprows; ++i)
+            log << "check pid from prow and pcol int bcd to pid in trans\n";
+            log << "p_row  p_col  p_bcd  p_trans\n";
+            for (int i = 0; i < DST_Matrix.nprows; ++i)
             {
-                for(int j=0; j<DST_Matrix.npcols; ++j)
+                for (int j = 0; j < DST_Matrix.npcols; ++j)
                 {
-                    int pid_bcd=DST_Matrix.pnum(i, j);
-                    int pid_trans=proc_map_bcd_trans[pid_bcd];
-                    log<<i<<"\t"<<j<<"\t"<<pid_bcd<<"\t"<<pid_trans<<std::endl;
+                    int pid_bcd = DST_Matrix.pnum(i, j);
+                    int pid_trans = proc_map_bcd_trans[pid_bcd];
+                    log << i << "\t" << j << "\t" << pid_bcd << "\t" << pid_trans << std::endl;
                 }
             }
-            log<<"setup alltoall parameters"<<std::endl;
+            log << "setup alltoall parameters" << std::endl;
         }
         // OUT(ofs_running, "transformCCStoBCD: setup alltoall parameters");
         MPI_Barrier(COMM_TRANS);
-        #endif
+#endif
         // setup sender_size_process
         // std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
-        for(int i=0; i<NPROC_TRANS; ++i) sender_size_process[i]=0;
-        #ifdef _DEBUG
+        for (int i = 0; i < NPROC_TRANS; ++i)
+            sender_size_process[i] = 0;
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"sender_size_process is inited by 0"<<std::endl;
+        if (myproc < 100)
+            log << "sender_size_process is inited by 0" << std::endl;
         // OUT(ofs_running, "transformCCStoBCD: sender_size_process is inited by 0, size ", NPROC_TRANS);
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"display all columns and rows of nonzeros values:\n";
-        int log_nnz=0;
-        #endif
-        for(int icol=0; icol<SRC_Matrix.numColLocal; ++icol)
+        if (myproc < 100)
+            log << "display all columns and rows of nonzeros values:\n";
+        int log_nnz = 0;
+#endif
+        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
         {
-            int g_col=SRC_Matrix.globalCol(icol);
+            int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
-            int recv_col=DST_Matrix.localCol(g_col, recv_pcol_bcd);
+            int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
             // #ifdef _DEBUG
             // log<<g_col<<"\n ";
             // #endif
-            //OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
-            for(int rowidx=SRC_Matrix.colptrLocal[icol]-1;rowidx<SRC_Matrix.colptrLocal[icol+1]-1; ++rowidx)
+            // OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
+            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
             {
-                int g_row=SRC_Matrix.rowindLocal[rowidx]-1;
+                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
                 int recv_prow_bcd;
-                int recv_row=DST_Matrix.localRow(g_row, recv_prow_bcd);
-                int recv_proc_bcd=DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
-                int recv_proc=proc_map_bcd_trans[recv_proc_bcd];
+                int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
+                int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
+                int recv_proc = proc_map_bcd_trans[recv_proc_bcd];
                 ++sender_size_process[recv_proc];
                 // #ifdef _DEBUG
                 // log<<" "<<g_row;
@@ -853,253 +955,262 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* E
             }
             // log<<"\n";
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"sender_size_process is counted, total nonzeros are: "<<log_nnz<<std::endl;
-            log<<"target pid\tsize\n";
+            log << "sender_size_process is counted, total nonzeros are: " << log_nnz << std::endl;
+            log << "target pid\tsize\n";
             for (int i = 0; i < NPROC_TRANS; i++)
             {
-                log<<i<<"\t\t"<<sender_size_process[i]<<std::endl;
+                log << i << "\t\t" << sender_size_process[i] << std::endl;
             }
         }
-        //OUT(ofs_running, "transformCCStoBCD: sender_size_process is counted");
+        // OUT(ofs_running, "transformCCStoBCD: sender_size_process is counted");
         MPI_Barrier(COMM_TRANS);
-        #endif
+#endif
 
         // setup receiver_size_process
-        //std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0);
-        for(int i=0; i<NPROC_TRANS; ++i) receiver_size_process[i]=0;
+        // std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0);
+        for (int i = 0; i < NPROC_TRANS; ++i)
+            receiver_size_process[i] = 0;
         MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"receiver_size_process is got"<<std::endl;
-            log<<"target pid\tsize\n";
+            log << "receiver_size_process is got" << std::endl;
+            log << "target pid\tsize\n";
             for (int i = 0; i < NPROC_TRANS; i++)
             {
-                log<<i<<"\t\t"<<receiver_size_process[i]<<std::endl;
+                log << i << "\t\t" << receiver_size_process[i] << std::endl;
             }
         }
-        // OUT(ofs_running, "transformCCStoBCD: receiver_size_process is got");
-        #endif
+// OUT(ofs_running, "transformCCStoBCD: receiver_size_process is got");
+#endif
 
         // setup sender_displacement and receiver_displacement
-        sender_displacement_process[0]=0;
-        receiver_displacement_process[0]=0;
-        int receiver_size=receiver_size_process[0];
-        for(int i=1; i<NPROC_TRANS; ++i)
+        sender_displacement_process[0] = 0;
+        receiver_displacement_process[0] = 0;
+        int receiver_size = receiver_size_process[0];
+        for (int i = 1; i < NPROC_TRANS; ++i)
         {
-            sender_displacement_process[i]=sender_displacement_process[i-1]+sender_size_process[i-1];
-            receiver_displacement_process[i]=receiver_displacement_process[i-1]+receiver_size_process[i-1];
-            receiver_size+=receiver_size_process[i];
+            sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1];
+            receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1];
+            receiver_size += receiver_size_process[i];
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"displacements are built"<<std::endl;
-            log<<"check alltoallv parameters"<<std::endl;
-            for(int i=0; i<NPROC_TRANS; ++i)
+            log << "displacements are built" << std::endl;
+            log << "check alltoallv parameters" << std::endl;
+            for (int i = 0; i < NPROC_TRANS; ++i)
             {
-                log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
-                log<<i<<"\t"<<sender_size_process[i]<<"\t\t\t"<<sender_displacement_process[i]<<"\t\t\t\t"<<
-                                receiver_size_process[i]<<"\t\t\t"<<receiver_displacement_process[i]<<std::endl;
+                log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
+                       "receiver_displacement_process"
+                    << std::endl;
+                log << i << "\t" << sender_size_process[i] << "\t\t\t" << sender_displacement_process[i] << "\t\t\t\t"
+                    << receiver_size_process[i] << "\t\t\t" << receiver_displacement_process[i] << std::endl;
             }
         }
-        // OUT(ofs_running, "transformCCStoBCD: displacements are built");
-        #endif
+// OUT(ofs_running, "transformCCStoBCD: displacements are built");
+#endif
 
         // setup up sender index and receiver index
-        int sender_size=SRC_Matrix.nnzLocal;
+        int sender_size = SRC_Matrix.nnzLocal;
         int* sender_index;
         double* sender_buffer;
         int* dst_index;
         int* receiver_index;
         double* receiver_buffer;
-        #ifdef _DEBUG
-        if(myproc<100)
+#ifdef _DEBUG
+        if (myproc < 100)
         {
-            log<<"sender_size = "<<sender_size<<"; receiver_size = "<<receiver_size<<std::endl;
+            log << "sender_size = " << sender_size << "; receiver_size = " << receiver_size << std::endl;
             log.flush();
-            log<<"start allocating sender_index, dst_index and receiver_index..."<<std::endl;
+            log << "start allocating sender_index, dst_index and receiver_index..." << std::endl;
             log.flush();
         }
-        #endif
-        if(sender_size > 0)
+#endif
+        if (sender_size > 0)
         {
-            sender_index=new int[sender_size];
-            for(int i=0; i<sender_size; ++i)
+            sender_index = new int[sender_size];
+            for (int i = 0; i < sender_size; ++i)
             {
-                sender_index[i]=-1;
+                sender_index[i] = -1;
             }
-            sender_buffer=new double[sender_size];
-            dst_index=new int[2*sender_size];
-            for(int i=0; i<2*sender_size; ++i)
+            sender_buffer = new double[sender_size];
+            dst_index = new int[2 * sender_size];
+            for (int i = 0; i < 2 * sender_size; ++i)
             {
-                dst_index[i]=-1;
+                dst_index[i] = -1;
             }
         }
         else
         {
-            sender_index=new int[1];
-            sender_index[0]=-1;
-            sender_buffer=new double[1];
-            dst_index=new int[2];
-            dst_index[0]=-1;
-            dst_index[1]=-1;
+            sender_index = new int[1];
+            sender_index[0] = -1;
+            sender_buffer = new double[1];
+            dst_index = new int[2];
+            dst_index[0] = -1;
+            dst_index[1] = -1;
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"; receiver_index size: ";
-        #endif
-        if(receiver_size > 0)
+        if (myproc < 100)
+            log << "; receiver_index size: ";
+#endif
+        if (receiver_size > 0)
         {
-            receiver_index=new int[2*receiver_size];
-            receiver_buffer=new double[receiver_size];
-            for(int i=0; i<2*receiver_size; ++i)
+            receiver_index = new int[2 * receiver_size];
+            receiver_buffer = new double[receiver_size];
+            for (int i = 0; i < 2 * receiver_size; ++i)
             {
-                receiver_index[i]=-1;
+                receiver_index[i] = -1;
             }
-            for(int i=0; i<receiver_size; ++i)
+            for (int i = 0; i < receiver_size; ++i)
             {
-                receiver_buffer[i]=-1;
+                receiver_buffer[i] = -1;
             }
         }
         else
         {
-            receiver_index=new int[2];
-            receiver_buffer=new double[1];
-            receiver_index[0]=-1;
-            receiver_index[1]=-1;
-            receiver_buffer[0]=-1;
+            receiver_index = new int[2];
+            receiver_buffer = new double[1];
+            receiver_index[0] = -1;
+            receiver_index[1] = -1;
+            receiver_buffer[0] = -1;
         }
 
         // pointer to the first empty slot of each process
         // std::vector<int> p(sender_displacement_process);
         int p[NPROC_TRANS];
-        for(int i=0; i<NPROC_TRANS; ++i)
+        for (int i = 0; i < NPROC_TRANS; ++i)
         {
-            p[i]=sender_displacement_process[i];
+            p[i] = sender_displacement_process[i];
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"check BCD pnum"<<std::endl;
+            log << "check BCD pnum" << std::endl;
             log.flush();
-            for(int i=0; i<DST_Matrix.nprows; ++i)
+            for (int i = 0; i < DST_Matrix.nprows; ++i)
             {
-                for(int j=0; j<DST_Matrix.npcols; ++j)
+                for (int j = 0; j < DST_Matrix.npcols; ++j)
                 {
-                    log<<i<<"\t"<<j<<"\t"<<DST_Matrix.pnum(i, j)<<std::endl;
+                    log << i << "\t" << j << "\t" << DST_Matrix.pnum(i, j) << std::endl;
                 }
             }
-            log<<"source CCS matrix parameters:\n";
-            log<<"numColLocal: "<<SRC_Matrix.numColLocal<<std::endl;
-            log<<"pointer to beginning of each process is inited by sender_displacement_process"<<std::endl;
-            //log<<"icol"<<"\t"<<"g_col"<<"\t"<<"col(bcd)"<<"\t"<<"pcol(bcd)"<<std::endl;
-            //log.flush();
+            log << "source CCS matrix parameters:\n";
+            log << "numColLocal: " << SRC_Matrix.numColLocal << std::endl;
+            log << "pointer to beginning of each process is inited by sender_displacement_process" << std::endl;
+            // log<<"icol"<<"\t"<<"g_col"<<"\t"<<"col(bcd)"<<"\t"<<"pcol(bcd)"<<std::endl;
+            // log.flush();
         }
-        // MPI_Barrier(COMM_TRANS);
-        #endif
+// MPI_Barrier(COMM_TRANS);
+#endif
 
-        int idx=0;
-        #ifdef _DEBUG
-        if(myproc<100) log<<"idx start at "<<idx<<std::endl;
-        #endif
-        for(int icol=0; icol<SRC_Matrix.numColLocal; ++icol)
+        int idx = 0;
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "idx start at " << idx << std::endl;
+#endif
+        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
         {
-            int g_col=SRC_Matrix.globalCol(icol);
+            int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
-            int recv_col=DST_Matrix.localCol(g_col, recv_pcol_bcd);
-            for(int rowidx=SRC_Matrix.colptrLocal[icol]-1; rowidx<SRC_Matrix.colptrLocal[icol+1]-1; ++rowidx)
+            int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
+            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
             {
-                int g_row=SRC_Matrix.rowindLocal[rowidx]-1;
+                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
                 int recv_prow_bcd;
-                int recv_row=DST_Matrix.localRow(g_row, recv_prow_bcd);
-                #ifdef _DEBUG
-                if(myproc<100)
+                int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
+#ifdef _DEBUG
+                if (myproc < 100)
                 {
-                    if(recv_prow_bcd >= DST_Matrix.nprows || recv_prow_bcd < 0)
+                    if (recv_prow_bcd >= DST_Matrix.nprows || recv_prow_bcd < 0)
                     {
-                        log<<"ERROR: recv_prow_bcd error! recv_prow_bcd is "<<recv_prow_bcd<<"; max is "<<DST_Matrix.nprows<<std::endl;
+                        log << "ERROR: recv_prow_bcd error! recv_prow_bcd is " << recv_prow_bcd << "; max is "
+                            << DST_Matrix.nprows << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                int recv_proc_bcd=DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
-                #ifdef _DEBUG
+#endif
+                int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
+#ifdef _DEBUG
                 // MPI_Barrier(COMM_TRANS);
-                if(myproc<100)
+                if (myproc < 100)
                 {
-                    if(recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0)
+                    if (recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0)
                     {
-                        log<<"ERROR: recv_proc_bcd outbound! recv_proc_bcd is "<<recv_proc_bcd<<"; max is "<<NPROC_TRANS<<std::endl;
+                        log << "ERROR: recv_proc_bcd outbound! recv_proc_bcd is " << recv_proc_bcd << "; max is "
+                            << NPROC_TRANS << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                int recv_proc=proc_map_bcd_trans[recv_proc_bcd];
-                #ifdef _DEBUG
+#endif
+                int recv_proc = proc_map_bcd_trans[recv_proc_bcd];
+#ifdef _DEBUG
                 // MPI_Barrier(COMM_TRANS);
-                if(myproc<100)
+                if (myproc < 100)
                 {
-                    if(p[recv_proc] >= sender_size || p[recv_proc] < 0)
+                    if (p[recv_proc] >= sender_size || p[recv_proc] < 0)
                     {
-                        log<<"ERROR: sender_index's index outbound! "<<std::endl;
-                        log<<recv_prow_bcd<<" "<<recv_pcol_bcd<<recv_proc_bcd<<" "<<recv_proc<<std::endl;
-                        log<<p[recv_proc]<<" "<<sender_size<<std::endl;
+                        log << "ERROR: sender_index's index outbound! " << std::endl;
+                        log << recv_prow_bcd << " " << recv_pcol_bcd << recv_proc_bcd << " " << recv_proc << std::endl;
+                        log << p[recv_proc] << " " << sender_size << std::endl;
                         log.flush();
                     }
                 }
+// MPI_Barrier(COMM_TRANS);
+#endif
+                sender_index[p[recv_proc]] = idx;
+#ifdef _DEBUG
                 // MPI_Barrier(COMM_TRANS);
-                #endif
-                sender_index[p[recv_proc]]=idx;
-                #ifdef _DEBUG
-                // MPI_Barrier(COMM_TRANS);
-                if(myproc<100)
+                if (myproc < 100)
                 {
-                    if((p[recv_proc]*2+1) >= (2*sender_size)|| (p[recv_proc]*2+1) < 0)
+                    if ((p[recv_proc] * 2 + 1) >= (2 * sender_size) || (p[recv_proc] * 2 + 1) < 0)
                     {
-                        log<<"ERROR: dst_index's index outbound! recv_proc:"<<recv_proc<<"; p:"<<p[recv_proc]*2+1<<"; max is "<<2*sender_size<<std::endl;
+                        log << "ERROR: dst_index's index outbound! recv_proc:" << recv_proc
+                            << "; p:" << p[recv_proc] * 2 + 1 << "; max is " << 2 * sender_size << std::endl;
                         log.flush();
                     }
                 }
-                // MPI_Barrier(COMM_TRANS);
-                #endif
-                dst_index[p[recv_proc]*2]=recv_row;
-                dst_index[p[recv_proc]*2+1]=recv_col;
+// MPI_Barrier(COMM_TRANS);
+#endif
+                dst_index[p[recv_proc] * 2] = recv_row;
+                dst_index[p[recv_proc] * 2 + 1] = recv_col;
                 ++p[recv_proc];
                 ++idx;
             }
         }
 
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
         // check sender_index and dst_index
-        if(myproc<100)
+        if (myproc < 100)
         {
-            for(int i=0; i<sender_size; ++i)
+            for (int i = 0; i < sender_size; ++i)
             {
-                if(sender_index[i]<0 || sender_index[i]>SRC_Matrix.nnzLocal)
+                if (sender_index[i] < 0 || sender_index[i] > SRC_Matrix.nnzLocal)
                 {
-                    log<<"ERROR! sender_index outbound: "<<i<<" "<<sender_index[i]<<std::endl;
+                    log << "ERROR! sender_index outbound: " << i << " " << sender_index[i] << std::endl;
                     log.flush();
                 }
             }
-            for(int i=0; i<2*sender_size; ++i)
+            for (int i = 0; i < 2 * sender_size; ++i)
             {
-                if(dst_index[i]<0 || dst_index[i]>DST_Matrix.size)
+                if (dst_index[i] < 0 || dst_index[i] > DST_Matrix.size)
                 {
-                    log<<"ERROR! dst_index outbound: "<<i<<" "<<dst_index[i]<<" "<<DST_Matrix.size<<std::endl;
+                    log << "ERROR! dst_index outbound: " << i << " " << dst_index[i] << " " << DST_Matrix.size
+                        << std::endl;
                     log.flush();
                 }
             }
-            log<<"sender_index is built"<<std::endl;
-            log<<"sender_size = "<<sender_size<<std::endl;
+            log << "sender_index is built" << std::endl;
+            log << "sender_size = " << sender_size << std::endl;
             // for(int i=0; i<sender_size; i+=sender_size/100)
             //     log<<i<<"\t"<<dst_index[2*i]<<"\t"<<dst_index[2*i+1]<<std::endl;
             // OUT(ofs_running, "transformCCStoBCD: sender_index is built");
@@ -1112,33 +1223,34 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* E
                 {
                     sprintf(f_log, "sender_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
                     log_sender_index.open(f_log, std::ios::app);
-                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i]; ++j)
-                        log_sender_index<<sender_index[j]<<std::endl;
-                    log_sender_index.close();
+                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i];
+            ++j) log_sender_index<<sender_index[j]<<std::endl; log_sender_index.close();
                 }
             }
             */
         }
-        #endif
+#endif
 
-        for(int i=0; i<NPROC_TRANS; ++i)
+        for (int i = 0; i < NPROC_TRANS; ++i)
         {
-            sender_size_process[i]*=2;
-            sender_displacement_process[i]*=2;
-            receiver_size_process[i]*=2;
-            receiver_displacement_process[i]*=2;
+            sender_size_process[i] *= 2;
+            sender_displacement_process[i] *= 2;
+            receiver_size_process[i] *= 2;
+            receiver_displacement_process[i] *= 2;
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"Alltoall parameters for index array"<<std::endl;
-            log<<"dst_index size:"<<2*sender_size<<"\t receiver_index size: "<<2*receiver_size<<std::endl;
-            log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
-            for(int i=0; i<NPROC_TRANS; ++i)
+            log << "Alltoall parameters for index array" << std::endl;
+            log << "dst_index size:" << 2 * sender_size << "\t receiver_index size: " << 2 * receiver_size << std::endl;
+            log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
+                   "receiver_displacement_process"
+                << std::endl;
+            for (int i = 0; i < NPROC_TRANS; ++i)
             {
-                log<<i<<"\t"<<sender_size_process[i]<<"\t\t"<<sender_displacement_process[i]<<"\t\t"
-                   <<receiver_size_process[i]<<"\t\t"<<receiver_displacement_process[i]<<std::endl;
+                log << i << "\t" << sender_size_process[i] << "\t\t" << sender_displacement_process[i] << "\t\t"
+                    << receiver_size_process[i] << "\t\t" << receiver_displacement_process[i] << std::endl;
             }
             // save dst_index to file for debug
             /*std::ofstream log_dst_index;
@@ -1148,291 +1260,335 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* E
                 {
                     sprintf(f_log, "dst_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
                     log_dst_index.open(f_log, std::ios::app);
-                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i]; ++j)
-                        log_dst_index<<dst_index[j]<<std::endl;
-                    log_dst_index.close();
+                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i];
+            ++j) log_dst_index<<dst_index[j]<<std::endl; log_dst_index.close();
                 }
             }
             */
-            log<<"start alltoallv for index"<<std::endl;
+            log << "start alltoallv for index" << std::endl;
         }
         MPI_Barrier(COMM_TRANS);
-        // OUT(ofs_running, "transformCCStoBCD: sender_index is built");
-        #endif
-        MPI_Alltoallv(&dst_index[0], &sender_size_process[0], &sender_displacement_process[0], MPI_INT,
-                      &receiver_index[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_INT, COMM_TRANS);
-        #ifdef _DEBUG
+// OUT(ofs_running, "transformCCStoBCD: sender_index is built");
+#endif
+        MPI_Alltoallv(&dst_index[0],
+                      &sender_size_process[0],
+                      &sender_displacement_process[0],
+                      MPI_INT,
+                      &receiver_index[0],
+                      &receiver_size_process[0],
+                      &receiver_displacement_process[0],
+                      MPI_INT,
+                      COMM_TRANS);
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100)
+        if (myproc < 100)
         {
-            log<<"receiver_index is got"<<std::endl;
-            log<<"receiver_size is: "<<receiver_size<<std::endl;
+            log << "receiver_index is got" << std::endl;
+            log << "receiver_size is: " << receiver_size << std::endl;
             log.flush();
         }
-        /*
-        // save receiver_index to file for debug
-        std::ofstream log_rcv_index;
-        for(int i=0; i<NPROC_TRANS; ++i)
-        {
-            log<<"receive index (from proc_trans "<<i<<") is from "<<receiver_displacement_process[i]<<" to "<<receiver_displacement_process[i]+receiver_size_process[i]<<std::endl;
-            if(receiver_size_process[i] > 0)
-            {
-                sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans);
-                log_rcv_index.open(f_log, std::ios::app);
-                for(int j=receiver_displacement_process[i]; j<receiver_displacement_process[i]+receiver_size_process[i]; ++j)
-                    log_rcv_index<<receiver_index[j]<<std::endl;
-                log_rcv_index.close();
-            }
-        }
-        log<<"receiver_index values are saved"<<std::endl;
-        log.flush();
-        // MPI_Barrier(COMM_TRANS);
+/*
+// save receiver_index to file for debug
+std::ofstream log_rcv_index;
+for(int i=0; i<NPROC_TRANS; ++i)
+{
+    log<<"receive index (from proc_trans "<<i<<") is from "<<receiver_displacement_process[i]<<" to
+"<<receiver_displacement_process[i]+receiver_size_process[i]<<std::endl; if(receiver_size_process[i] > 0)
+    {
+        sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans);
+        log_rcv_index.open(f_log, std::ios::app);
+        for(int j=receiver_displacement_process[i]; j<receiver_displacement_process[i]+receiver_size_process[i]; ++j)
+            log_rcv_index<<receiver_index[j]<<std::endl;
+        log_rcv_index.close();
+    }
+}
+log<<"receiver_index values are saved"<<std::endl;
+log.flush();
+// MPI_Barrier(COMM_TRANS);
 
-        for(int i=0; i<receiver_size; ++i)
-        {
-            if(receiver_index[i*2]<0)
-            {
-                log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
-                log.flush();
-            }
-            else if(receiver_index[i*2]>DST_Matrix.nrow)
-            {
-                log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
-                log.flush();
-            }
-            if(receiver_index[i*2+1]<0)
-            {
-                log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
-                log.flush();
-            }
-            else if(receiver_index[i*2+1]>DST_Matrix.ncol)
-            {
-                log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
-                log.flush();
-            }
-        }
-        log<<"receiver_index values are checked"<<std::endl;
+for(int i=0; i<receiver_size; ++i)
+{
+    if(receiver_index[i*2]<0)
+    {
+        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
         log.flush();
-        MPI_Barrier(COMM_TRANS);
-        // OUT(ofs_running, "transformCCStoBCD: receiver_index is got");
-        */
-        #endif
+    }
+    else if(receiver_index[i*2]>DST_Matrix.nrow)
+    {
+        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+        log.flush();
+    }
+    if(receiver_index[i*2+1]<0)
+    {
+        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
+        log.flush();
+    }
+    else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+    {
+        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+        log.flush();
+    }
+}
+log<<"receiver_index values are checked"<<std::endl;
+log.flush();
+MPI_Barrier(COMM_TRANS);
+// OUT(ofs_running, "transformCCStoBCD: receiver_index is got");
+*/
+#endif
         // reset size and displacement for transfering matrix value by alltoall
-        for(int i=0; i<NPROC_TRANS; ++i)
+        for (int i = 0; i < NPROC_TRANS; ++i)
         {
-            sender_size_process[i]/=2;
-            sender_displacement_process[i]/=2;
-            receiver_size_process[i]/=2;
-            receiver_displacement_process[i]/=2;
+            sender_size_process[i] /= 2;
+            sender_displacement_process[i] /= 2;
+            receiver_size_process[i] /= 2;
+            receiver_displacement_process[i] /= 2;
         }
-        #ifdef _DEBUG
-        if(myproc<100)
+#ifdef _DEBUG
+        if (myproc < 100)
         {
-            log<<"size_process and displacement_process are reset for buffer transform"<<std::endl;
+            log << "size_process and displacement_process are reset for buffer transform" << std::endl;
             log.flush();
         }
         MPI_Barrier(COMM_TRANS);
-        #endif
+#endif
 
         // transfer DM
         // set up DM sender buffer
-        for(int i=0; i<sender_size; ++i)
+        for (int i = 0; i < sender_size; ++i)
         {
-            sender_buffer[i]=DMnzvalLocal[sender_index[i]];
+            sender_buffer[i] = DMnzvalLocal[sender_index[i]];
         }
-        #ifdef _DEBUG
-        if(myproc<100)
+#ifdef _DEBUG
+        if (myproc < 100)
         {
-            log<<"DM(CCS) is put to sender_buffer"<<std::endl;
+            log << "DM(CCS) is put to sender_buffer" << std::endl;
             log.flush();
             // OUT(ofs_running, "transformCCStoBCD: DM(CCS) is put to sender_buffer");
 
             // check receiver_index, which may be changed after alltoall for buffer
-            for(int i=0; i<receiver_size; ++i)
+            for (int i = 0; i < receiver_size; ++i)
             {
-                if(receiver_index[i*2]<0)
+                if (receiver_index[i * 2] < 0)
                 {
-                    log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
+                    log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " < 0"
+                        << std::endl;
                     log.flush();
                 }
-                else if(receiver_index[i*2]>DST_Matrix.nrow)
+                else if (receiver_index[i * 2] > DST_Matrix.nrow)
                 {
-                    log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+                    log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > "
+                        << DST_Matrix.nrow << std::endl;
                     log.flush();
                 }
-                if(receiver_index[i*2+1]<0)
+                if (receiver_index[i * 2 + 1] < 0)
                 {
-                    log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
+                    log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " < 0"
+                        << std::endl;
                     log.flush();
                 }
-                else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+                else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol)
                 {
-                    log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+                    log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > "
+                        << DST_Matrix.ncol << std::endl;
                     log.flush();
                 }
             }
-            log<<"receiver_index values are checked"<<std::endl;
+            log << "receiver_index values are checked" << std::endl;
             log.flush();
             // check parameters for alltoall for buffer
-            log<<"pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  receiver_displacement_process"<<std::endl;
-            for(int i=0; i<NPROC_TRANS; ++i)
+            log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
+                   "receiver_displacement_process"
+                << std::endl;
+            for (int i = 0; i < NPROC_TRANS; ++i)
             {
-                log<<i<<"\t"<<sender_size_process[i]<<"\t\t"<<sender_displacement_process[i]<<"\t\t"
-                   <<receiver_size_process[i]<<"\t\t"<<receiver_displacement_process[i]<<std::endl;
+                log << i << "\t" << sender_size_process[i] << "\t\t" << sender_displacement_process[i] << "\t\t"
+                    << receiver_size_process[i] << "\t\t" << receiver_displacement_process[i] << std::endl;
             }
             log.flush();
         }
         MPI_Barrier(COMM_TRANS);
-        #endif
+#endif
         // transfer sender buffer to receiver buffer
-        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
-                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
+        MPI_Alltoallv(&sender_buffer[0],
+                      &sender_size_process[0],
+                      &sender_displacement_process[0],
+                      MPI_DOUBLE,
+                      &receiver_buffer[0],
+                      &receiver_size_process[0],
+                      &receiver_displacement_process[0],
+                      MPI_DOUBLE,
+                      COMM_TRANS);
 
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"receiver_buffer is got from DM"<<std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
-        #endif
+        if (myproc < 100)
+            log << "receiver_buffer is got from DM" << std::endl;
+// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
+#endif
         // transform receiver_buffer to DM
-        if(DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
         {
-            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
-            for(int i=0; i<receiver_size; ++i)
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            for (int i = 0; i < receiver_size; ++i)
             {
-                int ix=receiver_index[2*i];
-                int iy=receiver_index[2*i+1];
-                int idx=ix*DST_Matrix.ncol+iy;
-                #ifdef _DEBUG
-                if(myproc<100)
+                int ix = receiver_index[2 * i];
+                int iy = receiver_index[2 * i + 1];
+                int idx = ix * DST_Matrix.ncol + iy;
+#ifdef _DEBUG
+                if (myproc < 100)
                 {
-                    if(idx<0 || idx>=DST_Matrix_elem)
+                    if (idx < 0 || idx >= DST_Matrix_elem)
                     {
-                        log<<"idx for DM ERROR: idx is "<<idx<<"; DM total size is "<<DST_Matrix_elem<<std::endl;
-                        log<<"index number is "<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" ncol = "<<DST_Matrix.ncol<<std::endl;
+                        log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
+                            << std::endl;
+                        log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy
+                            << " ncol = " << DST_Matrix.ncol << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                DM[idx]=receiver_buffer[i];
+#endif
+                DM[idx] = receiver_buffer[i];
             }
-        } else
+        }
+        else
         {
-            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
-            for(int i=0; i<receiver_size; ++i)
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            for (int i = 0; i < receiver_size; ++i)
             {
-                int ix=receiver_index[2*i];
-                int iy=receiver_index[2*i+1];
-                int idx=iy*DST_Matrix.nrow+ix;
-                #ifdef _DEBUG
-                if(myproc<100)
+                int ix = receiver_index[2 * i];
+                int iy = receiver_index[2 * i + 1];
+                int idx = iy * DST_Matrix.nrow + ix;
+#ifdef _DEBUG
+                if (myproc < 100)
                 {
-                    if(idx<0 || idx>=DST_Matrix_elem)
+                    if (idx < 0 || idx >= DST_Matrix_elem)
                     {
-                        log<<"idx for DM ERROR: idx is "<<idx<<"; DM total size is "<<DST_Matrix_elem<<std::endl;
-                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" nrow = "<<DST_Matrix.nrow<<std::endl;
+                        log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
+                            << std::endl;
+                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
+                            << " nrow = " << DST_Matrix.nrow << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                DM[idx]=receiver_buffer[i];
+#endif
+                DM[idx] = receiver_buffer[i];
             }
         }
 
-        #ifdef _DEBUG
-        if(myproc<100) log<<"DM(BCD) is got from receiver_buffer"<<std::endl;
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "DM(BCD) is got from receiver_buffer" << std::endl;
         MPI_Barrier(COMM_TRANS);
-        // OUT(ofs_running, "transformCCStoBCD: DM(BCD) is got from receiver_buffer");
-        #endif
+// OUT(ofs_running, "transformCCStoBCD: DM(BCD) is got from receiver_buffer");
+#endif
         // setup up sender buffer of EDM
-        for(int i=0; i<sender_size; ++i)
+        for (int i = 0; i < sender_size; ++i)
         {
-            sender_buffer[i]=EDMnzvalLocal[sender_index[i]];
+            sender_buffer[i] = EDMnzvalLocal[sender_index[i]];
         }
-        #ifdef _DEBUG
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"EDM(CCS) is put to sender_buffer"<<std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: EDM(CCS) is put to sender_buffer");
-        #endif
+        if (myproc < 100)
+            log << "EDM(CCS) is put to sender_buffer" << std::endl;
+// OUT(ofs_running, "transformCCStoBCD: EDM(CCS) is put to sender_buffer");
+#endif
 
         // transfer sender buffer to receiver buffer
-        MPI_Alltoallv(&sender_buffer[0], &sender_size_process[0], &sender_displacement_process[0], MPI_DOUBLE,
-                      &receiver_buffer[0], &receiver_size_process[0], &receiver_displacement_process[0], MPI_DOUBLE, COMM_TRANS);
-        #ifdef _DEBUG
+        MPI_Alltoallv(&sender_buffer[0],
+                      &sender_size_process[0],
+                      &sender_displacement_process[0],
+                      MPI_DOUBLE,
+                      &receiver_buffer[0],
+                      &receiver_size_process[0],
+                      &receiver_displacement_process[0],
+                      MPI_DOUBLE,
+                      COMM_TRANS);
+#ifdef _DEBUG
         MPI_Barrier(COMM_TRANS);
-        if(myproc<100) log<<"receiver_buffer is got from EDM"<<std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
-        #endif
+        if (myproc < 100)
+            log << "receiver_buffer is got from EDM" << std::endl;
+// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
+#endif
         // transform receiver_buffer to EDM
-        if(DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
         {
-            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
-            for(int i=0; i<receiver_size; ++i)
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            for (int i = 0; i < receiver_size; ++i)
             {
-                int ix=receiver_index[2*i];
-                int iy=receiver_index[2*i+1];
-                int idx=ix*DST_Matrix.ncol+iy;
-                #ifdef _DEBUG
-                if(myproc<100)
+                int ix = receiver_index[2 * i];
+                int iy = receiver_index[2 * i + 1];
+                int idx = ix * DST_Matrix.ncol + iy;
+#ifdef _DEBUG
+                if (myproc < 100)
                 {
-                    if(idx<0 || idx>=DST_Matrix_elem)
+                    if (idx < 0 || idx >= DST_Matrix_elem)
                     {
-                        log<<"idx for EDM ERROR: idx is "<<idx<<"; EDM total size is "<<DST_Matrix_elem<<std::endl;
-                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" ncol = "<<DST_Matrix.ncol<<std::endl;
+                        log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
+                            << std::endl;
+                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
+                            << " ncol = " << DST_Matrix.ncol << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                EDM[idx]=receiver_buffer[i];
+#endif
+                EDM[idx] = receiver_buffer[i];
             }
-        } else
+        }
+        else
         {
-            int DST_Matrix_elem=DST_Matrix.nrow*DST_Matrix.ncol;
-            for(int i=0; i<receiver_size; ++i)
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            for (int i = 0; i < receiver_size; ++i)
             {
-                int ix=receiver_index[2*i];
-                int iy=receiver_index[2*i+1];
-                int idx=iy*DST_Matrix.nrow+ix;
-                #ifdef _DEBUG
-                if(myproc<100)
+                int ix = receiver_index[2 * i];
+                int iy = receiver_index[2 * i + 1];
+                int idx = iy * DST_Matrix.nrow + ix;
+#ifdef _DEBUG
+                if (myproc < 100)
                 {
-                    if(idx<0 || idx>=DST_Matrix_elem)
+                    if (idx < 0 || idx >= DST_Matrix_elem)
                     {
-                        log<<"idx for EDM ERROR: idx is "<<idx<<"; EDM total size is "<<DST_Matrix_elem<<std::endl;
-                        log<<"index number is"<<2*i<<" ix = "<<ix<<" iy = "<<iy<<" nrow = "<<DST_Matrix.nrow<<std::endl;
+                        log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
+                            << std::endl;
+                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
+                            << " nrow = " << DST_Matrix.nrow << std::endl;
                         log.flush();
                     }
                 }
-                #endif
-                EDM[idx]=receiver_buffer[i];
+#endif
+                EDM[idx] = receiver_buffer[i];
             }
         }
-        #ifdef _DEBUG
-        if(myproc<100) log<<"EDM(BCD) is got from receiver_buffer"<<std::endl;
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "EDM(BCD) is got from receiver_buffer" << std::endl;
         MPI_Barrier(COMM_TRANS);
-        #endif
+#endif
         delete[] sender_index;
         delete[] sender_buffer;
         delete[] dst_index;
         delete[] receiver_index;
         delete[] receiver_buffer;
-        #ifdef _DEBUG
-        if(myproc<100) log<<"work arrays are deleted"<<std::endl;
-        #endif
+#ifdef _DEBUG
+        if (myproc < 100)
+            log << "work arrays are deleted" << std::endl;
+#endif
     }
-    #ifdef _DEBUG
-    if(myproc<100) log<<"OUT COMM_TRANS"<<std::endl;
-    if(myproc<100) log<<"before deleteGroupCommTrans"<<std::endl;
-    #endif
+#ifdef _DEBUG
+    if (myproc < 100)
+        log << "OUT COMM_TRANS" << std::endl;
+    if (myproc < 100)
+        log << "before deleteGroupCommTrans" << std::endl;
+#endif
     deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
-    #ifdef _DEBUG
+#ifdef _DEBUG
     MPI_Barrier(MPI_COMM_WORLD);
-    if(myproc<100)
+    if (myproc < 100)
     {
-        log<<"COMM_TRANS is deleted"<<std::endl;
+        log << "COMM_TRANS is deleted" << std::endl;
         log.close();
     }
     MPI_Barrier(MPI_COMM_WORLD);
     OUT(ofs_running, "transformCCStoBCD: finish job, COMM_TRANS is deleted");
-    #endif
+#endif
     return 0;
 }
diff --git a/source/module_hsolver/pexsi/pexsi_solver.cpp b/source/module_hsolver/pexsi/pexsi_solver.cpp
index 03929098e0..523e6bb2d5 100644
--- a/source/module_hsolver/pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/pexsi/pexsi_solver.cpp
@@ -1,10 +1,11 @@
 #include "pexsi_solver.h"
 
-#include "module_base/global_variable.h"
-#include "simplePEXSI.h"
+#include <mpi.h>
 
 #include <cstring>
-#include <mpi.h>
+
+#include "module_base/global_variable.h"
+#include "simplePEXSI.h"
 
 PEXSI_Solver::PEXSI_Solver(const int blacs_text,
                            const int nb,
@@ -38,22 +39,23 @@ int PEXSI_Solver::solve()
     extern MPI_Comm DIAG_WORLD;
     extern MPI_Comm GRID_WORLD;
     extern MPI_Group GRID_GROUP;
-    return simplePEXSI(MPI_COMM_WORLD,
-                       MPI_COMM_WORLD,
-                       GRID_GROUP,
-                       this->blacs_text,
-                       GlobalV::NLOCAL,
-                       this->nb,
-                       this->nrow,
-                       this->ncol,
-                       'C',
-                       this->h,
-                       this->s,
-                       GlobalV::nelec,
-                       "PEXSIOPTION",
-                       this->DM,
-                       this->EDM,
-                       this->totalEnergyH,
-                       this->totalEnergyS,
-                       this->totalFreeEnergy);
+    simplePEXSI(DIAG_WORLD,
+                GRID_WORLD,
+                GRID_GROUP,
+                this->blacs_text,
+                GlobalV::NLOCAL,
+                this->nb,
+                this->nrow,
+                this->ncol,
+                'C',
+                this->h,
+                this->s,
+                GlobalV::nelec,
+                "PEXSIOPTION",
+                this->DM,
+                this->EDM,
+                this->totalEnergyH,
+                this->totalEnergyS,
+                this->totalFreeEnergy);
+    return 0;
 }
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/pexsi/simplePEXSI.cpp
index 438936280f..0fbeb4e0a8 100644
--- a/source/module_hsolver/pexsi/simplePEXSI.cpp
+++ b/source/module_hsolver/pexsi/simplePEXSI.cpp
@@ -2,22 +2,24 @@
 // the H and S matrices are given by 2D block cyclic distribution
 // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution
 // #include "mpi.h"
-#include <iostream>
-#include <fstream>
-#include <cstring>
-#include <cmath>
+#include <mpi.h>
+
 #include <cfloat>
+#include <cmath>
+#include <cstring>
+#include <fstream>
+#include <iostream>
 #include <memory>
-#include <mpi.h>
+
+#include "DistBCDMatrix.h"
+#include "DistCCSMatrix.h"
+#include "DistMatrixTransformer.h"
 #include "c_pexsi_interface.h"
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
-#include "DistCCSMatrix.h"
-#include "DistBCDMatrix.h"
-#include "DistMatrixTransformer.h"
 
-inline void strtolower(char *sa, char *sb)
+inline void strtolower(char* sa, char* sb)
 {
     char c;
     int len = strlen(sa);
@@ -32,30 +34,34 @@ inline void strtolower(char *sa, char *sb)
 inline void setDefaultOption(int* int_para, double* double_para)
 {
     // options.spin=2;
-    double_para[0]=2;
+    double_para[0] = 2;
     // options.gap=0;
-    double_para[2]=0;
+    double_para[2] = 0;
     // ZERO_Limit=DBL_MIN;
-    double_para[11]=DBL_MIN;
+    double_para[11] = DBL_MIN;
     // options.matrixType=0;
-    int_para[3]=0;
+    int_para[3] = 0;
     // options.solver=1;
-    int_para[6]=1;
+    int_para[6] = 1;
     // options.ordering=0;
-    int_para[8]=0;
+    int_para[8] = 0;
     // options.rowOrdering=0;
-    int_para[9]=0;
+    int_para[9] = 0;
     // options.symmetric=0;
-    int_para[11]=0;
+    int_para[11] = 0;
     // options.transpose=0;
-    int_para[12]=0;
+    int_para[12] = 0;
     // options.nPoints=2;
-    int_para[14]=2;
+    int_para[14] = 2;
     // options.verbosity=1;
-    int_para[15]=1;
+    int_para[15] = 1;
 }
 
-int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOptions& options, int& numProcessPerPole, double& ZERO_Limit)
+int loadPEXSIOption(MPI_Comm comm,
+                    const std::string PexsiOptionFile,
+                    PPEXSIOptions& options,
+                    int& numProcessPerPole,
+                    double& ZERO_Limit)
 {
 
     // temp variable arrays read from conf file and will be bcast to all processors
@@ -96,10 +102,10 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti
     double double_para[12];
     int myid;
     MPI_Comm_rank(comm, &myid);
-    if(myid==0)
+    if (myid == 0)
     {
         std::ifstream ifs(PexsiOptionFile.c_str());
-        if(! ifs)
+        if (!ifs)
         {
             return 1;
         }
@@ -110,195 +116,195 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti
 
         char key[128];
         char lowercase_key[128];
-        const int LINE_LINGTH=1024;
+        const int LINE_LINGTH = 1024;
         char unused_string[LINE_LINGTH];
 
-        while(ifs.good())
+        while (ifs.good())
         {
             ifs >> key;
             //~ cout<<"readin word is: "<<key<<endl;
             strtolower(key, lowercase_key);
-            if(strcmp("spin", lowercase_key)==0)
+            if (strcmp("spin", lowercase_key) == 0)
             {
                 //~ ifs>>options.spin;
-                ifs>>double_para[0];
+                ifs >> double_para[0];
                 //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
             }
-            else if(strcmp("temperature", lowercase_key)==0)
+            else if (strcmp("temperature", lowercase_key) == 0)
             {
                 //~ ifs>>options.temperature;
-                ifs>>double_para[1];
+                ifs >> double_para[1];
                 //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
             }
-            else if(strcmp("gap", lowercase_key)==0)
+            else if (strcmp("gap", lowercase_key) == 0)
             {
                 //~ ifs>>options.gap;
-                ifs>>double_para[2];
+                ifs >> double_para[2];
                 //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
             }
-            else if(strcmp("deltae", lowercase_key)==0)
+            else if (strcmp("deltae", lowercase_key) == 0)
             {
                 //~ ifs>>options.deltaE;
-                ifs>>double_para[3];
+                ifs >> double_para[3];
                 //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
             }
-            else if(strcmp("numpole", lowercase_key)==0)
+            else if (strcmp("numpole", lowercase_key) == 0)
             {
                 //~ ifs>>options.numPole;
-                ifs>>int_para[0];
+                ifs >> int_para[0];
                 //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
             }
-            else if(strcmp("isinertiacount", lowercase_key)==0)
+            else if (strcmp("isinertiacount", lowercase_key) == 0)
             {
                 //~ ifs>>options.isInertiaCount;
-                ifs>>int_para[1];
+                ifs >> int_para[1];
                 //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
             }
-            else if(strcmp("maxpexsiiter", lowercase_key)==0)
+            else if (strcmp("maxpexsiiter", lowercase_key) == 0)
             {
                 //~ ifs>>options.maxPEXSIIter;
-                ifs>>int_para[2];
+                ifs >> int_para[2];
                 //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
             }
-            else if(strcmp("mumin0", lowercase_key)==0)
+            else if (strcmp("mumin0", lowercase_key) == 0)
             {
                 //~ ifs>>options.muMin0;
-                ifs>>double_para[4];
+                ifs >> double_para[4];
                 //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
             }
-            else if(strcmp("mumax0", lowercase_key)==0)
+            else if (strcmp("mumax0", lowercase_key) == 0)
             {
                 //~ ifs>>options.muMax0;
-                ifs>>double_para[5];
+                ifs >> double_para[5];
                 //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
             }
-            else if(strcmp("mu0", lowercase_key)==0)
+            else if (strcmp("mu0", lowercase_key) == 0)
             {
                 //~ ifs>>options.mu0;
-                ifs>>double_para[6];
+                ifs >> double_para[6];
                 //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
             }
-            else if(strcmp("muinertiatolerance", lowercase_key)==0)
+            else if (strcmp("muinertiatolerance", lowercase_key) == 0)
             {
                 //~ ifs>>options.muInertiaTolerance;
-                ifs>>double_para[7];
+                ifs >> double_para[7];
                 //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
             }
-            else if(strcmp("muinertiaexpansion", lowercase_key)==0)
+            else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
             {
                 //~ ifs>>options.muInertiaExpansion;
-                ifs>>double_para[8];
+                ifs >> double_para[8];
                 //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
             }
-            else if(strcmp("mupexsisafeguard", lowercase_key)==0)
+            else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
             {
                 //~ ifs>>options.muPEXSISafeGuard;
-                ifs>>double_para[9];
+                ifs >> double_para[9];
                 //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
             }
-            else if(strcmp("numelectronpexsitolerance", lowercase_key)==0)
+            else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
             {
                 //~ ifs>>options.numElectronPEXSITolerance;
-                ifs>>double_para[10];
+                ifs >> double_para[10];
                 //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
             }
-            else if(strcmp("zero_limit", lowercase_key)==0)
+            else if (strcmp("zero_limit", lowercase_key) == 0)
             {
-                ifs>>double_para[11];
+                ifs >> double_para[11];
             }
-            else if(strcmp("matrixtype", lowercase_key)==0)
+            else if (strcmp("matrixtype", lowercase_key) == 0)
             {
                 //~ ifs>>options.matrixType;
-                ifs>>int_para[3];
+                ifs >> int_para[3];
                 //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
             }
-            else if(strcmp("issymbolicfactorize", lowercase_key)==0)
+            else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
             {
                 //~ ifs>>options.isSymbolicFactorize;
-                ifs>>int_para[4];
+                ifs >> int_para[4];
                 //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
             }
-            else if(strcmp("isconstructcommpattern", lowercase_key)==0)
+            else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
             {
                 //~ ifs>>options.isConstructCommPattern;
-                ifs>>int_para[5];
+                ifs >> int_para[5];
                 //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
             }
-            else if(strcmp("solver", lowercase_key)==0)
+            else if (strcmp("solver", lowercase_key) == 0)
             {
                 //~ ifs>>options.solver;
-                ifs>>int_para[6];
+                ifs >> int_para[6];
                 //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
             }
-            else if(strcmp("symmetricstorage", lowercase_key)==0)
+            else if (strcmp("symmetricstorage", lowercase_key) == 0)
             {
                 //~ ifs>>options.symmetricStorage;
-                ifs>>int_para[7];
+                ifs >> int_para[7];
                 //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
             }
-            else if(strcmp("ordering", lowercase_key)==0)
+            else if (strcmp("ordering", lowercase_key) == 0)
             {
                 //~ ifs>>options.ordering;
-                ifs>>int_para[8];
+                ifs >> int_para[8];
                 //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
             }
-            else if(strcmp("rowordering", lowercase_key)==0)
+            else if (strcmp("rowordering", lowercase_key) == 0)
             {
                 //~ ifs>>options.rowOrdering;
-                ifs>>int_para[9];
+                ifs >> int_para[9];
                 //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
             }
-            else if(strcmp("npsymbfact", lowercase_key)==0)
+            else if (strcmp("npsymbfact", lowercase_key) == 0)
             {
                 //~ ifs>>options.npSymbFact;
-                ifs>>int_para[10];
+                ifs >> int_para[10];
                 //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
             }
-            else if(strcmp("symmetric", lowercase_key)==0)
+            else if (strcmp("symmetric", lowercase_key) == 0)
             {
                 //~ ifs>>options.symmetric;
-                ifs>>int_para[11];
+                ifs >> int_para[11];
                 //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
             }
-            else if(strcmp("transpose", lowercase_key)==0)
+            else if (strcmp("transpose", lowercase_key) == 0)
             {
                 //~ ifs>>options.transpose;
-                ifs>>int_para[12];
+                ifs >> int_para[12];
                 //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
             }
-            else if(strcmp("method", lowercase_key)==0)
+            else if (strcmp("method", lowercase_key) == 0)
             {
                 //~ ifs>>options.method;
-                ifs>>int_para[13];
+                ifs >> int_para[13];
                 //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
             }
-            else if(strcmp("npoints", lowercase_key)==0)
+            else if (strcmp("npoints", lowercase_key) == 0)
             {
                 //~ ifs>>options.nPoints;
-                ifs>>int_para[14];
+                ifs >> int_para[14];
                 //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
             }
-            else if(strcmp("verbosity", lowercase_key)==0)
+            else if (strcmp("verbosity", lowercase_key) == 0)
             {
                 //~ ifs>>options.verbosity;
-                ifs>>int_para[15];
+                ifs >> int_para[15];
                 //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
             }
-            else if(strcmp("numprocessperpole", lowercase_key)==0)
+            else if (strcmp("numprocessperpole", lowercase_key) == 0)
             {
                 //~ ifs>>options.verbosity;
-                ifs>>int_para[16];
+                ifs >> int_para[16];
                 //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
             }
             else
             {
-                if(key[0] == '#' || key[0] == '/')
+                if (key[0] == '#' || key[0] == '/')
                 {
                     ifs.getline(unused_string, LINE_LINGTH);
                 }
                 else
                 {
-                    std::cout<<" THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
+                    std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
                     return 1;
                 }
             }
@@ -310,36 +316,36 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti
     MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
 
     // setup PEXSI options from int_para and double_para
-    options.numPole=int_para[0];
-    options.isInertiaCount=int_para[1];
-    options.maxPEXSIIter=int_para[2];
-    options.matrixType=int_para[3];
-    options.isSymbolicFactorize=int_para[4];
-    options.isConstructCommPattern=int_para[5];
-    options.solver=int_para[6];
-    options.symmetricStorage=int_para[7];
-    options.ordering=int_para[8];
-    options.rowOrdering=int_para[9];
-    options.npSymbFact=int_para[10];
-    options.symmetric=int_para[11];
-    options.transpose=int_para[12];
-    options.method=int_para[13];
-    options.nPoints=int_para[14];
-    options.verbosity=int_para[15];
-    numProcessPerPole=int_para[16];
+    options.numPole = int_para[0];
+    options.isInertiaCount = int_para[1];
+    options.maxPEXSIIter = int_para[2];
+    options.matrixType = int_para[3];
+    options.isSymbolicFactorize = int_para[4];
+    options.isConstructCommPattern = int_para[5];
+    options.solver = int_para[6];
+    options.symmetricStorage = int_para[7];
+    options.ordering = int_para[8];
+    options.rowOrdering = int_para[9];
+    options.npSymbFact = int_para[10];
+    options.symmetric = int_para[11];
+    options.transpose = int_para[12];
+    options.method = int_para[13];
+    options.nPoints = int_para[14];
+    options.verbosity = int_para[15];
+    numProcessPerPole = int_para[16];
 
-    options.spin=double_para[0];
-    options.temperature=double_para[1];
-    options.gap=double_para[2];
-    options.deltaE=double_para[3];
-    options.muMin0=double_para[4];
-    options.muMax0=double_para[5];
-    options.mu0=double_para[6];
-    options.muInertiaTolerance=double_para[7];
-    options.muInertiaExpansion=double_para[8];
-    options.muPEXSISafeGuard=double_para[9];
-    options.numElectronPEXSITolerance=double_para[10];
-    ZERO_Limit=double_para[11];
+    options.spin = double_para[0];
+    options.temperature = double_para[1];
+    options.gap = double_para[2];
+    options.deltaE = double_para[3];
+    options.muMin0 = double_para[4];
+    options.muMax0 = double_para[5];
+    options.mu0 = double_para[6];
+    options.muInertiaTolerance = double_para[7];
+    options.muInertiaExpansion = double_para[8];
+    options.muPEXSISafeGuard = double_para[9];
+    options.numElectronPEXSITolerance = double_para[10];
+    ZERO_Limit = double_para[11];
 
     return 0;
 }
@@ -347,312 +353,308 @@ int loadPEXSIOption(MPI_Comm comm, const std::string PexsiOptionFile, PPEXSIOpti
 void splitNProc2NProwNPcol(const int NPROC, int& nprow, int& npcol)
 {
     int integral_part = (int)sqrt(NPROC);
-    if(NPROC%integral_part == 0)
+    if (NPROC % integral_part == 0)
     {
-        nprow=integral_part;
-        npcol=NPROC/integral_part;
+        nprow = integral_part;
+        npcol = NPROC / integral_part;
     }
     else
     {
         int flag;
         int i;
-        int low=pow(integral_part,2);
-        int high=pow(integral_part+1,2);
-        if( (NPROC-low) >= (high-NPROC))
+        int low = pow(integral_part, 2);
+        int high = pow(integral_part + 1, 2);
+        if ((NPROC - low) >= (high - NPROC))
         {
-            flag=integral_part+1;
+            flag = integral_part + 1;
         }
         else
         {
-            flag=integral_part;
+            flag = integral_part;
         }
-        for(i=flag; i>0; ++i)
+        for (i = flag; i > 0; ++i)
         {
-            if(NPROC%i == 0) break;
+            if (NPROC % i == 0)
+                break;
         }
-        nprow=i;
-        npcol=NPROC/i;
+        nprow = i;
+        npcol = NPROC / i;
     }
 }
 
-int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt,  // communicator parameters
-                const int size, const int nblk, const int nrow, const int ncol, char LAYOUT,  // matrix parameters
-                double* H, double* S,                 // input matrices
-                const double numElectronExact, const std::string PexsiOptionFile,         // pexsi parameters file
-                double*& DM, double*& EDM,      // output matrices
-                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy)      // output energy
+int simplePEXSI(MPI_Comm comm_PEXSI,
+                MPI_Comm comm_2D,
+                MPI_Group group_2D,
+                const int blacs_ctxt, // communicator parameters
+                const int size,
+                const int nblk,
+                const int nrow,
+                const int ncol,
+                char LAYOUT, // matrix parameters
+                double* H,
+                double* S, // input matrices
+                const double numElectronExact,
+                const std::string PexsiOptionFile, // pexsi parameters file
+                double*& DM,
+                double*& EDM, // output matrices
+                double& totalEnergyH,
+                double& totalEnergyS,
+                double& totalFreeEnergy) // output energy
 {
-    int out_log=0;
-    std::cout << "nrow: " << nrow << std::endl;
-    std::cout << "ncol: " << ncol << std::endl;
-    if(out_log == 1)
-    {
-        std::stringstream ss;
-        int nproc_2D, nproc_PEXSI;
-        int myid_2D, myid_PEXSI;
-        if(comm_2D != MPI_COMM_NULL)
-        {
-            MPI_Comm_size(comm_2D, &nproc_2D);
-            MPI_Comm_rank(comm_2D, &myid_2D);
-            ss.str("");
-            ss<<"\tIn 2D comm, myid = "<<myid_2D<<"; nproc = "<<nproc_2D<<std::endl;
-            ss<<"H[0] = "<<H[0]<<", H["<<nrow*ncol-1<<"] = "<<H[nrow*ncol-1]<<std::endl;
-            ss<<"S[0] = "<<S[0]<<", S["<<nrow*ncol-1<<"] = "<<S[nrow*ncol-1]<<std::endl;
-            ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss.str());
-        }
-        if(comm_PEXSI != MPI_COMM_NULL)
-        {
-            MPI_Comm_size(comm_PEXSI, &nproc_PEXSI);
-            MPI_Comm_rank(comm_PEXSI, &myid_PEXSI);
-            ss.str("");
-            ss<<"\tIn PEXSI comm, myid = "<<myid_PEXSI<<"; nproc = "<<nproc_PEXSI;
-            ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, ss.str());
-        }
-    }
-    if(comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) return 0;
+
+    if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL)
+        return 0;
     int myid;
     std::ofstream f_log;
-    if(comm_PEXSI != MPI_COMM_NULL)
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
         MPI_Comm_rank(comm_PEXSI, &myid);
-        // for log
-        #ifdef _DEBUG
-        if(myid<100) log_openfile(myid, f_log);
-        #endif
+// for log
+#ifdef _DEBUG
+        if (myid < 100)
+            log_openfile(myid, f_log);
+#endif
     }
 
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"set up PEXSI parameter, begin");
-    // set up PEXSI parameter
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"set up PEXSI parameter, begin");
+    //  set up PEXSI parameter
     PPEXSIOptions options;
     PPEXSISetDefaultOptions(&options);
     int numProcessPerPole;
     double ZERO_Limit;
     loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit);
-    //OUT(ofs_running, "checkpoint01");
-    // debug
-    #ifdef _DEBUG
-    if(comm_PEXSI != MPI_COMM_NULL)
+// OUT(ofs_running, "checkpoint01");
+//  debug
+#ifdef _DEBUG
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        if(myid<100) log_PEXSIOption(numElectronExact, f_log);
+        if (myid < 100)
+            log_PEXSIOption(numElectronExact, f_log);
     }
-    #endif
+#endif
     // end debug
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"set up PEXSI parameter, finish");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"set up PEXSI parameter, finish");
 
     // set up PEXSI plan
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "set_up_PEXSI_plan");
-    //OUT(ofs_running, "checkpoint02");
-    ModuleBase::timer::tick("DiagoPexsi","setup_PEXSI_plan");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // OUT(ofs_running, "checkpoint02");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan");
     PPEXSIPlan plan;
     int info;
     int outputFileIndex;
     int pexsi_prow, pexsi_pcol;
-    ModuleBase::timer::tick("DiagoPexsi","splitNProc2NProwNPcol");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol");
     splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol);
-    ModuleBase::timer::tick("DiagoPexsi","splitNProc2NProwNPcol");
-    //OUT(ofs_running, "checkpoint03");
-    #ifdef _DEBUG
-    //if(comm_PEXSI != MPI_COMM_NULL)
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol");
+// OUT(ofs_running, "checkpoint03");
+#ifdef _DEBUG
+    // if(comm_PEXSI != MPI_COMM_NULL)
     //{
-        if(myid<100) log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
-    //}
-    #endif
-    if(myid % (pexsi_prow * pexsi_pcol) == 0)
+    if (myid < 100)
+        log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
+//}
+#endif
+    if (myid % (pexsi_prow * pexsi_pcol) == 0)
     {
-        outputFileIndex=myid/(pexsi_prow*pexsi_pcol);
+        outputFileIndex = myid / (pexsi_prow * pexsi_pcol);
     }
     else
     {
-        outputFileIndex=-1;
+        outputFileIndex = -1;
     }
-    //OUT(ofs_running, "checkpoint04");
-    ModuleBase::timer::tick("DiagoPexsi","PEXSIPlanInit");
-    if(comm_PEXSI != MPI_COMM_NULL)
+    // OUT(ofs_running, "checkpoint04");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        //OUT(ofs_running, "checkpoint05");
+        // OUT(ofs_running, "checkpoint05");
         plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info);
-        #ifdef _DEBUG
-        //OUT(ofs_running, "checkpoint06");
-        if(myid<100) log_PEXSIinit(info, f_log);
-        //OUT(ofs_running, "checkpoint07");
-        #endif
+#ifdef _DEBUG
+        // OUT(ofs_running, "checkpoint06");
+        if (myid < 100)
+            log_PEXSIinit(info, f_log);
+// OUT(ofs_running, "checkpoint07");
+#endif
     }
-    ModuleBase::timer::tick("DiagoPexsi","PEXSIPlanInit");
-    //OUT(ofs_running, "checkpoint08");
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    ModuleBase::timer::tick("DiagoPexsi","setup_PEXSI_plan");
-	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"set_up_PEXSI_plan finish");
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
+    // OUT(ofs_running, "checkpoint08");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan");
 
     // create compressed column storage distribution matrix parameter
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin");
     DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size);
-    //OUT(ofs_running, "checkpoint09");
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"create compressed column storage distribution matrix parameter, finish");
+    // OUT(ofs_running, "checkpoint09");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"create compressed column storage distribution matrix parameter, finish");
 
-    #ifdef _DEBUG
-    if(comm_PEXSI != MPI_COMM_NULL)
+#ifdef _DEBUG
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        if(myid<100) log_DSTMatrix(DST_Matrix, f_log);
+        if (myid < 100)
+            log_DSTMatrix(DST_Matrix, f_log);
     }
-    #endif
+#endif
 
     // create block cyclic distribution matrix parameter
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"create block cyclic distribution matrix parameter, begin");
-    //OUT(ofs_running, "checkpoint10");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"create block cyclic distribution matrix parameter, begin");
+    // OUT(ofs_running, "checkpoint10");
     DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, LAYOUT);
-    //OUT(ofs_running, "checkpoint11");
-    #ifdef _DEBUG
-    if(comm_PEXSI != MPI_COMM_NULL)
+// OUT(ofs_running, "checkpoint11");
+#ifdef _DEBUG
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        if(myid<100) log_SRCMatrix(SRC_Matrix, f_log);
+        if (myid < 100)
+            log_SRCMatrix(SRC_Matrix, f_log);
     }
-    #endif
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	//DONE(ofs_running,"create block cyclic distribution matrix parameter, finish");
-
-    double *HnzvalLocal=new double[1];
-    double *SnzvalLocal=new double[1];
-    double *DMnzvalLocal=new double[1];
-    double *EDMnzvalLocal=new double[1];
-    double *FDMnzvalLocal=new double[1];
+#endif
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // DONE(ofs_running,"create block cyclic distribution matrix parameter, finish");
+    double* HnzvalLocal = nullptr;
+    double* SnzvalLocal = nullptr;
+    double* DMnzvalLocal = nullptr;
+    double* EDMnzvalLocal = nullptr;
+    double* FDMnzvalLocal = nullptr;
     // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformBCDtoCCS, begin");
-    //OUT(ofs_running, "checkpoint12");
-    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMat2PEXSI");
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // OUT(ofs_running, "checkpoint12");
     transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
-    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMat2PEXSI");
-    //MPI_Barrier(MPI_COMM_WORLD);
-    //LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformBCDtoCCS, finish");
-    //OUT(ofs_running, "checkpoint13");
-    if(comm_PEXSI != MPI_COMM_NULL)
+    // MPI_Barrier(MPI_COMM_WORLD);
+    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    // OUT(ofs_running, "checkpoint13");
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        // debug
-        #ifdef _DEBUG
-        if(myid<100) log_DSTparameter(DST_Matrix, HnzvalLocal, f_log);
-        #endif
+// debug
+#ifdef _DEBUG
+        if (myid < 100)
+            log_DSTparameter(DST_Matrix, HnzvalLocal, f_log);
+#endif
         // end debug
 
         // Load H and S to PEXSI
-        int isSIdentity=0;
-        //OUT(ofs_running, "checkpoint14");
-        PPEXSILoadRealHSMatrix(plan, options,
-                                size,
-                                DST_Matrix.nnz, DST_Matrix.nnzLocal,
-                                DST_Matrix.numColLocal, DST_Matrix.colptrLocal, DST_Matrix.rowindLocal,
-                                HnzvalLocal,
-                                isSIdentity,
-                                SnzvalLocal,
-                                &info);
-        //OUT(ofs_running, "checkpoint15");
-        #ifdef _DEBUG
-        if(myid<100) log_HSload(f_log);
-        #endif
+        int isSIdentity = 0;
+        // OUT(ofs_running, "checkpoint14");
+        PPEXSILoadRealHSMatrix(plan,
+                               options,
+                               size,
+                               DST_Matrix.nnz,
+                               DST_Matrix.nnzLocal,
+                               DST_Matrix.numColLocal,
+                               DST_Matrix.colptrLocal,
+                               DST_Matrix.rowindLocal,
+                               HnzvalLocal,
+                               isSIdentity,
+                               SnzvalLocal,
+                               &info);
+// OUT(ofs_running, "checkpoint15");
+#ifdef _DEBUG
+        if (myid < 100)
+            log_HSload(f_log);
+#endif
         // call PEXSI to solve Kohn-Sham equation
         // PPEXSIDFTDriver2(plan, &options,
-                         // numElectronExact,
-                         // &muPEXSI,
-                         // &numElectronPEXSI,
-                         // &numTotalInertiaIter,
-                         // &info);
+        // numElectronExact,
+        // &muPEXSI,
+        // &numElectronPEXSI,
+        // &numTotalInertiaIter,
+        // &info);
         double mu;
         double nelec;
         double muMinInertia;
         double muMaxInertia;
         int numTotalPEXSIIter;
         int numTotalInertiaIter; // Number of total inertia[out]
-        //OUT(ofs_running, "checkpoint16");
-        //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
-	    ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"PPEXSIDFTDriver, begin");
-        ModuleBase::timer::tick("Diago_LCAO_Matrix","PEXSIDFT");
-        PPEXSIDFTDriver(
-        plan, // PEXSI plan[in]
-        options, // PEXSI Options[in]
-        numElectronExact, // exact electron number[in]
-        &mu, // chemical potential[out]
-        &nelec, // number of electrons[out]
-        &muMinInertia, // Lower bound for mu after the last inertia[out]
-        &muMaxInertia, // Upper bound for mu after the last inertia[out]
-        &numTotalInertiaIter, // Number of total inertia[out]
-        &numTotalPEXSIIter, // number of total pexsi evaluation procedure[out]
-        &info ); // 0: successful; otherwise: unsuccessful
-        //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
-        ModuleBase::timer::tick("Diago_LCAO_Matrix","PEXSIDFT");
-	    ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"PPEXSIDFTDriver, finish");
-        //OUT(ofs_running, "checkpoint17");
+        // OUT(ofs_running, "checkpoint16");
+        // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+        ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
+        PPEXSIDFTDriver(plan,                 // PEXSI plan[in]
+                        options,              // PEXSI Options[in]
+                        numElectronExact,     // exact electron number[in]
+                        &mu,                  // chemical potential[out]
+                        &nelec,               // number of electrons[out]
+                        &muMinInertia,        // Lower bound for mu after the last inertia[out]
+                        &muMaxInertia,        // Upper bound for mu after the last inertia[out]
+                        &numTotalInertiaIter, // Number of total inertia[out]
+                        &numTotalPEXSIIter,   // number of total pexsi evaluation procedure[out]
+                        &info);               // 0: successful; otherwise: unsuccessful
+        // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+        ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
+// OUT(ofs_running, "checkpoint17");
 
-        // debug
-        #ifdef _DEBUG
-        if(myid<100) log_PEXSIcalled(mu, nelec, muMinInertia, muMaxInertia, numTotalPEXSIIter, f_log);
-        #endif
+// debug
+#ifdef _DEBUG
+        if (myid < 100)
+            log_PEXSIcalled(mu, nelec, muMinInertia, muMaxInertia, numTotalPEXSIIter, f_log);
+#endif
         // end debug
 
         // retrieve the results from the plan
-        delete[] DMnzvalLocal;
-        delete[] EDMnzvalLocal;
-        delete[] FDMnzvalLocal;
-        DMnzvalLocal=new double[DST_Matrix.nnzLocal];
-        EDMnzvalLocal=new double[DST_Matrix.nnzLocal];
-        FDMnzvalLocal=new double[DST_Matrix.nnzLocal];
-        if(myid < numProcessPerPole)
+        if (DMnzvalLocal != nullptr)
+            delete[] DMnzvalLocal;
+        if (EDMnzvalLocal != nullptr)
+            delete[] EDMnzvalLocal;
+        if (FDMnzvalLocal != nullptr)
+            delete[] FDMnzvalLocal;
+        DMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        EDMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        FDMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        if (myid < numProcessPerPole)
         {
-            PPEXSIRetrieveRealDFTMatrix(
-                plan,
-                DMnzvalLocal,
-                EDMnzvalLocal,
-                FDMnzvalLocal,
-                &totalEnergyH,
-                &totalEnergyS,
-                &totalFreeEnergy,
-                &info);
-            #ifdef _DEBUG
-            if(myid<100) log_DM(DST_Matrix, DMnzvalLocal, f_log);
-            #endif
+            PPEXSIRetrieveRealDFTMatrix(plan,
+                                        DMnzvalLocal,
+                                        EDMnzvalLocal,
+                                        FDMnzvalLocal,
+                                        &totalEnergyH,
+                                        &totalEnergyS,
+                                        &totalFreeEnergy,
+                                        &info);
+#ifdef _DEBUG
+            if (myid < 100)
+                log_DM(DST_Matrix, DMnzvalLocal, f_log);
+#endif
         }
-        std::cout << "totalEnergyH:" << totalEnergyH << "\ntotalEnergyS:" << totalEnergyS << "\ntotalFreeEnergy:" << totalFreeEnergy << std::endl; 
         // clean PEXSI
         PPEXSIPlanFinalize(plan, &info);
-        #ifdef _DEBUG
-        if(myid<100) log_PEXSIFinalized(f_log);
-        #endif
+#ifdef _DEBUG
+        if (myid < 100)
+            log_PEXSIFinalized(f_log);
+#endif
     }
-    //OUT(ofs_running, "checkpoint18");
+    // OUT(ofs_running, "checkpoint18");
 
     // transform Density Matrix and Energy Density Matrix from compressed column sparse matrix
     // back to 2D block cyclic distribution if neccessary
-    if(comm_2D != MPI_COMM_NULL)
+    if (comm_2D != MPI_COMM_NULL)
     {
-		delete[] DM;
-		delete[] EDM;
-		DM=new double[SRC_Matrix.nrow*SRC_Matrix.ncol];
-		EDM=new double[SRC_Matrix.nrow*SRC_Matrix.ncol];
-	}
-    #ifdef _DEBUG
-    //OUT(ofs_running, "checkpoint19");
-    if(myid<100) log_DMEDM_in_BCD_allocated(f_log);
+        delete[] DM;
+        delete[] EDM;
+        DM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
+        EDM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
+    }
+#ifdef _DEBUG
+    // OUT(ofs_running, "checkpoint19");
+    if (myid < 100)
+        log_DMEDM_in_BCD_allocated(f_log);
     MPI_Barrier(MPI_COMM_WORLD);
-    #endif
-    //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
-	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformCCStoBCD, begin");
-    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMAT22D");
+#endif
+    // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
-    ModuleBase::timer::tick("Diago_LCAO_Matrix","TransMAT22D");
-    //LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
-	ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running,"transformCCStoBCD, finish");
-    #ifdef _DEBUG
-	MPI_Barrier(MPI_COMM_WORLD);
-    //OUT(ofs_running, "checkpoint20");
-    if(comm_PEXSI != MPI_COMM_NULL)
+    ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
+    // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
+
+#ifdef _DEBUG
+    MPI_Barrier(MPI_COMM_WORLD);
+    // OUT(ofs_running, "checkpoint20");
+    if (comm_PEXSI != MPI_COMM_NULL)
     {
-        if(myid<100) log_DMtransformed(f_log);
-        if(myid<100) log_closefile(f_log);
+        if (myid < 100)
+            log_DMtransformed(f_log);
+        if (myid < 100)
+            log_closefile(f_log);
         // output result
         // save local data of DMnzvalLocal
         /*
@@ -684,17 +686,17 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const
         f_EDM.close();
         */
     }
-    #endif
-	MPI_Barrier(MPI_COMM_WORLD);
-    //OUT(ofs_running, "checkpoint21");
-	MPI_Barrier(MPI_COMM_WORLD);
+#endif
+    MPI_Barrier(MPI_COMM_WORLD);
+    // OUT(ofs_running, "checkpoint21");
+    MPI_Barrier(MPI_COMM_WORLD);
     delete[] DMnzvalLocal;
     delete[] EDMnzvalLocal;
     delete[] FDMnzvalLocal;
     delete[] HnzvalLocal;
     delete[] SnzvalLocal;
-	MPI_Barrier(MPI_COMM_WORLD);
-    //OUT(ofs_running, "checkpoint22");
-	//MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MPI_COMM_WORLD);
+    // OUT(ofs_running, "checkpoint22");
+    // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }

From ef1ff1aae68765035a44a73aed74786729507324 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Fri, 17 Nov 2023 17:07:23 +0800
Subject: [PATCH 05/44] CMake building implemented

---
 CMakeLists.txt                                |  9 +++++
 cmake/FindPEXSI.cmake                         | 27 ++++++++++++++
 cmake/FindParMETIS.cmake                      | 37 +++++++++++++++++++
 cmake/FindSuperLU.cmake                       | 27 ++++++++++++++
 source/module_hsolver/CMakeLists.txt          | 14 +++++++
 source/module_hsolver/pexsi/CMakeLists.txt    |  5 +++
 source/module_hsolver/test/CMakeLists.txt     | 14 +++++++
 .../module_hsolver/test/diago_lcao_test.cpp   |  7 ++++
 8 files changed, 140 insertions(+)
 create mode 100644 cmake/FindPEXSI.cmake
 create mode 100644 cmake/FindParMETIS.cmake
 create mode 100644 cmake/FindSuperLU.cmake
 create mode 100644 source/module_hsolver/pexsi/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 697b90d33c..41a8f777a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF)
 option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
 # Do not use the new container by default.
 option(ENABLE_CONTAINER "Enable the new multi-device container." OFF)
+option(USE_PEXSI "Enable support to PEXSI." ON)
 
 if (NOT ENABLE_MPI)
   set (ENABLE_LCAO OFF)
@@ -132,6 +133,14 @@ if(ENABLE_LCAO)
     target_link_libraries(${ABACUS_BIN_NAME} ELPA::ELPA)
     add_compile_definitions(__ELPA)
   endif()
+  if(USE_PEXSI)
+    find_package(PEXSI REQUIRED)
+    find_package(SuperLU REQUIRED)
+    find_package(ParMETIS REQUIRED)
+    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
+    include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
+    add_compile_definitions(__PEXSI)
+  endif()
 else()
   set(ENABLE_DEEPKS OFF)
   set(ENABLE_LIBRI OFF)
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
new file mode 100644
index 0000000000..6a1d699a81
--- /dev/null
+++ b/cmake/FindPEXSI.cmake
@@ -0,0 +1,27 @@
+###############################################################################
+# - Find cereal
+# Find the native cereal headers.
+#
+#  CEREAL_FOUND - True if cereal is found.
+#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
+
+find_path(PEXSI_INCLUDE_DIR
+    NAMES c_pexsi_interface.h
+    HINTS ${PEXSI_DIR}
+    PATH_SUFFIXES "include"
+)
+
+find_library(PEXSI_LIBRARY
+    NAMES pexsi
+    HINTS ${PEXSI_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+# Handle the QUIET and REQUIRED arguments and
+# set Cereal_FOUND to TRUE if all variables are non-zero.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR)
+
+# Copy the results to the output variables and target.
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR)
+
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
new file mode 100644
index 0000000000..b350f107f5
--- /dev/null
+++ b/cmake/FindParMETIS.cmake
@@ -0,0 +1,37 @@
+###############################################################################
+# - Find cereal
+# Find the native cereal headers.
+#
+#  CEREAL_FOUND - True if cereal is found.
+#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
+
+find_path(ParMETIS_INCLUDE_DIR
+    NAMES metis.h parmetis.h
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "include"
+)
+
+find_library(METIS_LIBRARY
+    NAMES metis
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+find_library(ParMETIS_LIBRARY
+    NAMES parmetis
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+# print libs
+# message(STATUS "ParMETIS_INCLUDE_DIR: ${ParMETIS_INCLUDE_DIR}")
+# message(STATUS "ParMETIS_LIBRARY: ${ParMETIS_LIBRARY}")
+
+# Handle the QUIET and REQUIRED arguments and
+# set Cereal_FOUND to TRUE if all variables are non-zero.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ParMETIS DEFAULT_MSG ParMETIS_LIBRARY METIS_LIBRARY)
+
+# Copy the results to the output variables and target.
+mark_as_advanced(ParMETIS_LIBRARY)
+
diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
new file mode 100644
index 0000000000..ac1d5b4fab
--- /dev/null
+++ b/cmake/FindSuperLU.cmake
@@ -0,0 +1,27 @@
+###############################################################################
+# - Find cereal
+# Find the native cereal headers.
+#
+#  CEREAL_FOUND - True if cereal is found.
+#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
+
+# find_path(SuperLU_INCLUDE_DIR
+#     NAMES *.h
+#     HINTS ${SuperLU_DIR}
+#     PATH_SUFFIXES "include"
+# )
+
+find_library(SuperLU_LIBRARY
+    NAMES libsuperlu_dist.a
+    HINTS ${SuperLU_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+# Handle the QUIET and REQUIRED arguments and
+# set Cereal_FOUND to TRUE if all variables are non-zero.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(SuperLU DEFAULT_MSG SuperLU_LIBRARY)
+
+# Copy the results to the output variables and target.
+mark_as_advanced(SuperLU_LIBRARY)
+
diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt
index c52a3d3773..9855dd3f2f 100644
--- a/source/module_hsolver/CMakeLists.txt
+++ b/source/module_hsolver/CMakeLists.txt
@@ -31,6 +31,19 @@ if(ENABLE_LCAO)
       add_coverage(diag_cusolver)
     endif()
   endif()
+
+  if(USE_PEXSI)
+  list(APPEND objects
+      diago_pexsi.cpp
+    )
+    # add_library(
+    #     pexsi
+    #     OBJECT
+    #     ${objects}
+    # )
+    add_subdirectory(pexsi)
+  endif()
+
 endif()
 
 add_library(
@@ -50,4 +63,5 @@ endif()
 IF (BUILD_TESTING)
   add_subdirectory(test)
   add_subdirectory(kernels/test)
+  message(STATUS "Building tests")
 endif()
diff --git a/source/module_hsolver/pexsi/CMakeLists.txt b/source/module_hsolver/pexsi/CMakeLists.txt
new file mode 100644
index 0000000000..8faab8b4b4
--- /dev/null
+++ b/source/module_hsolver/pexsi/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp)
+
+if(ENABLE_COVERAGE)
+  add_coverage(pexsi)
+endif()
diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt
index ce124c5a5f..94cf3f0ea3 100644
--- a/source/module_hsolver/test/CMakeLists.txt
+++ b/source/module_hsolver/test/CMakeLists.txt
@@ -52,12 +52,26 @@ AddTest(
 )
 
 if(ENABLE_LCAO)
+  # if(USE_ELPA and USE_PEXSI)
+  # AddTest(
+  #   TARGET HSolver_LCAO
+  #   LIBS ${math_libs} ELPA::ELPA base genelpa psi device
+  #   SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp 
+  # )
   if(USE_ELPA)
   AddTest(
     TARGET HSolver_LCAO
     LIBS ${math_libs} ELPA::ELPA base genelpa psi device
     SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp 
   )
+  # elseif(USE_PEXSI)
+  #   AddTest(
+  #     TARGET HSolver_LCAO
+  #     LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} base psi device
+  #     SOURCES diago_lcao_test.cpp ../diago_pexsi.cpp ../diago_blas.cpp 
+  #   )
+  #   # print out the PEXSI library path
+  #   message(STATUS "PEXSI_LIBRARY: ${PEXSI_LIBRARY}")
   else()
     AddTest(
       TARGET HSolver_LCAO
diff --git a/source/module_hsolver/test/diago_lcao_test.cpp b/source/module_hsolver/test/diago_lcao_test.cpp
index 1cfdf2cae9..5bfa7fc060 100644
--- a/source/module_hsolver/test/diago_lcao_test.cpp
+++ b/source/module_hsolver/test/diago_lcao_test.cpp
@@ -7,6 +7,9 @@
 #ifdef __ELPA
 #include "module_hsolver/diago_elpa.h"
 #endif
+#ifdef __PEXSI
+#include "module_hsolver/diago_pexsi.h"
+#endif
 
 #define PASSTHRESHOLD 1e-10
 #define DETAILINFO    false
@@ -64,6 +67,10 @@ template<class T> class DiagoPrepare
 #ifdef __ELPA
         else if(ks_solver == "genelpa")
             dh = new hsolver::DiagoElpa;
+#endif
+#ifdef __PEXSI
+        else if(ks_solver == "pexsi")
+            dh = new hsolver::DiagoPexsi;
 #endif
         else
         {

From 2529afe9d3862b269a6e97828eaf79744f29b23d Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Thu, 7 Dec 2023 16:13:32 +0800
Subject: [PATCH 06/44] Works

---
 CMakeLists.txt                                |  5 +++
 deps/LibRI                                    |  2 +-
 source/module_basis/module_ao/ORB_control.cpp |  4 +--
 source/module_elecstate/elecstate.cpp         | 33 ++++++++++-------
 source/module_elecstate/elecstate_lcao.cpp    | 23 +++++++++---
 source/module_elecstate/elecstate_print.cpp   |  4 +++
 source/module_esolver/esolver_ks.cpp          |  2 +-
 .../module_gint/gint_rho.cpp                  |  2 +-
 source/module_hsolver/diago_pexsi.cpp         |  8 +++--
 source/module_hsolver/diago_pexsi.h           | 35 ++++++++++---------
 source/module_hsolver/hsolver_lcao.cpp        | 23 ++++++------
 source/module_hsolver/pexsi/DistBCDMatrix.h   |  6 ++++
 source/module_hsolver/pexsi/DistCCSMatrix.h   |  5 +++
 .../pexsi/DistMatrixTransformer.h             |  6 ++++
 source/module_hsolver/pexsi/pexsi_solver.h    |  4 ++-
 source/module_hsolver/pexsi/simplePEXSI.h     |  7 +++-
 source/module_io/write_input.cpp              |  2 +-
 17 files changed, 117 insertions(+), 54 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4e3f85265..ecf4eb7b0b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,11 @@ project(ABACUS
     LANGUAGES CXX
 )
 
+# private options, should not be pushed to master
+set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
+set(SuperLU_DIR "~/projects/pexsi-build/superlu")
+set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
+
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
 option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
 option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
diff --git a/deps/LibRI b/deps/LibRI
index 553c91c0be..b321b71a86 160000
--- a/deps/LibRI
+++ b/deps/LibRI
@@ -1 +1 @@
-Subproject commit 553c91c0be1d60a86e7666f0502ef866c366c600
+Subproject commit b321b71a8677a88a42bbb78e6d31c10073454e14
diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp
index c2e6c9b9a1..580a99d7d0 100644
--- a/source/module_basis/module_ao/ORB_control.cpp
+++ b/source/module_basis/module_ao/ORB_control.cpp
@@ -205,7 +205,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running,
     bool div_2d;
     if (ks_solver == "lapack" || ks_solver == "cg" || ks_solver == "dav") div_2d = false;
 #ifdef __MPI
-    else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") div_2d = true;
+    else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") div_2d = true;
 #endif
     else
     {
@@ -382,7 +382,7 @@ assert(nb2d > 0);
     }
 
     // init blacs context for genelpa
-    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver")
+    if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi")
     {
         pv->set_desc(nlocal, nlocal, pv->nrow);
         pv->set_desc_wfc_Eij(nlocal, nbands, pv->nrow);
diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp
index 393c2d07d5..7e09f0f509 100644
--- a/source/module_elecstate/elecstate.cpp
+++ b/source/module_elecstate/elecstate.cpp
@@ -174,26 +174,33 @@ void ElecState::calEBand()
     ModuleBase::TITLE("ElecState", "calEBand");
     // calculate ebands using wg and ekb
     double eband = 0.0;
+    // if (GlobalV::KS_SOLVER == "pexsi")
+    // {
+    //     // tbd
+    // }
+    // else
+    {
 #ifdef _OPENMP
 #pragma omp parallel for collapse(2) reduction(+:eband)
 #endif
-    for (int ik = 0; ik < this->ekb.nr; ++ik)
-    {
-        for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++)
+        for (int ik = 0; ik < this->ekb.nr; ++ik)
         {
-            eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd);
+            for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++)
+            {
+                eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd);
+            }
         }
-    }
-    this->f_en.eband = eband;
-    if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft")
-    {
-        //==================================
-        // Reduce all the Energy in each cpu
-        //==================================
-        this->f_en.eband /= GlobalV::NPROC_IN_POOL;
+        this->f_en.eband = eband;
+        if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft")
+        {
+            //==================================
+            // Reduce all the Energy in each cpu
+            //==================================
+            this->f_en.eband /= GlobalV::NPROC_IN_POOL;
 #ifdef __MPI
-        Parallel_Reduce::reduce_all(this->f_en.eband);
+            Parallel_Reduce::reduce_all(this->f_en.eband);
 #endif
+        }
     }
     return;
 }
diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 9e8e732553..c43cdb9fd2 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -181,6 +181,7 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
             //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma);
             elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM));
             this->DM->cal_DMR();
+
             if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
             {
                 this->loc->dm_gamma.resize(GlobalV::NSPIN);
@@ -189,7 +190,11 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
                     this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is));
                 }
             }
+
         }
+
+        
+
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
 
         for (int ik = 0; ik < psi.get_nk(); ++ik)
@@ -273,13 +278,23 @@ double ElecStateLCAO<std::complex<double>>::get_spin_constrain_energy()
     return sc.cal_escon();
 }
 
-template class ElecStateLCAO<double>; // Gamma_only case
-template class ElecStateLCAO<std::complex<double>>; // multi-k case
-
-void ElecStateLCAO::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
+template<>
+void ElecStateLCAO<double>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
 {
     this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow);
     this->loc->dm_gamma[0].c = DM;
+    this->loc->out_dm = 1;
 }
 
+template<>
+void ElecStateLCAO<std::complex<double>>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
+{
+    ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case");
+}
+
+template class ElecStateLCAO<double>; // Gamma_only case
+template class ElecStateLCAO<std::complex<double>>; // multi-k case
+
+
+
 } // namespace elecstate
\ No newline at end of file
diff --git a/source/module_elecstate/elecstate_print.cpp b/source/module_elecstate/elecstate_print.cpp
index 1cc402ede5..e0da5777b5 100644
--- a/source/module_elecstate/elecstate_print.cpp
+++ b/source/module_elecstate/elecstate_print.cpp
@@ -295,6 +295,10 @@ void ElecState::print_etot(const bool converged,
     {
         label = "BP";
     }
+    else if (ks_solver_type == "pexsi")
+    {
+        label = "PE";
+    }
     else
     {
         ModuleBase::WARNING_QUIT("Energy", "print_etot found unknown ks_solver_type");
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 63592d9b66..2876c60f79 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -423,7 +423,7 @@ namespace ModuleESolver
                 double duration = (std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now() - iterstart)).count() / static_cast<double>(1e6);
 #endif
                 printiter(iter, drho, duration, diag_ethr);
-                if (this->conv_elec)
+                if (this->conv_elec && iter >= 5)
                 {
                     std::cout << "this->conv_elec" << std::endl;
                     this->niter = iter;
diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
index fe40162b0c..6d791f985f 100644
--- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
@@ -36,7 +36,7 @@ void Gint::gint_kernel_rho(
 		ModuleBase::GlobalFunc::ZEROS(psir_DM.ptr_1D, this->bxyz*LD_pool);
 		if(GlobalV::GAMMA_ONLY_LOCAL)
 		{
-			if (GlobalV::CALCULATION == "get_pchg")
+			if (GlobalV::CALCULATION == "get_pchg" || GlobalV::KS_SOLVER == "pexsi")
 			{
 				Gint_Tools::mult_psi_DM(
 					*this->gridt, this->bxyz, na_grid, LD_pool,
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index c0b4e3e24c..21750c13e8 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,3 +1,4 @@
+#include <complex>
 #ifdef __PEXSI
 #include "diago_pexsi.h"
 
@@ -15,7 +16,8 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 namespace hsolver
 {
 
-void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
+template<>
+void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
     matd h_mat, s_mat;
@@ -40,7 +42,9 @@ void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, dou
     this->totalEnergyH = this->ps->totalEnergyH;
     this->totalEnergyS = this->ps->totalEnergyS;
 }
-void DiagoPexsi::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
+
+template<>
+void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
     ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index a48b92b867..802ec51e97 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -13,24 +13,25 @@
 namespace hsolver
 {
 
-class DiagoPexsi : public DiagH<double>
-{
+  template <typename T>
+  class DiagoPexsi : public DiagH<T>
+  {
+  private:
+      using Real = typename GetTypeReal<T>::type;
   public:
-    DiagoPexsi(const Parallel_Orbitals* ParaV_in)
-    {
-      this->ParaV = ParaV_in;
-    }
-    void diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in) override;
-    void diag(hamilt::Hamilt<double>* phm_in, psi::Psi<std::complex<double>> &psi, double *eigenvalue_in) override;
-    const Parallel_Orbitals* ParaV;
-    double* DM;
-    double* EDM;
-    double totalEnergyH;
-    double totalEnergyS;
-    double totalFreeEnergy;
-    PEXSI_Solver* ps;
-};
-
+      DiagoPexsi(const Parallel_Orbitals* ParaV_in)
+      {
+          this->ParaV = ParaV_in;
+      }
+      void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
+      const Parallel_Orbitals* ParaV;
+      double* DM;
+      double* EDM;
+      double totalEnergyH;
+      double totalEnergyS;
+      double totalFreeEnergy;
+      PEXSI_Solver* ps;
+  };
 }
 
 #endif
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 0494e5897a..6ae226268e 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -81,23 +81,26 @@ void HSolverLCAO<T>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
         */
         ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!");
     }
+#ifdef __PEXSI
     else if (this->method == "pexsi")
     {
-        if (pdiagh != nullptr)
+        if (this->pdiagh != nullptr)
         {
-            if (pdiagh->method != this->method)
+            if (this->pdiagh->method != this->method)
             {
-                delete[] pdiagh;
-                pdiagh = nullptr;
+                delete[] this->pdiagh;
+                this->pdiagh = nullptr;
             }
         }
-        if (pdiagh == nullptr)
+        if (this->pdiagh == nullptr)
         {
-            DiagoPexsi* tem = new DiagoPexsi(this->ParaV);
+            DiagoPexsi<T>* tem = new DiagoPexsi<T>(this->ParaV);
             this->pdiagh = tem;
-            pdiagh->method = this->method;
+            // this->pdiagh = dynamic_cast<DiagoPexsi<T>*>(tem);
+            this->pdiagh->method = this->method;
         }
     }
+#endif
     else
     {
         ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!");
@@ -140,10 +143,10 @@ void HSolverLCAO<T>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
     // called in scf calculation
     if (this->method == "pexsi")
     {
-        DiagoPexsi* tem = dynamic_cast<DiagoPexsi*>(this->pdiagh);
+        DiagoPexsi<T>* tem = dynamic_cast<DiagoPexsi<T>*>(this->pdiagh);
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
-        elecstate::ElecStateLCAO* _pes = dynamic_cast<elecstate::ElecStateLCAO*>(pes);
-        pes->eband = tem->totalFreeEnergy;
+        elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
+        pes->f_en.eband = tem->totalFreeEnergy;
         _pes->get_DM_from_pexsi(tem->DM, tem->ParaV);
     }
     pes->psiToRho(psi);
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h
index a0b8c7a907..97c5e8652f 100644
--- a/source/module_hsolver/pexsi/DistBCDMatrix.h
+++ b/source/module_hsolver/pexsi/DistBCDMatrix.h
@@ -1,3 +1,7 @@
+#ifndef DISTBCDMATRIX_H
+#define DISTBCDMATRIX_H
+
+#include <mpi.h>
 // a Block Cyclic Data Distribution matrix
 // http://www.netlib.org/utk/papers/factor/node3.html
 // local matrix elements is stored in column major
@@ -61,3 +65,5 @@ class DistBCDMatrix {
         // 'C' or 'c' for column-major, which is used in Fortran
         char LAYOUT;
 };
+
+#endif // DISTBCDMATRIX_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h
index 43d1126bf6..48ec95d0fc 100644
--- a/source/module_hsolver/pexsi/DistCCSMatrix.h
+++ b/source/module_hsolver/pexsi/DistCCSMatrix.h
@@ -1,3 +1,7 @@
+#ifndef DISTCCSMATRIX_H
+#define DISTCCSMATRIX_H
+
+#include <mpi.h>
 // Distributed Compressed Column Storage Matrix format
 // used for PEXSI
 class DistCCSMatrix {
@@ -44,3 +48,4 @@ class DistCCSMatrix {
         int* rowindLocal;
 };
 
+#endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.h b/source/module_hsolver/pexsi/DistMatrixTransformer.h
index be92935642..cdc0a53f74 100644
--- a/source/module_hsolver/pexsi/DistMatrixTransformer.h
+++ b/source/module_hsolver/pexsi/DistMatrixTransformer.h
@@ -1,3 +1,8 @@
+#ifndef DISTMATRIXTRANSFORMER_H
+#define DISTMATRIXTRANSFORMER_H
+
+#include "DistBCDMatrix.h"
+#include "DistCCSMatrix.h"
 // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // they should have same MPI communicator
 // The local matrix of BCD is column-major order
@@ -18,3 +23,4 @@ int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, con
 int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal,
                     DistBCDMatrix& DST_Matrix, double* DM_2d, double* END_2d);
 
+#endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/pexsi/pexsi_solver.h
index 95ade7c15f..52f23b663b 100644
--- a/source/module_hsolver/pexsi/pexsi_solver.h
+++ b/source/module_hsolver/pexsi/pexsi_solver.h
@@ -1,5 +1,6 @@
 #ifndef PEXSI_Solver_H
 #define PEXSI_Solver_H
+
 class PEXSI_Solver
 {
   public:
@@ -27,4 +28,5 @@ class PEXSI_Solver
     double totalEnergyS;
     double totalFreeEnergy;
 };
-#endif
\ No newline at end of file
+
+#endif // PEXSI_Solver_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/simplePEXSI.h b/source/module_hsolver/pexsi/simplePEXSI.h
index 5bdf8d8bbb..6a23ba6600 100644
--- a/source/module_hsolver/pexsi/simplePEXSI.h
+++ b/source/module_hsolver/pexsi/simplePEXSI.h
@@ -1,3 +1,6 @@
+#ifndef SIMPLE_PEXSI_H
+#define SIMPLE_PEXSI_H
+
 #include <mpi.h>
 // a simple interface for calling pexsi with 2D block cyclic distributed matrix
 int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt,  // communicator parameters
@@ -5,4 +8,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const
                 double* H, double* S,                 // input matrices
                 const double nElectronExact, const std::string PexsiOptionFile,        // pexsi parameters file
                 double*& DM, double*& EDM,      // output matrices
-                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy);
\ No newline at end of file
+                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy);
+
+#endif // SIMPLE_PEXSI_H
\ No newline at end of file
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index 703549a0be..addc18a976 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -208,7 +208,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
 
     ofs << "\n#Parameters (5.LCAO)" << std::endl;
     ModuleBase::GlobalFunc::OUTP(ofs, "basis_type", basis_type, "PW; LCAO in pw; LCAO");
-    if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver")
+    if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi")
     {
         ModuleBase::GlobalFunc::OUTP(ofs, "nb2d", nb2d, "2d distribution of atoms");
     }

From 09da6b1a31231a23280531ead84e612482df3d1d Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Mon, 15 Jan 2024 17:11:02 +0800
Subject: [PATCH 07/44] adapt to the new container

---
 CMakeLists.txt                                | 11 ++-
 cmake/FindPEXSI.cmake                         | 33 +++++++-
 cmake/FindParMETIS.cmake                      | 37 ---------
 cmake/FindSuperLU.cmake                       | 27 -------
 source/module_elecstate/elecstate.cpp         |  5 --
 source/module_elecstate/elecstate_lcao.cpp    | 78 +++++++++++++------
 source/module_elecstate/elecstate_lcao.h      |  4 +-
 .../module_gint/gint_rho.cpp                  |  2 +-
 source/module_hsolver/diago_pexsi.h           |  7 +-
 source/module_hsolver/hsolver_lcao.cpp        | 13 +++-
 10 files changed, 105 insertions(+), 112 deletions(-)
 delete mode 100644 cmake/FindParMETIS.cmake
 delete mode 100644 cmake/FindSuperLU.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ecf4eb7b0b..d6d270a9bb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,9 +11,10 @@ project(ABACUS
 )
 
 # private options, should not be pushed to master
-set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
-set(SuperLU_DIR "~/projects/pexsi-build/superlu")
-set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
+# set(PEXSI_DIR "~/Documents/projects/abacus-dependencies/pexsi")
+# set(SuperLU_DIR "~/Documents/projects/abacus-dependencies/superlu")
+# set(ParMETIS_DIR "~/Documents/projects/abacus-dependencies/parmetis")
+# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001")
 
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
 option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
@@ -39,7 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF)
 option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
 option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
-option(USE_PEXSI "Enable support to PEXSI." ON)
+option(USE_PEXSI "Enable support for PEXSI." ON)
 
 # get commit info
 if(COMMIT_INFO)
@@ -179,8 +180,6 @@ if(ENABLE_LCAO)
   
   if(USE_PEXSI)
     find_package(PEXSI REQUIRED)
-    find_package(SuperLU REQUIRED)
-    find_package(ParMETIS REQUIRED)
     target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
     add_compile_definitions(__PEXSI)
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index 6a1d699a81..22fe4dd01c 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -2,8 +2,8 @@
 # - Find cereal
 # Find the native cereal headers.
 #
-#  CEREAL_FOUND - True if cereal is found.
-#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
+#  PEXSI_FOUND - True if cereal is found.
+#  PEXSI_INCLUDE_DIR - Where to find cereal headers.
 
 find_path(PEXSI_INCLUDE_DIR
     NAMES c_pexsi_interface.h
@@ -17,11 +17,36 @@ find_library(PEXSI_LIBRARY
     PATH_SUFFIXES "lib"
 )
 
+find_path(ParMETIS_INCLUDE_DIR
+    NAMES metis.h parmetis.h
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "include"
+)
+
+find_library(METIS_LIBRARY
+    NAMES metis
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+find_library(ParMETIS_LIBRARY
+    NAMES parmetis
+    HINTS ${ParMETIS_DIR}
+    PATH_SUFFIXES "lib"
+)
+
+find_library(SuperLU_LIBRARY
+    NAMES libsuperlu_dist.a
+    HINTS ${SuperLU_DIR}
+    PATH_SUFFIXES "lib"
+)
+
 # Handle the QUIET and REQUIRED arguments and
 # set Cereal_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR)
+find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY)
+
 
 # Copy the results to the output variables and target.
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR)
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY)
 
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
deleted file mode 100644
index b350f107f5..0000000000
--- a/cmake/FindParMETIS.cmake
+++ /dev/null
@@ -1,37 +0,0 @@
-###############################################################################
-# - Find cereal
-# Find the native cereal headers.
-#
-#  CEREAL_FOUND - True if cereal is found.
-#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
-
-find_path(ParMETIS_INCLUDE_DIR
-    NAMES metis.h parmetis.h
-    HINTS ${ParMETIS_DIR}
-    PATH_SUFFIXES "include"
-)
-
-find_library(METIS_LIBRARY
-    NAMES metis
-    HINTS ${ParMETIS_DIR}
-    PATH_SUFFIXES "lib"
-)
-
-find_library(ParMETIS_LIBRARY
-    NAMES parmetis
-    HINTS ${ParMETIS_DIR}
-    PATH_SUFFIXES "lib"
-)
-
-# print libs
-# message(STATUS "ParMETIS_INCLUDE_DIR: ${ParMETIS_INCLUDE_DIR}")
-# message(STATUS "ParMETIS_LIBRARY: ${ParMETIS_LIBRARY}")
-
-# Handle the QUIET and REQUIRED arguments and
-# set Cereal_FOUND to TRUE if all variables are non-zero.
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ParMETIS DEFAULT_MSG ParMETIS_LIBRARY METIS_LIBRARY)
-
-# Copy the results to the output variables and target.
-mark_as_advanced(ParMETIS_LIBRARY)
-
diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
deleted file mode 100644
index ac1d5b4fab..0000000000
--- a/cmake/FindSuperLU.cmake
+++ /dev/null
@@ -1,27 +0,0 @@
-###############################################################################
-# - Find cereal
-# Find the native cereal headers.
-#
-#  CEREAL_FOUND - True if cereal is found.
-#  CEREAL_INCLUDE_DIR - Where to find cereal headers.
-
-# find_path(SuperLU_INCLUDE_DIR
-#     NAMES *.h
-#     HINTS ${SuperLU_DIR}
-#     PATH_SUFFIXES "include"
-# )
-
-find_library(SuperLU_LIBRARY
-    NAMES libsuperlu_dist.a
-    HINTS ${SuperLU_DIR}
-    PATH_SUFFIXES "lib"
-)
-
-# Handle the QUIET and REQUIRED arguments and
-# set Cereal_FOUND to TRUE if all variables are non-zero.
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(SuperLU DEFAULT_MSG SuperLU_LIBRARY)
-
-# Copy the results to the output variables and target.
-mark_as_advanced(SuperLU_LIBRARY)
-
diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp
index 7e09f0f509..e2a4c3eec6 100644
--- a/source/module_elecstate/elecstate.cpp
+++ b/source/module_elecstate/elecstate.cpp
@@ -174,11 +174,6 @@ void ElecState::calEBand()
     ModuleBase::TITLE("ElecState", "calEBand");
     // calculate ebands using wg and ekb
     double eband = 0.0;
-    // if (GlobalV::KS_SOLVER == "pexsi")
-    // {
-    //     // tbd
-    // }
-    // else
     {
 #ifdef _OPENMP
 #pragma omp parallel for collapse(2) reduction(+:eband)
diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index c43cdb9fd2..8ca7d2b050 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -166,33 +166,28 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
     ModuleBase::TITLE("ElecStateLCAO", "psiToRho");
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
 
-    if (GlobalV::KS_SOLVER != "pexsi") // useless for pexsi
-    {
-        this->calculate_weights();
-        this->calEBand();
-    }
+    this->calculate_weights();
+    this->calEBand();
 
     if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" || GlobalV::KS_SOLVER == "pexsi")
     {
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
-        if (GlobalV::KS_SOLVER != "pexsi")
-        {
-            // get DMK in 2d-block format
-            //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma);
-            elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM));
-            this->DM->cal_DMR();
 
-            if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
+        // get DMK in 2d-block format
+        //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma);
+        elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM));
+        this->DM->cal_DMR();
+
+        if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
+        {
+            this->loc->dm_gamma.resize(GlobalV::NSPIN);
+            for (int is = 0; is < GlobalV::NSPIN; ++is)
             {
-                this->loc->dm_gamma.resize(GlobalV::NSPIN);
-                for (int is = 0; is < GlobalV::NSPIN; ++is)
-                {
-                    this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is));
-                }
+                this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is));
             }
-
         }
 
+
         
 
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
@@ -278,20 +273,57 @@ double ElecStateLCAO<std::complex<double>>::get_spin_constrain_energy()
     return sc.cal_escon();
 }
 
+#ifdef __PEXSI
 template<>
-void ElecStateLCAO<double>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
+void ElecStateLCAO<double>::dmToRho(double* pexsi_DM)
 {
-    this->loc->dm_gamma[0].create(ParaV->ncol, ParaV->nrow);
-    this->loc->dm_gamma[0].c = DM;
-    this->loc->out_dm = 1;
+    ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
+
+    this->loc->set_dm_gamma(0, pexsi_DM);
+
+    // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25
+    if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
+    {
+        this->loc->cal_dk_gamma_from_2D_pub();
+    }
+
+    auto DM = this->get_DM();
+    DM->set_DMK_pointer(0, pexsi_DM);
+    DM->cal_DMR();
+    
+    for (int is = 0; is < GlobalV::NSPIN; is++)
+    {
+        ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); // mohan 2009-11-10
+    }
+
+    ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!");
+    this->uhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint
+    Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho);
+    this->uhm->GG.cal_gint(&inout);
+    if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
+    {
+        for (int is = 0; is < GlobalV::NSPIN; is++)
+        {
+            ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx);
+        }
+        Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau);
+        this->uhm->GG.cal_gint(&inout1);
+    }
+
+    this->charge->renormalize_rho();
+
+    ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
+    return;
 }
 
 template<>
-void ElecStateLCAO<std::complex<double>>::get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV)
+void ElecStateLCAO<std::complex<double>>::dmToRho(std::complex<double>* DM)
 {
     ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case");
 }
 
+#endif
+
 template class ElecStateLCAO<double>; // Gamma_only case
 template class ElecStateLCAO<std::complex<double>>; // multi-k case
 
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 6adefc6cda..8c86844486 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -59,8 +59,10 @@ class ElecStateLCAO : public ElecState
 
     double get_spin_constrain_energy() override;
 
+#ifdef __PEXSI
     //use for pexsi
-    void get_DM_from_pexsi(double* DM, const Parallel_Orbitals* ParaV);
+    void dmToRho(TK* DM);
+#endif
 
   protected:
     // calculate electronic charge density on grid points or density matrix in real space
diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
index 6d791f985f..fe40162b0c 100644
--- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
@@ -36,7 +36,7 @@ void Gint::gint_kernel_rho(
 		ModuleBase::GlobalFunc::ZEROS(psir_DM.ptr_1D, this->bxyz*LD_pool);
 		if(GlobalV::GAMMA_ONLY_LOCAL)
 		{
-			if (GlobalV::CALCULATION == "get_pchg" || GlobalV::KS_SOLVER == "pexsi")
+			if (GlobalV::CALCULATION == "get_pchg")
 			{
 				Gint_Tools::mult_psi_DM(
 					*this->gridt, this->bxyz, na_grid, LD_pool,
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 802ec51e97..486b3a90b1 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -1,11 +1,6 @@
 #ifndef DIGAOPEXSI_H
 #define DIGAOPEXSI_H
 
-#ifdef  __PEXSI
-
-#define DIGAOPEXSI_H
-#endif
-
 #include "module_basis/module_ao/parallel_orbitals.h"
 #include "diagh.h"
 #include "pexsi/pexsi_solver.h"
@@ -25,7 +20,7 @@ namespace hsolver
       }
       void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
       const Parallel_Orbitals* ParaV;
-      double* DM;
+      T* DM;
       double* EDM;
       double totalEnergyH;
       double totalEnergyS;
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 6ae226268e..259e043e4b 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -7,7 +7,11 @@
 #ifdef __ELPA
 #include "diago_elpa.h"
 #endif
+
+#ifdef __PEXSI
 #include "diago_pexsi.h"
+#endif
+
 #include "module_elecstate/elecstate_lcao.h"
 
 namespace hsolver
@@ -141,15 +145,20 @@ void HSolverLCAO<T>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
 
     // calculate charge by psi
     // called in scf calculation
+#ifdef __PEXSI
     if (this->method == "pexsi")
     {
         DiagoPexsi<T>* tem = dynamic_cast<DiagoPexsi<T>*>(this->pdiagh);
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
         pes->f_en.eband = tem->totalFreeEnergy;
-        _pes->get_DM_from_pexsi(tem->DM, tem->ParaV);
+        _pes->dmToRho(tem->DM);
+    }
+    else
+#endif
+    {
+        pes->psiToRho(psi);
     }
-    pes->psiToRho(psi);
     ModuleBase::timer::tick("HSolverLCAO", "solve");
 }
 template <typename T>

From f52bd9909789c71b8ca3a49bfaf5e4ca530067cc Mon Sep 17 00:00:00 2001
From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com>
Date: Mon, 15 Jan 2024 17:13:55 +0800
Subject: [PATCH 08/44] Turn off USE_PEXSI

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6d270a9bb..f00594b3b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,7 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF)
 option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
 option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
-option(USE_PEXSI "Enable support for PEXSI." ON)
+option(USE_PEXSI "Enable support for PEXSI." OFF)
 
 # get commit info
 if(COMMIT_INFO)
@@ -651,4 +651,4 @@ install(PROGRAMS ${ABACUS_BIN_PATH}
 
 if(ENABLE_COVERAGE)
   coverage_evaluate()
-endif()
\ No newline at end of file
+endif()

From 682d19baab43329caee5d0070691a3fc1c48d572 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Tue, 16 Jan 2024 21:14:37 +0800
Subject: [PATCH 09/44] Update LibRI to 553c91c

---
 deps/LibRI | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/LibRI b/deps/LibRI
index b321b71a86..553c91c0be 160000
--- a/deps/LibRI
+++ b/deps/LibRI
@@ -1 +1 @@
-Subproject commit b321b71a8677a88a42bbb78e6d31c10073454e14
+Subproject commit 553c91c0be1d60a86e7666f0502ef866c366c600

From 90f600e57a744506118eecf65614a648b2c96540 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Wed, 17 Jan 2024 16:21:44 +0800
Subject: [PATCH 10/44] modify include files

---
 CMakeLists.txt                         | 6 +++---
 source/module_hsolver/hsolver_lcao.cpp | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e3f6b4ac8..8440662355 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,9 +11,9 @@ project(ABACUS
 )
 
 # private options, should not be pushed to master
-# set(PEXSI_DIR "~/Documents/projects/abacus-dependencies/pexsi")
-# set(SuperLU_DIR "~/Documents/projects/abacus-dependencies/superlu")
-# set(ParMETIS_DIR "~/Documents/projects/abacus-dependencies/parmetis")
+# set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
+# set(SuperLU_DIR "~/projects/pexsi-build/superlu")
+# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
 # set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001")
 
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 085d6ac389..7adea8b91d 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -16,6 +16,10 @@
 #ifdef __CUSOLVER_LCAO
 #include "diago_cusolver.h"
 #endif
+#ifdef __PEXSI
+#include "diago_pexsi.h"
+#include "module_elecstate/elecstate_lcao.h"
+#endif
 
 namespace hsolver
 {

From 3d8c3668bf5b5e0f28e14075fbcbf5f8901feb91 Mon Sep 17 00:00:00 2001
From: FlyingDragonBoxing <renhongxu0820@hotmail.com>
Date: Mon, 22 Jan 2024 14:12:47 +0800
Subject: [PATCH 11/44] namespace-ize

---
 source/module_hsolver/CMakeLists.txt          |   7 +-
 source/module_hsolver/diago_pexsi.cpp         |   2 +-
 source/module_hsolver/diago_pexsi.h           |  39 +++--
 .../{pexsi => module_pexsi}/CMakeLists.txt    |   0
 .../module_pexsi/dist_bcd_matrix.cpp          | 113 ++++++++++++
 .../module_pexsi/dist_bcd_matrix.h            |  73 ++++++++
 .../module_pexsi/dist_ccs_matrix.cpp          | 117 +++++++++++++
 .../module_pexsi/dist_ccs_matrix.h            |  55 ++++++
 .../dist_matrix_transformer.cpp}              |  10 +-
 .../module_pexsi/dist_matrix_transformer.h    |  36 ++++
 .../{pexsi => module_pexsi}/pexsi_solver.cpp  |  21 ++-
 .../{pexsi => module_pexsi}/pexsi_solver.h    |   9 +-
 .../simple_pexsi.cpp}                         |   9 +-
 .../module_pexsi/simple_pexsi.h               |  27 +++
 source/module_hsolver/pexsi/DistBCDMatrix.cpp | 164 ------------------
 source/module_hsolver/pexsi/DistBCDMatrix.h   |  69 --------
 source/module_hsolver/pexsi/DistCCSMatrix.cpp | 113 ------------
 source/module_hsolver/pexsi/DistCCSMatrix.h   |  51 ------
 .../pexsi/DistMatrixTransformer.h             |  26 ---
 source/module_hsolver/pexsi/simplePEXSI.h     |  13 --
 20 files changed, 483 insertions(+), 471 deletions(-)
 rename source/module_hsolver/{pexsi => module_pexsi}/CMakeLists.txt (100%)
 create mode 100644 source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
 create mode 100644 source/module_hsolver/module_pexsi/dist_bcd_matrix.h
 create mode 100644 source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
 create mode 100644 source/module_hsolver/module_pexsi/dist_ccs_matrix.h
 rename source/module_hsolver/{pexsi/DistMatrixTransformer.cpp => module_pexsi/dist_matrix_transformer.cpp} (99%)
 create mode 100644 source/module_hsolver/module_pexsi/dist_matrix_transformer.h
 rename source/module_hsolver/{pexsi => module_pexsi}/pexsi_solver.cpp (87%)
 rename source/module_hsolver/{pexsi => module_pexsi}/pexsi_solver.h (81%)
 rename source/module_hsolver/{pexsi/simplePEXSI.cpp => module_pexsi/simple_pexsi.cpp} (99%)
 create mode 100644 source/module_hsolver/module_pexsi/simple_pexsi.h
 delete mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.cpp
 delete mode 100644 source/module_hsolver/pexsi/DistBCDMatrix.h
 delete mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.cpp
 delete mode 100644 source/module_hsolver/pexsi/DistCCSMatrix.h
 delete mode 100644 source/module_hsolver/pexsi/DistMatrixTransformer.h
 delete mode 100644 source/module_hsolver/pexsi/simplePEXSI.h

diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt
index 498380f728..9a023fb5d0 100644
--- a/source/module_hsolver/CMakeLists.txt
+++ b/source/module_hsolver/CMakeLists.txt
@@ -42,12 +42,7 @@ if(ENABLE_LCAO)
   list(APPEND objects
       diago_pexsi.cpp
     )
-    # add_library(
-    #     pexsi
-    #     OBJECT
-    #     ${objects}
-    # )
-    add_subdirectory(pexsi)
+    add_subdirectory(module_pexsi)
   endif()
 
 endif()
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 21750c13e8..fbaf7b1806 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -24,7 +24,7 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     phm_in->matrix(h_mat, s_mat);
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
-    this->ps = new PEXSI_Solver(this->ParaV->blacs_ctxt,
+    this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
                                 this->ParaV->nb,
                                 this->ParaV->nrow,
                                 this->ParaV->ncol,
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 486b3a90b1..018397a33d 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -1,32 +1,33 @@
 #ifndef DIGAOPEXSI_H
 #define DIGAOPEXSI_H
 
-#include "module_basis/module_ao/parallel_orbitals.h"
 #include "diagh.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
 #include "pexsi/pexsi_solver.h"
 
 namespace hsolver
 {
 
-  template <typename T>
-  class DiagoPexsi : public DiagH<T>
-  {
+template <typename T>
+class DiagoPexsi : public DiagH<T>
+{
   private:
-      using Real = typename GetTypeReal<T>::type;
+    using Real = typename GetTypeReal<T>::type;
+
   public:
-      DiagoPexsi(const Parallel_Orbitals* ParaV_in)
-      {
-          this->ParaV = ParaV_in;
-      }
-      void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
-      const Parallel_Orbitals* ParaV;
-      T* DM;
-      double* EDM;
-      double totalEnergyH;
-      double totalEnergyS;
-      double totalFreeEnergy;
-      PEXSI_Solver* ps;
-  };
-}
+    DiagoPexsi(const Parallel_Orbitals* ParaV_in)
+    {
+        this->ParaV = ParaV_in;
+    }
+    void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
+    const Parallel_Orbitals* ParaV;
+    T* DM;
+    double* EDM;
+    double totalEnergyH;
+    double totalEnergyS;
+    double totalFreeEnergy;
+    pexsi::PEXSI_Solver* ps;
+};
+} // namespace hsolver
 
 #endif
diff --git a/source/module_hsolver/pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt
similarity index 100%
rename from source/module_hsolver/pexsi/CMakeLists.txt
rename to source/module_hsolver/module_pexsi/CMakeLists.txt
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
new file mode 100644
index 0000000000..cf815bd4ae
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -0,0 +1,113 @@
+#include "dist_bcd_matrix.h"
+
+#include <mpi.h>
+extern "C"
+{
+    void Cblacs_gridinfo(int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol);
+    int Cblacs_pnum(int blacs_ctxt, int prow, int pcol);
+};
+
+namespace pexsi
+{
+DistBCDMatrix::DistBCDMatrix(MPI_Comm comm,
+                             MPI_Group group,
+                             int blacs_ctxt,
+                             int size,
+                             int nblk,
+                             int nrow,
+                             int ncol,
+                             char LAYOUT)
+{
+    this->comm = comm;
+    this->group = group;
+    this->blacs_ctxt = blacs_ctxt;
+    this->size = size;
+    this->nblk = nblk;
+    this->nrow = nrow;
+    this->ncol = ncol;
+    if (LAYOUT == 'R' || LAYOUT == 'r' || LAYOUT == 'C' || LAYOUT == 'c')
+    {
+        this->LAYOUT = LAYOUT;
+    }
+    else
+    {
+        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
+    }
+
+    if (comm != MPI_COMM_NULL)
+    {
+        MPI_Comm_rank(comm, &this->myproc);
+        Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol);
+    }
+    else
+    {
+        this->myproc = -1;
+        this->myprow = -1;
+        this->mypcol = -1;
+    }
+
+    // synchronize matrix parameters to all processes, including those are not in bcd group
+    int myid_in_comm_world;
+    MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world);
+    if (myid_in_comm_world == 0)
+    {
+        MPI_Comm_size(comm, &this->nprocs);
+        int PARA_BCAST[4] = {this->nblk, this->nprocs, this->nprows, this->npcols};
+        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
+    }
+    else
+    {
+        int PARA_BCAST[4];
+        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
+        this->nblk = PARA_BCAST[0];
+        this->nprocs = PARA_BCAST[1];
+        this->nprows = PARA_BCAST[2];
+        this->npcols = PARA_BCAST[3];
+    }
+    this->prowpcol2pnum = new int[this->nprocs];
+    if (myid_in_comm_world == 0)
+    {
+        for (int i = 0; i < this->nprows; ++i)
+        {
+            for (int j = 0; j < this->npcols; ++j)
+            {
+                this->prowpcol2pnum[i * this->npcols + j] = Cblacs_pnum(this->blacs_ctxt, i, j);
+            }
+        }
+    }
+    MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD);
+}
+
+DistBCDMatrix::~DistBCDMatrix()
+{
+    delete[] prowpcol2pnum;
+}
+
+int DistBCDMatrix::globalRow(const int localRow)
+{
+    return (localRow / nblk * nprows + myprow) * nblk + localRow % nblk;
+}
+
+int DistBCDMatrix::globalCol(const int localCol)
+{
+
+    return (localCol / nblk * npcols + mypcol) * nblk + localCol % nblk;
+}
+
+int DistBCDMatrix::localRow(const int globalRow, int& myprow)
+{
+    myprow = int((globalRow % (nblk * nprows)) / nblk);
+    return int(globalRow / (nblk * nprows)) * nblk + globalRow % nblk;
+}
+
+int DistBCDMatrix::localCol(const int globalCol, int& mypcol)
+{
+    mypcol = int((globalCol % (nblk * npcols)) / nblk);
+    return int(globalCol / (nblk * npcols)) * nblk + globalCol % nblk;
+}
+
+int DistBCDMatrix::pnum(const int prow, const int pcol)
+{
+    return this->prowpcol2pnum[prow * this->npcols + pcol];
+}
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
new file mode 100644
index 0000000000..7dbddbad7c
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -0,0 +1,73 @@
+#ifndef DISTBCDMATRIX_H
+#define DISTBCDMATRIX_H
+
+#include <mpi.h>
+// a Block Cyclic Data Distribution matrix
+// http://www.netlib.org/utk/papers/factor/node3.html
+// local matrix elements is stored in column major
+// used for pexsi
+namespace pexsi
+{
+class DistBCDMatrix
+{
+
+  public:
+    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol);
+    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char
+    // LAYOUT);
+
+    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol);
+    DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT);
+    ~DistBCDMatrix();
+
+    int globalRow(const int localRow);
+    int globalCol(const int localCol);
+    int localRow(const int globalRow, int& myprow);
+    int localCol(const int globalCol, int& mypcol);
+    int pnum(const int prow, const int pcol);
+    //~DistBCDMatrix();
+
+  private:
+    // MPI communicator
+    MPI_Comm comm;
+    MPI_Group group;
+
+    // blacs context
+    int blacs_ctxt;
+
+    // row and column of process grid
+    int nprows;
+    int npcols;
+
+    // total number of processes
+    int nprocs;
+
+    // Matrix size
+    int size;
+
+    // block size
+    int nblk;
+
+    // row and c0lumn of Local matrix part
+    int nrow;
+    int ncol;
+
+    // protected:
+
+    // private:
+
+    // current process row and column
+    int myprow;
+    int mypcol;
+
+    // current process id
+    int myproc;
+
+    int* prowpcol2pnum;
+    // the local data layout
+    // 'R' or 'r' for row-major, which is used in C/C++
+    // 'C' or 'c' for column-major, which is used in Fortran
+    char LAYOUT;
+};
+} // namespace pexsi
+#endif // DISTBCDMATRIX_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
new file mode 100644
index 0000000000..365622d249
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
@@ -0,0 +1,117 @@
+#include "dist_ccs_matrix.h"
+
+#include <mpi.h>
+
+namespace pexsi
+{
+DistCCSMatrix::DistCCSMatrix(void)
+{
+    this->comm = MPI_COMM_WORLD;
+    this->size = 0;
+    this->nnz = 0;
+    this->nnzLocal = 0;
+    this->numColLocal = 0;
+    this->colptrLocal = NULL;
+    this->rowindLocal = NULL;
+}
+
+DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in)
+{
+    this->comm = comm_in;
+    this->size = 0;
+    this->nnz = 0;
+    this->nnzLocal = 0;
+    this->numColLocal = 0;
+    this->colptrLocal = NULL;
+    this->rowindLocal = NULL;
+}
+
+DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in)
+{
+    this->comm = MPI_COMM_WORLD;
+    this->size = size_in;
+    this->nnzLocal = nnzLocal_in;
+    MPI_Request req;
+    MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req);
+    this->numColLocal = 0;
+    this->colptrLocal = new int[size];
+    this->rowindLocal = new int[nnzLocal];
+
+    MPI_Status req_status;
+    MPI_Wait(&req, &req_status);
+}
+
+DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in)
+{
+    this->comm = comm_in;
+    this->nproc_data = nproc_data_in;
+    int nproc_data_range[3] = {0, this->nproc_data - 1, 1};
+    // create processes group with data: this->group_data and associated communicator
+    MPI_Comm_group(this->comm, &this->group);
+    MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data);
+    this->comm_data = MPI_COMM_NULL;
+    MPI_Comm_create(this->comm, this->group_data, &this->comm_data);
+    this->size = size_in;
+    this->nnz = 0;
+    this->nnzLocal = 0;
+    int myproc;
+    if (comm != MPI_COMM_NULL)
+    {
+        MPI_Comm_size(comm, &nprocs);
+        MPI_Comm_rank(comm, &myproc);
+        if (myproc < nproc_data - 1)
+        {
+            this->numColLocal = size / nproc_data;
+            this->firstCol = size / nproc_data * myproc;
+            this->colptrLocal = new int[this->numColLocal + 1];
+            this->rowindLocal = NULL;
+        }
+        else if (myproc == nproc_data - 1)
+        {
+            this->numColLocal = size - myproc * (size / nproc_data);
+            this->firstCol = size / nproc_data * myproc;
+            this->colptrLocal = new int[this->numColLocal + 1];
+            this->rowindLocal = NULL;
+        }
+        else
+        {
+            this->numColLocal = 0;
+            this->firstCol = size - 1;
+            this->colptrLocal = new int[this->numColLocal + 1];
+            this->rowindLocal = NULL;
+        }
+    }
+}
+
+int DistCCSMatrix::globalCol(int localCol)
+{
+    return this->firstCol + localCol;
+}
+
+// NOTE: the process id is 0-based
+int DistCCSMatrix::localCol(int globalCol, int& mypcol)
+{
+    mypcol = int(globalCol / int(this->size / this->nproc_data));
+    if (mypcol >= this->nproc_data)
+        mypcol = this->nproc_data - 1;
+
+    return mypcol > 0 ? globalCol - (this->size / this->nproc_data) * mypcol : globalCol;
+}
+
+void DistCCSMatrix::setnnz(int nnzLocal_in)
+{
+    if (this->comm_data != MPI_COMM_NULL)
+    {
+        MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data);
+        this->nnzLocal = nnzLocal_in;
+        this->rowindLocal = new int[nnzLocal];
+        this->colptrLocal[this->numColLocal] = nnzLocal_in + 1;
+    }
+}
+
+DistCCSMatrix::~DistCCSMatrix()
+{
+    delete[] colptrLocal;
+    delete[] rowindLocal;
+}
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
new file mode 100644
index 0000000000..aa5e67b6ab
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
@@ -0,0 +1,55 @@
+#ifndef DISTCCSMATRIX_H
+#define DISTCCSMATRIX_H
+
+#include <mpi.h>
+// Distributed Compressed Column Storage Matrix format
+// used for PEXSI
+namespace pexsi
+{
+class DistCCSMatrix
+{
+
+  public:
+    DistCCSMatrix();
+    DistCCSMatrix(MPI_Comm comm);
+    DistCCSMatrix(int size, int nnzLocal);
+    DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal);
+    DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index);
+
+    int globalCol(int localCol);
+    int localCol(int globalCol, int& mypcol);
+    void setnnz(int nnzLocal);
+    ~DistCCSMatrix();
+
+  private:
+    // MPI communicator
+    MPI_Comm comm;
+    MPI_Group group;
+
+    // total number of processes and the processes with data in
+    int nprocs;
+    int nproc_data;
+    MPI_Group group_data;
+    MPI_Comm comm_data;
+
+    // Matrix size
+    int size;
+
+    // Number of non-zero values in the matrix
+    int nnz;
+
+    // Number of non-zero values in the matrix of the local process
+    int nnzLocal;
+
+    // number of columns in current process
+    int numColLocal;
+
+    // the first column index in current process
+    int firstCol;
+
+    // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
+    int* colptrLocal;
+    int* rowindLocal;
+};
+} // namespace pexsi
+#endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
similarity index 99%
rename from source/module_hsolver/pexsi/DistMatrixTransformer.cpp
rename to source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index 1eec8dca12..01b96f42cc 100644
--- a/source/module_hsolver/pexsi/DistMatrixTransformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -7,8 +7,8 @@
 #include <map>
 #include <vector>
 
-#include "DistBCDMatrix.h"
-#include "DistCCSMatrix.h"
+#include "dist_bcd_matrix.h"
+#include "dist_ccs_matrix.h"
 
 // for debug
 #ifdef _DEBUG
@@ -21,6 +21,8 @@
 #endif
 // end debug
 
+namespace pexsi
+{
 // find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a
 // negtive value the size_process and displacement_process array will be changed after the index is found isFirst:
 // wether this function is called for the first time for a index array; nprocs: total number of processes size_process:
@@ -232,7 +234,7 @@ inline int getNonZeroIndex(char LAYOUT,
                 idx = i * nrow + j;
                 if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit)
                 {
-                     ++nnz;
+                    ++nnz;
                     colidx.push_back(i);
                     rowidx.push_back(j);
                 }
@@ -1592,3 +1594,5 @@ MPI_Barrier(COMM_TRANS);
 #endif
     return 0;
 }
+
+} // namespace pexsi
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
new file mode 100644
index 0000000000..1d28866c96
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -0,0 +1,36 @@
+#ifndef DISTMATRIXTRANSFORMER_H
+#define DISTMATRIXTRANSFORMER_H
+
+#include "dist_bcd_matrix.h"
+#include "dist_ccs_matrix.h"
+// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
+// they should have same MPI communicator
+// The local matrix of BCD is column-major order
+// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit,
+//                    DistCCSMatrix &DST_Matrix, double*& H_ccs);
+
+// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
+// two destination matrices share the same non-zero elements positions
+// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero,
+// even if one of them is acturely zero All matrices must have same MPI communicator
+namespace pexsi
+{
+int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
+                      double* H_2d,
+                      double* S_2d,
+                      const double ZERO_Limit,
+                      DistCCSMatrix& DST_Matrix,
+                      double*& H_ccs,
+                      double*& S_ccs);
+
+// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal,
+// DistBCDMatrix& DST_Matrix, double* DM_2d);
+
+int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
+                      double* DMnzvalLocal,
+                      double* ENDnzvalLocal,
+                      DistBCDMatrix& DST_Matrix,
+                      double* DM_2d,
+                      double* END_2d);
+} // namespace pexsi
+#endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
similarity index 87%
rename from source/module_hsolver/pexsi/pexsi_solver.cpp
rename to source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 523e6bb2d5..90d16ae993 100644
--- a/source/module_hsolver/pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -5,8 +5,9 @@
 #include <cstring>
 
 #include "module_base/global_variable.h"
-#include "simplePEXSI.h"
 
+namespace pexsi
+{
 PEXSI_Solver::PEXSI_Solver(const int blacs_text,
                            const int nb,
                            const int nrow,
@@ -58,4 +59,20 @@ int PEXSI_Solver::solve()
                 this->totalEnergyS,
                 this->totalFreeEnergy);
     return 0;
-}
\ No newline at end of file
+}
+
+const double* PEXSI_Solver::get_DM() const
+{
+    return DM;
+}
+
+const double* PEXSI_Solver::get_EDM() const
+{
+    return EDM;
+}
+
+const double PEXSI_Solver::get_totalFreeEnergy() const
+{
+    return totalFreeEnergy;
+}
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
similarity index 81%
rename from source/module_hsolver/pexsi/pexsi_solver.h
rename to source/module_hsolver/module_pexsi/pexsi_solver.h
index 52f23b663b..0c3164e5f0 100644
--- a/source/module_hsolver/pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -1,6 +1,8 @@
 #ifndef PEXSI_Solver_H
 #define PEXSI_Solver_H
 
+namespace pexsi
+{
 class PEXSI_Solver
 {
   public:
@@ -16,6 +18,11 @@ class PEXSI_Solver
                  double& totalEnergyS,
                  double& totalFreeEnergy);
     int solve();
+    const double* get_DM() const;
+    const double* get_EDM() const;
+    const double get_totalFreeEnergy() const;
+
+  private:
     int blacs_text;
     int nb;
     int nrow;
@@ -28,5 +35,5 @@ class PEXSI_Solver
     double totalEnergyS;
     double totalFreeEnergy;
 };
-
+} // namespace pexsi
 #endif // PEXSI_Solver_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/simplePEXSI.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
similarity index 99%
rename from source/module_hsolver/pexsi/simplePEXSI.cpp
rename to source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 0fbeb4e0a8..845beef18c 100644
--- a/source/module_hsolver/pexsi/simplePEXSI.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -11,14 +11,16 @@
 #include <iostream>
 #include <memory>
 
-#include "DistBCDMatrix.h"
-#include "DistCCSMatrix.h"
-#include "DistMatrixTransformer.h"
 #include "c_pexsi_interface.h"
+#include "dist_bcd_matrix.h"
+#include "dist_ccs_matrix.h"
+#include "dist_matrix_transformer.h"
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 
+namespace pexsi
+{
 inline void strtolower(char* sa, char* sb)
 {
     char c;
@@ -700,3 +702,4 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h
new file mode 100644
index 0000000000..6d569154e9
--- /dev/null
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.h
@@ -0,0 +1,27 @@
+#ifndef SIMPLE_PEXSI_H
+#define SIMPLE_PEXSI_H
+
+#include <mpi.h>
+// a simple interface for calling pexsi with 2D block cyclic distributed matrix
+namespace pexsi
+{
+int simplePEXSI(MPI_Comm comm_PEXSI,
+                MPI_Comm comm_2D,
+                MPI_Group group_2D,
+                const int blacs_ctxt, // communicator parameters
+                const int size,
+                const int nblk,
+                const int nrow,
+                const int ncol,
+                char LAYOUT, // input matrix parameters
+                double* H,
+                double* S, // input matrices
+                const double nElectronExact,
+                const std::string PexsiOptionFile, // pexsi parameters file
+                double*& DM,
+                double*& EDM, // output matrices
+                double& totalEnergyH,
+                double& totalEnergyS,
+                double& totalFreeEnergy);
+}
+#endif // SIMPLE_PEXSI_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.cpp b/source/module_hsolver/pexsi/DistBCDMatrix.cpp
deleted file mode 100644
index 383875dc87..0000000000
--- a/source/module_hsolver/pexsi/DistBCDMatrix.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-#include <mpi.h>
-#include "DistBCDMatrix.h"
-extern "C"
-{
-    void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol);
-    int Cblacs_pnum(int blacs_ctxt, int prow, int pcol);
-};
-
-/*
-DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol)
-{
-    this->comm=comm;
-    this->group=group;
-    MPI_Comm_rank(comm, &this->myproc);
-    this->nprows=nprow;
-    this->npcols=npcol;
-    this->size=size;
-    this->nblk=nblk;
-    this->nrow=nrow;
-    this->ncol=ncol;
-    this->LAYOUT='R';
-}
-
-DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT)
-{
-    this->comm=comm;
-    this->group=group;
-    MPI_Comm_rank(comm, &this->myproc);
-    this->nprows=nprow;
-    this->npcols=npcol;
-    this->size=size;
-    this->nblk=nblk;
-    this->nrow=nrow;
-    this->ncol=ncol;
-    if(LAYOUT == 'R' ||
-       LAYOUT == 'r' ||
-       LAYOUT == 'C' ||
-       LAYOUT == 'c')
-    {
-        this->LAYOUT=LAYOUT;
-    } else
-    {
-        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
-    }
-}
-
-DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol)
-{
-    this->comm=comm;
-    this->group=group;
-    this->blacs_ctxt=blacs_ctxt;
-    this->size=size;
-    this->nblk=nblk;
-    this->nrow=nrow;
-    this->ncol=ncol;
-    this->LAYOUT='R';
-    Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol);
-    if(comm != MPI_COMM_NULL)
-    {
-        MPI_Comm_rank(comm, &this->myproc);
-        MPI_Comm_size(comm, &this->nprocs);
-    }else
-    {
-        this->myproc=-1;
-        this->nprocs=-1;
-    }
-}
-*/
-
-DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT)
-{
-    this->comm=comm;
-    this->group=group;
-    this->blacs_ctxt=blacs_ctxt;
-    this->size=size;
-    this->nblk=nblk;
-    this->nrow=nrow;
-    this->ncol=ncol;
-    if(LAYOUT == 'R' ||
-       LAYOUT == 'r' ||
-       LAYOUT == 'C' ||
-       LAYOUT == 'c')
-    {
-        this->LAYOUT=LAYOUT;
-    } else
-    {
-        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
-    }
-
-    if(comm != MPI_COMM_NULL)
-    {
-        MPI_Comm_rank(comm, &this->myproc);
-        Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol);
-    }else
-    {
-        this->myproc=-1;
-        this->myprow=-1;
-        this->mypcol=-1;
-    }
-
-    // synchronize matrix parameters to all processes, including those are not in bcd group
-    int myid_in_comm_world;
-    MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world);
-    if(myid_in_comm_world == 0)
-    {
-        MPI_Comm_size(comm, &this->nprocs);
-        int PARA_BCAST[4]={this->nblk, this->nprocs, this->nprows, this->npcols};
-        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
-    }
-    else
-    {
-        int PARA_BCAST[4];
-        MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD);
-        this->nblk=PARA_BCAST[0];
-        this->nprocs=PARA_BCAST[1];
-        this->nprows=PARA_BCAST[2];
-        this->npcols=PARA_BCAST[3];
-    }
-    this->prowpcol2pnum=new int[this->nprocs];
-    if(myid_in_comm_world == 0)
-    {
-        for(int i=0; i<this->nprows; ++i)
-        {
-            for(int j=0; j<this->npcols; ++j)
-            {
-                this->prowpcol2pnum[i*this->npcols+j]=Cblacs_pnum(this->blacs_ctxt, i, j);
-            }
-        }
-    }
-    MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD);
-}
-
-DistBCDMatrix::~DistBCDMatrix()
-{
-    delete[] prowpcol2pnum;
-}
-
-int DistBCDMatrix::globalRow(const int localRow)
-{
-    return (localRow/nblk*nprows+myprow)*nblk+localRow%nblk;
-}
-
-int DistBCDMatrix::globalCol(const int localCol)
-{
-    
-    return (localCol/nblk*npcols+mypcol)*nblk+localCol%nblk;
-}
-
-int DistBCDMatrix::localRow(const int globalRow, int& myprow)
-{
-    myprow=int((globalRow%(nblk*nprows))/nblk);
-    return int(globalRow/(nblk*nprows))*nblk+globalRow%nblk;
-}
-
-int DistBCDMatrix::localCol(const int globalCol, int& mypcol)
-{
-    mypcol=int((globalCol%(nblk*npcols))/nblk);
-    return int(globalCol/(nblk*npcols))*nblk+globalCol%nblk;
-}
-
-int DistBCDMatrix::pnum(const int prow, const int pcol)
-{
-    return this->prowpcol2pnum[prow*this->npcols+pcol];
-}
diff --git a/source/module_hsolver/pexsi/DistBCDMatrix.h b/source/module_hsolver/pexsi/DistBCDMatrix.h
deleted file mode 100644
index 97c5e8652f..0000000000
--- a/source/module_hsolver/pexsi/DistBCDMatrix.h
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef DISTBCDMATRIX_H
-#define DISTBCDMATRIX_H
-
-#include <mpi.h>
-// a Block Cyclic Data Distribution matrix
-// http://www.netlib.org/utk/papers/factor/node3.html
-// local matrix elements is stored in column major
-// used for pexsi
-class DistBCDMatrix {
-
-        public:
-        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol);
-        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char LAYOUT);
-
-        // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol);
-        DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT);
-        ~DistBCDMatrix();
-
-        int globalRow(const int localRow);
-        int globalCol(const int localCol);
-        int localRow(const int globalRow, int& myprow);
-        int localCol(const int globalCol, int& mypcol);
-        int pnum(const int prow, const int pcol);
-        //~DistBCDMatrix();
-
-        // MPI communicator
-        MPI_Comm comm;
-        MPI_Group group;
-
-        // blacs context
-        int blacs_ctxt;
-
-        // row and column of process grid
-        int nprows;
-        int npcols;
-
-        // total number of processes
-        int nprocs;
-
-        // Matrix size
-        int size;
-
-        // block size
-        int nblk;
-
-        // row and c0lumn of Local matrix part
-        int nrow;
-        int ncol;
-
-
-        // protected:
-
-        // private:
-
-        // current process row and column
-        int myprow;
-        int mypcol;
-
-        // current process id
-        int myproc;
-
-        int *prowpcol2pnum;
-        // the local data layout
-        // 'R' or 'r' for row-major, which is used in C/C++
-        // 'C' or 'c' for column-major, which is used in Fortran
-        char LAYOUT;
-};
-
-#endif // DISTBCDMATRIX_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.cpp b/source/module_hsolver/pexsi/DistCCSMatrix.cpp
deleted file mode 100644
index 9e3fc728fa..0000000000
--- a/source/module_hsolver/pexsi/DistCCSMatrix.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <mpi.h>
-#include "DistCCSMatrix.h"
-
-DistCCSMatrix::DistCCSMatrix(void)
-{
-    this->comm=MPI_COMM_WORLD;
-    this->size=0;
-    this->nnz=0;
-    this->nnzLocal=0;
-    this->numColLocal=0;
-    this->colptrLocal=NULL;
-    this->rowindLocal=NULL;
-}
-
-DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in)
-{
-    this->comm=comm_in;
-    this->size=0;
-    this->nnz=0;
-    this->nnzLocal=0;
-    this->numColLocal=0;
-    this->colptrLocal=NULL;
-    this->rowindLocal=NULL;
-}
-
-DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in)
-{
-    this->comm=MPI_COMM_WORLD;
-    this->size=size_in;
-    this->nnzLocal=nnzLocal_in;
-    MPI_Request req;
-    MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req);
-    this->numColLocal=0;
-    this->colptrLocal=new int[size];
-    this->rowindLocal=new int[nnzLocal];
-
-    MPI_Status req_status;
-    MPI_Wait(&req, &req_status);
-}
-
-DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in)
-{
-    this->comm=comm_in;
-    this->nproc_data=nproc_data_in;
-    int nproc_data_range[3]={0, this->nproc_data-1, 1};
-    // create processes group with data: this->group_data and associated communicator
-    MPI_Comm_group(this->comm, &this->group);
-    MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data);
-    this->comm_data=MPI_COMM_NULL;
-    MPI_Comm_create(this->comm, this->group_data, &this->comm_data);
-    this->size=size_in;
-    this->nnz=0;
-    this->nnzLocal=0;
-    int myproc;
-    if(comm != MPI_COMM_NULL)
-    {
-        MPI_Comm_size(comm, &nprocs);
-        MPI_Comm_rank(comm, &myproc);
-        if(myproc<nproc_data-1)
-        {
-            this->numColLocal=size/nproc_data;
-            this->firstCol=size/nproc_data*myproc;
-            this->colptrLocal=new int[this->numColLocal+1];
-            this->rowindLocal=NULL;
-        }
-        else if(myproc==nproc_data-1)
-        {
-            this->numColLocal=size-myproc*(size/nproc_data);
-            this->firstCol=size/nproc_data*myproc;
-            this->colptrLocal=new int[this->numColLocal+1];
-            this->rowindLocal=NULL;
-        }
-        else
-        {
-            this->numColLocal=0;
-            this->firstCol=size-1;
-            this->colptrLocal=new int[this->numColLocal+1];
-            this->rowindLocal=NULL;
-        }
-    }
-}
-
-int DistCCSMatrix::globalCol(int localCol)
-{
-    return this->firstCol+localCol;
-}
-
-
-// NOTE: the process id is 0-based
-int DistCCSMatrix::localCol(int globalCol, int& mypcol)
-{
-    mypcol=int(globalCol/int(this->size/this->nproc_data));
-    if(mypcol >= this->nproc_data) mypcol=this->nproc_data-1;
-    
-    return mypcol>0 ? globalCol-(this->size/this->nproc_data)*mypcol : globalCol;
-}
-
-void DistCCSMatrix::setnnz(int nnzLocal_in)
-{
-    if(this->comm_data != MPI_COMM_NULL)
-    {
-        MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data);
-        this->nnzLocal=nnzLocal_in;
-        this->rowindLocal=new int[nnzLocal];
-        this->colptrLocal[this->numColLocal]=nnzLocal_in+1;
-    }
-}
-
-DistCCSMatrix::~DistCCSMatrix()
-{
-    delete[] colptrLocal;
-    delete[] rowindLocal;
-}
diff --git a/source/module_hsolver/pexsi/DistCCSMatrix.h b/source/module_hsolver/pexsi/DistCCSMatrix.h
deleted file mode 100644
index 48ec95d0fc..0000000000
--- a/source/module_hsolver/pexsi/DistCCSMatrix.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef DISTCCSMATRIX_H
-#define DISTCCSMATRIX_H
-
-#include <mpi.h>
-// Distributed Compressed Column Storage Matrix format
-// used for PEXSI
-class DistCCSMatrix {
-
-        public:
-        DistCCSMatrix();
-        DistCCSMatrix(MPI_Comm comm);
-        DistCCSMatrix(int size, int nnzLocal);
-        DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal);
-        DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index);
-
-        int globalCol(int localCol);
-        int localCol(int globalCol, int& mypcol);
-        void setnnz(int nnzLocal);
-        ~DistCCSMatrix();
-
-        // MPI communicator
-        MPI_Comm comm;
-        MPI_Group group;
-
-        // total number of processes and the processes with data in
-        int nprocs;
-        int nproc_data;
-        MPI_Group group_data;
-        MPI_Comm comm_data;
-
-        // Matrix size
-        int size;
-
-        // Number of non-zero values in the matrix
-        int nnz;
-
-        // Number of non-zero values in the matrix of the local process
-        int nnzLocal;
-
-        // number of columns in current process
-        int numColLocal;
-
-        // the first column index in current process
-        int firstCol;
-
-        // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
-        int* colptrLocal;
-        int* rowindLocal;
-};
-
-#endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/pexsi/DistMatrixTransformer.h b/source/module_hsolver/pexsi/DistMatrixTransformer.h
deleted file mode 100644
index cdc0a53f74..0000000000
--- a/source/module_hsolver/pexsi/DistMatrixTransformer.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef DISTMATRIXTRANSFORMER_H
-#define DISTMATRIXTRANSFORMER_H
-
-#include "DistBCDMatrix.h"
-#include "DistCCSMatrix.h"
-// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
-// they should have same MPI communicator
-// The local matrix of BCD is column-major order
-// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, 
-//                    DistCCSMatrix &DST_Matrix, double*& H_ccs);
-
-// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
-// two destination matrices share the same non-zero elements positions
-// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, even if
-// one of them is acturely zero
-// All matrices must have same MPI communicator
-int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, double* S_2d, const double ZERO_Limit,
-                    DistCCSMatrix &DST_Matrix,  double*& H_ccs, double*& S_ccs);
-
-// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, 
-                    // DistBCDMatrix& DST_Matrix, double* DM_2d);
-
-int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal, double* ENDnzvalLocal,
-                    DistBCDMatrix& DST_Matrix, double* DM_2d, double* END_2d);
-
-#endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/pexsi/simplePEXSI.h b/source/module_hsolver/pexsi/simplePEXSI.h
deleted file mode 100644
index 6a23ba6600..0000000000
--- a/source/module_hsolver/pexsi/simplePEXSI.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef SIMPLE_PEXSI_H
-#define SIMPLE_PEXSI_H
-
-#include <mpi.h>
-// a simple interface for calling pexsi with 2D block cyclic distributed matrix
-int simplePEXSI(MPI_Comm comm_PEXSI, MPI_Comm comm_2D, MPI_Group group_2D, const int blacs_ctxt,  // communicator parameters
-                const int size, const int nblk, const int nrow, const int ncol, char LAYOUT, // input matrix parameters
-                double* H, double* S,                 // input matrices
-                const double nElectronExact, const std::string PexsiOptionFile,        // pexsi parameters file
-                double*& DM, double*& EDM,      // output matrices
-                double& totalEnergyH, double& totalEnergyS, double& totalFreeEnergy);
-
-#endif // SIMPLE_PEXSI_H
\ No newline at end of file

From 8075b14fd0e74e80ff98776416a0e922592f1441 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Wed, 24 Jan 2024 22:52:16 +0800
Subject: [PATCH 12/44] new inputs added

---
 CMakeLists.txt                                |   8 +-
 cmake/FindPEXSI.cmake                         |   8 +-
 source/module_base/global_variable.cpp        |  29 ++
 source/module_base/global_variable.h          |  27 +
 source/module_hsolver/diago_pexsi.cpp         |  44 +-
 source/module_hsolver/diago_pexsi.h           |   2 +-
 .../module_pexsi/CMakeLists.txt               |   2 +-
 .../module_pexsi/dist_bcd_matrix.h            |  23 +
 .../module_pexsi/dist_ccs_matrix.h            |  40 ++
 .../module_pexsi/dist_matrix_transformer.cpp  | 231 +++++----
 .../module_pexsi/dist_matrix_transformer.h    |  64 ++-
 .../module_pexsi/pexsi_solver.cpp             |  24 +-
 .../module_pexsi/pexsi_solver.h               |   6 +-
 .../module_pexsi/simple_pexsi.cpp             | 486 +++++++++---------
 source/module_io/input.cpp                    | 167 +++++-
 source/module_io/input.h                      |  28 +
 source/module_io/input_conv.cpp               |  29 ++
 source/module_io/write_input.cpp              |  27 +-
 18 files changed, 853 insertions(+), 392 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8440662355..619c7ac6fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,12 +10,6 @@ project(ABACUS
     LANGUAGES CXX
 )
 
-# private options, should not be pushed to master
-# set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
-# set(SuperLU_DIR "~/projects/pexsi-build/superlu")
-# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
-# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001")
-
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
 option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
 option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
@@ -189,7 +183,7 @@ if(ENABLE_LCAO)
   
   if(USE_PEXSI)
     find_package(PEXSI REQUIRED)
-    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
+    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
     add_compile_definitions(__PEXSI)
   endif()
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index 22fe4dd01c..062764acce 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY
     PATH_SUFFIXES "lib"
 )
 
-find_library(SuperLU_LIBRARY
+find_library(SuperLU_DIST_LIBRARY
     NAMES libsuperlu_dist.a
-    HINTS ${SuperLU_DIR}
+    HINTS ${SuperLU_DIST_DIR}
     PATH_SUFFIXES "lib"
 )
 
 # Handle the QUIET and REQUIRED arguments and
 # set Cereal_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY)
+find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY)
 
 
 # Copy the results to the output variables and target.
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY)
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY)
 
diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 696bcd6088..2af22b8856 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -300,4 +300,33 @@ std::string qo_basis = "hydrogen";
 std::vector<std::string> qo_strategy = {};
 double qo_thr = 1.0e-6;
 std::vector<double> qo_screening_coeff = {};
+
+//==========================================================
+// PEXSI related
+//==========================================================
+int pexsi_npole = 54;
+int pexsi_inertia = 1;
+int pexsi_nmax = 80;
+// int pexsi_symbolic = 1;
+int pexsi_comm = 1;
+int pexsi_storage = 1;
+int pexsi_ordering = 0;
+int pexsi_row_ordering = 1;
+int pexsi_nproc = 1;
+int pexsi_symm = 1;
+int pexsi_trans = 0;
+int pexsi_method = 1;
+int pexsi_nproc_pole = 1;
+// double pexsi_spin = 2;
+double pexsi_temp = 0.0001;
+double pexsi_gap = 0;
+double pexsi_delta_e = 20.0;
+double pexsi_mu_lower = -10;
+double pexsi_mu_upper = 10;
+double pexsi_mu = 0.0;
+double pexsi_mu_thr = 0.05;
+double pexsi_mu_expand = 0.3;
+double pexsi_mu_guard = 0.2;
+double pexsi_elec_thr = 0.001;
+double pexsi_zero_thr = 1e-10;
 } // namespace GlobalV
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index b1fbb1748d..843c954451 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -328,5 +328,32 @@ extern std::string qo_basis;
 extern std::vector<std::string> qo_strategy;
 extern double qo_thr;
 extern std::vector<double> qo_screening_coeff;
+
+// PEXSI related
+extern int pexsi_npole;
+extern int pexsi_inertia;
+extern int pexsi_nmax;
+// extern int pexsi_symbolic;
+extern int pexsi_comm;
+extern int pexsi_storage;
+extern int pexsi_ordering;
+extern int pexsi_row_ordering;
+extern int pexsi_nproc;
+extern int pexsi_symm;
+extern int pexsi_trans;
+extern int pexsi_method;
+extern int pexsi_nproc_pole;
+// extern double pexsi_spin;
+extern double pexsi_temp;
+extern double pexsi_gap;
+extern double pexsi_delta_e;
+extern double pexsi_mu_lower;
+extern double pexsi_mu_upper;
+extern double pexsi_mu;
+extern double pexsi_mu_thr;
+extern double pexsi_mu_expand;
+extern double pexsi_mu_guard;
+extern double pexsi_elec_thr;
+extern double pexsi_zero_thr;
 } // namespace GlobalV
 #endif
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index fbaf7b1806..8e4ee5b15b 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,14 +1,13 @@
 #include <complex>
 #ifdef __PEXSI
-#include "diago_pexsi.h"
-
 #include "c_pexsi_interface.h"
+#include "diago_pexsi.h"
 #include "module_base/global_variable.h"
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "pexsi/pexsi_solver.h"
+#include "module_pexsi/pexsi_solver.h"
 
 typedef hamilt::MatrixBlock<double> matd;
 typedef hamilt::MatrixBlock<std::complex<double>> matcd;
@@ -16,7 +15,7 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 namespace hsolver
 {
 
-template<>
+template <>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
@@ -25,30 +24,31 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
     this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
-                                this->ParaV->nb,
-                                this->ParaV->nrow,
-                                this->ParaV->ncol,
-                                h_mat.p,
-                                s_mat.p,
-                                this->DM,
-                                this->EDM,
-                                this->totalEnergyH,
-                                this->totalEnergyS,
-                                this->totalFreeEnergy);
+                                       this->ParaV->nb,
+                                       this->ParaV->nrow,
+                                       this->ParaV->ncol,
+                                       h_mat.p,
+                                       s_mat.p,
+                                       this->DM,
+                                       this->EDM,
+                                       this->totalEnergyH,
+                                       this->totalEnergyS,
+                                       this->totalFreeEnergy);
     this->ps->solve();
-    this->EDM = this->ps->EDM;
-    this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]?
-    this->totalFreeEnergy = this->ps->totalFreeEnergy;
-    this->totalEnergyH = this->ps->totalEnergyH;
-    this->totalEnergyS = this->ps->totalEnergyS;
+    this->EDM = this->ps->get_EDM();
+    this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
+    this->totalEnergyH = this->ps->get_totalEnergyH();
+    this->totalEnergyS = this->ps->get_totalEnergyS();
 }
 
-template<>
-void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
+template <>
+void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in,
+                                            psi::Psi<std::complex<double>>& psi,
+                                            double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
     ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
-    
 }
 
 } // namespace hsolver
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 018397a33d..c212d7795a 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -3,7 +3,7 @@
 
 #include "diagh.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "pexsi/pexsi_solver.h"
+#include "module_pexsi/pexsi_solver.h"
 
 namespace hsolver
 {
diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt
index 8faab8b4b4..87d16ff557 100644
--- a/source/module_hsolver/module_pexsi/CMakeLists.txt
+++ b/source/module_hsolver/module_pexsi/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp)
+add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp)
 
 if(ENABLE_COVERAGE)
   add_coverage(pexsi)
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
index 7dbddbad7c..98b8512893 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -2,6 +2,8 @@
 #define DISTBCDMATRIX_H
 
 #include <mpi.h>
+
+#include "module_hsolver/module_pexsi/dist_matrix_transformer.h"
 // a Block Cyclic Data Distribution matrix
 // http://www.netlib.org/utk/papers/factor/node3.html
 // local matrix elements is stored in column major
@@ -27,6 +29,27 @@ class DistBCDMatrix
     int pnum(const int prow, const int pcol);
     //~DistBCDMatrix();
 
+    const MPI_Comm get_comm() const
+    {
+        return comm;
+    };
+    const MPI_Group get_group() const
+    {
+        return group;
+    };
+    const int get_nrow() const
+    {
+        return nrow;
+    };
+    const int get_ncol() const
+    {
+        return ncol;
+    };
+    const char get_LAYOUT() const
+    {
+        return LAYOUT;
+    };
+
   private:
     // MPI communicator
     MPI_Comm comm;
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
index aa5e67b6ab..a63a0dc16c 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
@@ -19,6 +19,44 @@ class DistCCSMatrix
     int globalCol(int localCol);
     int localCol(int globalCol, int& mypcol);
     void setnnz(int nnzLocal);
+
+    const MPI_Comm get_comm() const
+    {
+        return comm;
+    };
+    const MPI_Group get_group() const
+    {
+        return group;
+    };
+    const MPI_Group get_group_data() const
+    {
+        return group_data;
+    };
+    const int get_size() const
+    {
+        return size;
+    };
+    const int get_nnz() const
+    {
+        return nnz;
+    };
+    const int get_nnzlocal() const
+    {
+        return nnzLocal;
+    };
+    const int get_numcol_local() const
+    {
+        return numColLocal;
+    };
+    int* get_colptr_local() const
+    {
+        return colptrLocal;
+    };
+    int* get_rowind_local() const
+    {
+        return rowindLocal;
+    };
+
     ~DistCCSMatrix();
 
   private:
@@ -50,6 +88,8 @@ class DistCCSMatrix
     // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
     int* colptrLocal;
     int* rowindLocal;
+
+    // friend class DistMatrixTransformer;
 };
 } // namespace pexsi
 #endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index 01b96f42cc..18fe445043 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -1,3 +1,5 @@
+#include "dist_matrix_transformer.h"
+
 #include <mpi.h>
 
 #include <climits>
@@ -28,11 +30,11 @@ namespace pexsi
 // wether this function is called for the first time for a index array; nprocs: total number of processes size_process:
 // the number of indices in each process displacement_process: the start position in each process index: the array
 // contains the indices
-inline int MinimumIndexPosition(const bool isFirst,
-                                const int nprocs,
-                                int* size_process,
-                                int* displacement_process,
-                                const int* index)
+inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst,
+                                                       const int nprocs,
+                                                       int* size_process,
+                                                       int* displacement_process,
+                                                       const int* index)
 {
     // usually the minimum index is continuous, so it will be a good idea to
     // check the one next to the previous index first.
@@ -104,16 +106,16 @@ inline int MinimumIndexPosition(const bool isFirst,
     }
 }
 
-inline void buildCCSParameter(const int size,
-                              const int nprocs,
-                              std::vector<int> size_process,
-                              std::vector<int> displacement_process,
-                              const int* position_index,
-                              DistCCSMatrix& DST_Matrix,
-                              int* buffer2ccsIndex)
+inline void DistMatrixTransformer::buildCCSParameter(const int size,
+                                                     const int nprocs,
+                                                     std::vector<int> size_process,
+                                                     std::vector<int> displacement_process,
+                                                     const int* position_index,
+                                                     DistCCSMatrix& DST_Matrix,
+                                                     int* buffer2ccsIndex)
 {
     // find the minimum one from left buffer index
-    if (DST_Matrix.nnzLocal <= 0)
+    if (DST_Matrix.get_nnzlocal() <= 0)
         return;
 
     int pre_col = -1;
@@ -123,31 +125,34 @@ inline void buildCCSParameter(const int size,
     while (p_mini >= 0)
     {
         int index_mini = position_index[p_mini];
-        int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol;
-        int row_mini = index_mini % DST_Matrix.size;
+        int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol;
+        int row_mini = index_mini % DST_Matrix.get_size();
         if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array
         {
             pre_col = col_mini;
-            DST_Matrix.colptrLocal[col_mini] = nnz_now + 1;
+            DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1;
         }
-        DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
+        DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
         // copy data from buffer to M, be careful M is a 0-based array
         buffer2ccsIndex[nnz_now] = p_mini;
         ++nnz_now;
         p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
     }
     // The last element of colptrLocal is nnzLocal+1
-    DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1;
+    DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1;
 }
 
-inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal)
+inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal,
+                                                   int* buffer2ccsIndex,
+                                                   double* buffer,
+                                                   double* nzvalLocal)
 {
     for (int i = 0; i < nnzLocal; ++i)
     {
         nzvalLocal[i] = buffer[buffer2ccsIndex[i]];
     }
 }
-inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
+inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 {
     for (int i = 0; i < N; ++i)
     {
@@ -161,15 +166,15 @@ inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 }
 
 // find out the index of non-zero elements
-inline int getNonZeroIndex(char LAYOUT,
-                           const int nrow,
-                           const int ncol,
-                           double* H_2d,
-                           double* S_2d,
-                           const double ZERO_Limit,
-                           int& nnz,
-                           std::vector<int>& rowidx,
-                           std::vector<int>& colidx)
+inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
+                                                  const int nrow,
+                                                  const int ncol,
+                                                  double* H_2d,
+                                                  double* S_2d,
+                                                  const double ZERO_Limit,
+                                                  int& nnz,
+                                                  std::vector<int>& rowidx,
+                                                  std::vector<int>& colidx)
 {
 #ifdef _DEBUG
     char f_log[80];
@@ -275,21 +280,21 @@ inline int getNonZeroIndex(char LAYOUT,
     return 0;
 }
 
-int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
-                            DistCCSMatrix& DST_Matrix,
-                            const int NPROC_TRANS,
-                            MPI_Group& GROUP_TRANS,
-                            MPI_Comm& COMM_TRANS,
-                            const int nnz,
-                            std::vector<int>& rowidx,
-                            std::vector<int>& colidx,
-                            int& sender_size,
-                            std::vector<int>& sender_size_process,
-                            std::vector<int>& sender_displacement_process,
-                            int& receiver_size,
-                            std::vector<int>& receiver_size_process,
-                            std::vector<int>& receiver_displacement_process,
-                            std::vector<int>& buffer2ccsIndex)
+int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                                                   DistCCSMatrix& DST_Matrix,
+                                                   const int NPROC_TRANS,
+                                                   MPI_Group& GROUP_TRANS,
+                                                   MPI_Comm& COMM_TRANS,
+                                                   const int nnz,
+                                                   std::vector<int>& rowidx,
+                                                   std::vector<int>& colidx,
+                                                   int& sender_size,
+                                                   std::vector<int>& sender_size_process,
+                                                   std::vector<int>& sender_displacement_process,
+                                                   int& receiver_size,
+                                                   std::vector<int>& receiver_size_process,
+                                                   std::vector<int>& receiver_displacement_process,
+                                                   std::vector<int>& buffer2ccsIndex)
 {
     // debug
     int myproc;
@@ -322,12 +327,12 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     std::vector<int> proc_map_data_trans;
     if (myproc == 0)
     {
-        MPI_Group_size(DST_Matrix.group_data, &nproc_data);
+        MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data);
         MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
         proc_map_data_trans.resize(nproc_data, 0);
         for (int i = 0; i < nproc_data; ++i)
         {
-            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
+            MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
         }
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
@@ -429,7 +434,7 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         int dst_col = DST_Matrix.localCol(g_col, dst_process);
         int l_row = rowidx[i];
         int dst_row = SRC_Matrix.globalRow(l_row);
-        sender_index[i] = dst_col * DST_Matrix.size + dst_row;
+        sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row;
     }
 // debug
 #ifdef _DEBUG
@@ -478,10 +483,10 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
-                      DistCCSMatrix& DST_Matrix,
-                      MPI_Group& GROUP_TRANS,
-                      MPI_Comm& COMM_TRANS)
+int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                                             DistCCSMatrix& DST_Matrix,
+                                             MPI_Group& GROUP_TRANS,
+                                             MPI_Comm& COMM_TRANS)
 {
 // debug
 #ifdef _DEBUG
@@ -499,7 +504,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
 #endif
     // build transfortram communicator which contains both processes of BCD processors and
     // CCS processors with nonzero elements
-    MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS);
+    MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS);
     MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
 // debug
 #ifdef _DEBUG
@@ -557,7 +562,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
+int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 {
     MPI_Group_free(&GROUP_TRANS);
     if (COMM_TRANS != MPI_COMM_NULL)
@@ -571,13 +576,13 @@ int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 // two destination matrices share the same non-zero elements positions
 // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero,
 // even if one of them is acturely zero All matrices must have same MPI communicator
-int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
-                      double* H_2d,
-                      double* S_2d,
-                      const double ZERO_Limit,
-                      DistCCSMatrix& DST_Matrix,
-                      double*& H_ccs,
-                      double*& S_ccs)
+int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
+                                             double* H_2d,
+                                             double* S_2d,
+                                             const double ZERO_Limit,
+                                             DistCCSMatrix& DST_Matrix,
+                                             double*& H_ccs,
+                                             double*& S_ccs)
 {
 // debug
 #ifdef _DEBUG
@@ -614,9 +619,9 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         {
             log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow
                 << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
-            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol
+            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol()
                 << std::endl;
-            log << "layout:" << SRC_Matrix.LAYOUT << std::endl;
+            log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl;
             log << "ZERO = " << ZERO_Limit << std::endl;
             log << "DST_Matrix parameters:" << std::endl;
             log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
@@ -633,11 +638,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "start counting nnz..." << std::endl;
 #endif
-        if (SRC_Matrix.comm != MPI_COMM_NULL)
+        if (SRC_Matrix.get_comm() != MPI_COMM_NULL)
         {
-            getNonZeroIndex(SRC_Matrix.LAYOUT,
-                            SRC_Matrix.nrow,
-                            SRC_Matrix.ncol,
+            getNonZeroIndex(SRC_Matrix.get_LAYOUT(),
+                            SRC_Matrix.get_nrow(),
+                            SRC_Matrix.get_ncol(),
                             H_2d,
                             S_2d,
                             ZERO_Limit,
@@ -654,11 +659,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
             if(SRC_Matrix.comm != MPI_COMM_NULL)
             {
                 log<<"NonZeroIndex :"<<std::endl;
-                if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+                if(SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=rowidx[i]*SRC_Matrix.ncol+colidx[i];
+                        int HS_idx=rowidx[i]*SRC_Matrix.get_ncol()+colidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -667,7 +672,7 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=colidx[i]*SRC_Matrix.nrow+rowidx[i];
+                        int HS_idx=colidx[i]*SRC_Matrix.get_nrow()+rowidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -707,18 +712,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
-        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
+                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
+                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -749,18 +754,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 #endif
 
         // put S to sender buffer
-        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
+                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
+                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -804,12 +809,12 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 
 // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution
 // two source matrices share the same non-zero elements positions
-int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
-                      double* DMnzvalLocal,
-                      double* EDMnzvalLocal,
-                      DistBCDMatrix& DST_Matrix,
-                      double* DM,
-                      double* EDM)
+int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
+                                             double* DMnzvalLocal,
+                                             double* EDMnzvalLocal,
+                                             DistBCDMatrix& DST_Matrix,
+                                             double* DM,
+                                             double* EDM)
 {
 // debug
 #ifdef _DEBUG
@@ -840,7 +845,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
     if (COMM_TRANS != MPI_COMM_NULL)
     {
         // init DM and EDM with 0
-        for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i)
+        for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i)
         {
             DM[i] = 0;
             EDM[i] = 0;
@@ -877,12 +882,12 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         MPI_Comm_rank(COMM_TRANS, &myproc_trans);
         if (myproc_trans == 0)
         {
-            MPI_Group_size(DST_Matrix.group, &nproc_bcd);
+            MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd);
             MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
             proc_map_bcd_trans.resize(nproc_bcd, 0);
             for (int i = 0; i < nproc_bcd; ++i)
             {
-                MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
+                MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
             }
             MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
         }
@@ -933,7 +938,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             log << "display all columns and rows of nonzeros values:\n";
         int log_nnz = 0;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
+        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
@@ -942,9 +947,9 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             // log<<g_col<<"\n ";
             // #endif
             // OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
+                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
                 int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
@@ -1020,7 +1025,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
 #endif
 
         // setup up sender index and receiver index
-        int sender_size = SRC_Matrix.nnzLocal;
+        int sender_size = SRC_Matrix.get_nnzlocal();
         int* sender_index;
         double* sender_buffer;
         int* dst_index;
@@ -1119,14 +1124,14 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "idx start at " << idx << std::endl;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
+        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
             int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
+                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
 #ifdef _DEBUG
@@ -1315,9 +1320,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2]>DST_Matrix.nrow)
+    else if(receiver_index[i*2]>DST_Matrix.get_nrow())
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.get_nrow()<<std::endl;
         log.flush();
     }
     if(receiver_index[i*2+1]<0)
@@ -1325,9 +1330,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+    else if(receiver_index[i*2+1]>DST_Matrix.get_ncol())
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.get_ncol()<<std::endl;
         log.flush();
     }
 }
@@ -1376,10 +1381,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2] > DST_Matrix.nrow)
+                else if (receiver_index[i * 2] > DST_Matrix.get_nrow())
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > "
-                        << DST_Matrix.nrow << std::endl;
+                        << DST_Matrix.get_nrow() << std::endl;
                     log.flush();
                 }
                 if (receiver_index[i * 2 + 1] < 0)
@@ -1388,10 +1393,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol)
+                else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol())
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > "
-                        << DST_Matrix.ncol << std::endl;
+                        << DST_Matrix.get_ncol() << std::endl;
                     log.flush();
                 }
             }
@@ -1428,14 +1433,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
 #endif
         // transform receiver_buffer to DM
-        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.ncol + iy;
+                int idx = ix * DST_Matrix.get_ncol() + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1444,7 +1449,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.ncol << std::endl;
+                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
                         log.flush();
                     }
                 }
@@ -1454,12 +1459,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.nrow + ix;
+                int idx = iy * DST_Matrix.get_nrow() + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1468,7 +1473,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.nrow << std::endl;
+                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
                         log.flush();
                     }
                 }
@@ -1512,14 +1517,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
 #endif
         // transform receiver_buffer to EDM
-        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.ncol + iy;
+                int idx = ix * DST_Matrix.get_ncol() + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1528,7 +1533,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.ncol << std::endl;
+                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
                         log.flush();
                     }
                 }
@@ -1538,12 +1543,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.nrow + ix;
+                int idx = iy * DST_Matrix.get_nrow() + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1552,7 +1557,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.nrow << std::endl;
+                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
                         log.flush();
                     }
                 }
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index 1d28866c96..e3e27e995a 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -1,8 +1,9 @@
 #ifndef DISTMATRIXTRANSFORMER_H
 #define DISTMATRIXTRANSFORMER_H
 
-#include "dist_bcd_matrix.h"
-#include "dist_ccs_matrix.h"
+#include <mpi.h>
+
+#include <vector>
 // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // they should have same MPI communicator
 // The local matrix of BCD is column-major order
@@ -15,6 +16,62 @@
 // even if one of them is acturely zero All matrices must have same MPI communicator
 namespace pexsi
 {
+class DistBCDMatrix;
+class DistCCSMatrix;
+
+namespace DistMatrixTransformer
+{
+int MinimumIndexPosition(const bool isFirst,
+                         const int nprocs,
+                         int* size_process,
+                         int* displacement_process,
+                         const int* index);
+
+void buildCCSParameter(const int size,
+                       const int nprocs,
+                       std::vector<int> size_process,
+                       std::vector<int> displacement_process,
+                       const int* position_index,
+                       DistCCSMatrix& DST_Matrix,
+                       int* buffer2ccsIndex);
+
+void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal);
+
+void countMatrixDistribution(int N, double* A, std::map<int, int>& P);
+
+int getNonZeroIndex(char LAYOUT,
+                    const int nrow,
+                    const int ncol,
+                    double* H_2d,
+                    double* S_2d,
+                    const double ZERO_Limit,
+                    int& nnz,
+                    std::vector<int>& rowidx,
+                    std::vector<int>& colidx);
+
+int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                            DistCCSMatrix& DST_Matrix,
+                            const int NPROC_TRANS,
+                            MPI_Group& GROUP_TRANS,
+                            MPI_Comm& COMM_TRANS,
+                            const int nnz,
+                            std::vector<int>& rowidx,
+                            std::vector<int>& colidx,
+                            int& sender_size,
+                            std::vector<int>& sender_size_process,
+                            std::vector<int>& sender_displacement_process,
+                            int& receiver_size,
+                            std::vector<int>& receiver_size_process,
+                            std::vector<int>& receiver_displacement_process,
+                            std::vector<int>& buffer2ccsIndex);
+
+int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                      DistCCSMatrix& DST_Matrix,
+                      MPI_Group& GROUP_TRANS,
+                      MPI_Comm& COMM_TRANS);
+
+int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS);
+
 int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       double* H_2d,
                       double* S_2d,
@@ -31,6 +88,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                       double* ENDnzvalLocal,
                       DistBCDMatrix& DST_Matrix,
                       double* DM_2d,
-                      double* END_2d);
+                      double* ED_2d);
+}; // namespace DistMatrixTransformer
 } // namespace pexsi
 #endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 90d16ae993..2d6f2674d9 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -5,6 +5,11 @@
 #include <cstring>
 
 #include "module_base/global_variable.h"
+#include "simple_pexsi.h"
+
+extern MPI_Comm DIAG_WORLD;
+extern MPI_Comm GRID_WORLD;
+extern MPI_Group GRID_GROUP;
 
 namespace pexsi
 {
@@ -37,9 +42,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
 
 int PEXSI_Solver::solve()
 {
-    extern MPI_Comm DIAG_WORLD;
-    extern MPI_Comm GRID_WORLD;
-    extern MPI_Group GRID_GROUP;
+
     simplePEXSI(DIAG_WORLD,
                 GRID_WORLD,
                 GRID_GROUP,
@@ -61,12 +64,12 @@ int PEXSI_Solver::solve()
     return 0;
 }
 
-const double* PEXSI_Solver::get_DM() const
+double* PEXSI_Solver::get_DM() const
 {
     return DM;
 }
 
-const double* PEXSI_Solver::get_EDM() const
+double* PEXSI_Solver::get_EDM() const
 {
     return EDM;
 }
@@ -75,4 +78,15 @@ const double PEXSI_Solver::get_totalFreeEnergy() const
 {
     return totalFreeEnergy;
 }
+
+const double PEXSI_Solver::get_totalEnergyH() const
+{
+    return totalEnergyH;
+}
+
+const double PEXSI_Solver::get_totalEnergyS() const
+{
+    return totalEnergyS;
+}
+
 } // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 0c3164e5f0..b3d7aed152 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -18,9 +18,11 @@ class PEXSI_Solver
                  double& totalEnergyS,
                  double& totalFreeEnergy);
     int solve();
-    const double* get_DM() const;
-    const double* get_EDM() const;
+    double* get_DM() const;
+    double* get_EDM() const;
     const double get_totalFreeEnergy() const;
+    const double get_totalEnergyH() const;
+    const double get_totalEnergyS() const;
 
   private:
     int blacs_text;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 845beef18c..2d1705557c 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -18,6 +18,7 @@
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
+#include "module_base/global_variable.h"
 
 namespace pexsi
 {
@@ -102,220 +103,252 @@ int loadPEXSIOption(MPI_Comm comm,
     // 10: numElectronPEXSITolerance
     // 11: ZERO_Limit
     double double_para[12];
-    int myid;
-    MPI_Comm_rank(comm, &myid);
-    if (myid == 0)
-    {
-        std::ifstream ifs(PexsiOptionFile.c_str());
-        if (!ifs)
-        {
-            return 1;
-        }
-        setDefaultOption(int_para, double_para);
 
-        ifs.clear();
-        ifs.seekg(0);
+    // read in PEXSI options from GlobalV
+    int_para[0] = GlobalV::pexsi_npole;
+    int_para[1] = GlobalV::pexsi_inertia;
+    int_para[2] = GlobalV::pexsi_nmax;
+    int_para[3] = 0;
+    int_para[4] = 1; // GlobalV::pexsi_symbolic;
+    int_para[5] = GlobalV::pexsi_comm;
+    int_para[6] = 0;
+    int_para[7] = GlobalV::pexsi_storage;
+    int_para[8] = GlobalV::pexsi_ordering;
+    int_para[9] = GlobalV::pexsi_row_ordering;
+    int_para[10] = GlobalV::pexsi_nproc;
+    int_para[11] = GlobalV::pexsi_symm;
+    int_para[12] = GlobalV::pexsi_trans;
+    int_para[13] = GlobalV::pexsi_method;
+    int_para[14] = 2;
+    int_para[15] = 0;
+    int_para[16] = GlobalV::pexsi_nproc_pole;
 
-        char key[128];
-        char lowercase_key[128];
-        const int LINE_LINGTH = 1024;
-        char unused_string[LINE_LINGTH];
+    double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin;
+    double_para[1] = GlobalV::pexsi_temp;
+    double_para[2] = GlobalV::pexsi_gap;
+    double_para[3] = GlobalV::pexsi_delta_e;
+    double_para[4] = GlobalV::pexsi_mu_lower;
+    double_para[5] = GlobalV::pexsi_mu_upper;
+    double_para[6] = GlobalV::pexsi_mu;
+    double_para[7] = GlobalV::pexsi_mu_thr;
+    double_para[8] = GlobalV::pexsi_mu_expand;
+    double_para[9] = GlobalV::pexsi_mu_guard;
+    double_para[10] = GlobalV::pexsi_elec_thr;
+    double_para[11] = GlobalV::pexsi_zero_thr;
+    // int myid;
+    // MPI_Comm_rank(comm, &myid);
+    // if (myid == 0)
+    // {
+    //     std::ifstream ifs(PexsiOptionFile.c_str());
+    //     if (!ifs)
+    //     {
+    //         return 1;
+    //     }
+    //     setDefaultOption(int_para, double_para);
 
-        while (ifs.good())
-        {
-            ifs >> key;
-            //~ cout<<"readin word is: "<<key<<endl;
-            strtolower(key, lowercase_key);
-            if (strcmp("spin", lowercase_key) == 0)
-            {
-                //~ ifs>>options.spin;
-                ifs >> double_para[0];
-                //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
-            }
-            else if (strcmp("temperature", lowercase_key) == 0)
-            {
-                //~ ifs>>options.temperature;
-                ifs >> double_para[1];
-                //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
-            }
-            else if (strcmp("gap", lowercase_key) == 0)
-            {
-                //~ ifs>>options.gap;
-                ifs >> double_para[2];
-                //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
-            }
-            else if (strcmp("deltae", lowercase_key) == 0)
-            {
-                //~ ifs>>options.deltaE;
-                ifs >> double_para[3];
-                //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
-            }
-            else if (strcmp("numpole", lowercase_key) == 0)
-            {
-                //~ ifs>>options.numPole;
-                ifs >> int_para[0];
-                //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
-            }
-            else if (strcmp("isinertiacount", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isInertiaCount;
-                ifs >> int_para[1];
-                //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
-            }
-            else if (strcmp("maxpexsiiter", lowercase_key) == 0)
-            {
-                //~ ifs>>options.maxPEXSIIter;
-                ifs >> int_para[2];
-                //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
-            }
-            else if (strcmp("mumin0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muMin0;
-                ifs >> double_para[4];
-                //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
-            }
-            else if (strcmp("mumax0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muMax0;
-                ifs >> double_para[5];
-                //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
-            }
-            else if (strcmp("mu0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.mu0;
-                ifs >> double_para[6];
-                //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
-            }
-            else if (strcmp("muinertiatolerance", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muInertiaTolerance;
-                ifs >> double_para[7];
-                //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
-            }
-            else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muInertiaExpansion;
-                ifs >> double_para[8];
-                //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
-            }
-            else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muPEXSISafeGuard;
-                ifs >> double_para[9];
-                //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
-            }
-            else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
-            {
-                //~ ifs>>options.numElectronPEXSITolerance;
-                ifs >> double_para[10];
-                //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
-            }
-            else if (strcmp("zero_limit", lowercase_key) == 0)
-            {
-                ifs >> double_para[11];
-            }
-            else if (strcmp("matrixtype", lowercase_key) == 0)
-            {
-                //~ ifs>>options.matrixType;
-                ifs >> int_para[3];
-                //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
-            }
-            else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isSymbolicFactorize;
-                ifs >> int_para[4];
-                //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
-            }
-            else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isConstructCommPattern;
-                ifs >> int_para[5];
-                //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
-            }
-            else if (strcmp("solver", lowercase_key) == 0)
-            {
-                //~ ifs>>options.solver;
-                ifs >> int_para[6];
-                //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
-            }
-            else if (strcmp("symmetricstorage", lowercase_key) == 0)
-            {
-                //~ ifs>>options.symmetricStorage;
-                ifs >> int_para[7];
-                //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
-            }
-            else if (strcmp("ordering", lowercase_key) == 0)
-            {
-                //~ ifs>>options.ordering;
-                ifs >> int_para[8];
-                //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
-            }
-            else if (strcmp("rowordering", lowercase_key) == 0)
-            {
-                //~ ifs>>options.rowOrdering;
-                ifs >> int_para[9];
-                //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
-            }
-            else if (strcmp("npsymbfact", lowercase_key) == 0)
-            {
-                //~ ifs>>options.npSymbFact;
-                ifs >> int_para[10];
-                //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
-            }
-            else if (strcmp("symmetric", lowercase_key) == 0)
-            {
-                //~ ifs>>options.symmetric;
-                ifs >> int_para[11];
-                //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
-            }
-            else if (strcmp("transpose", lowercase_key) == 0)
-            {
-                //~ ifs>>options.transpose;
-                ifs >> int_para[12];
-                //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
-            }
-            else if (strcmp("method", lowercase_key) == 0)
-            {
-                //~ ifs>>options.method;
-                ifs >> int_para[13];
-                //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
-            }
-            else if (strcmp("npoints", lowercase_key) == 0)
-            {
-                //~ ifs>>options.nPoints;
-                ifs >> int_para[14];
-                //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
-            }
-            else if (strcmp("verbosity", lowercase_key) == 0)
-            {
-                //~ ifs>>options.verbosity;
-                ifs >> int_para[15];
-                //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
-            }
-            else if (strcmp("numprocessperpole", lowercase_key) == 0)
-            {
-                //~ ifs>>options.verbosity;
-                ifs >> int_para[16];
-                //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
-            }
-            else
-            {
-                if (key[0] == '#' || key[0] == '/')
-                {
-                    ifs.getline(unused_string, LINE_LINGTH);
-                }
-                else
-                {
-                    std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
-                    return 1;
-                }
-            }
-        }
-    }
+    //     ifs.clear();
+    //     ifs.seekg(0);
+
+    //     char key[128];
+    //     char lowercase_key[128];
+    //     const int LINE_LINGTH = 1024;
+    //     char unused_string[LINE_LINGTH];
 
-    // broadcast all options
-    MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
-    MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
+    //     while (ifs.good())
+    //     {
+    //         ifs >> key;
+    //         //~ cout<<"readin word is: "<<key<<endl;
+    //         strtolower(key, lowercase_key);
+    //         if (strcmp("spin", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.spin;
+    //             ifs >> double_para[0];
+    //             //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
+    //         }
+    //         else if (strcmp("temperature", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.temperature;
+    //             ifs >> double_para[1];
+    //             //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
+    //         }
+    //         else if (strcmp("gap", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.gap;
+    //             ifs >> double_para[2];
+    //             //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
+    //         }
+    //         else if (strcmp("deltae", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.deltaE;
+    //             ifs >> double_para[3];
+    //             //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
+    //         }
+    //         else if (strcmp("numpole", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.numPole;
+    //             ifs >> int_para[0];
+    //             //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
+    //         }
+    //         else if (strcmp("isinertiacount", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isInertiaCount;
+    //             ifs >> int_para[1];
+    //             //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
+    //         }
+    //         else if (strcmp("maxpexsiiter", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.maxPEXSIIter;
+    //             ifs >> int_para[2];
+    //             //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
+    //         }
+    //         else if (strcmp("mumin0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muMin0;
+    //             ifs >> double_para[4];
+    //             //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
+    //         }
+    //         else if (strcmp("mumax0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muMax0;
+    //             ifs >> double_para[5];
+    //             //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
+    //         }
+    //         else if (strcmp("mu0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.mu0;
+    //             ifs >> double_para[6];
+    //             //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
+    //         }
+    //         else if (strcmp("muinertiatolerance", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muInertiaTolerance;
+    //             ifs >> double_para[7];
+    //             //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
+    //         }
+    //         else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muInertiaExpansion;
+    //             ifs >> double_para[8];
+    //             //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
+    //         }
+    //         else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muPEXSISafeGuard;
+    //             ifs >> double_para[9];
+    //             //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
+    //         }
+    //         else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.numElectronPEXSITolerance;
+    //             ifs >> double_para[10];
+    //             //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
+    //         }
+    //         else if (strcmp("zero_limit", lowercase_key) == 0)
+    //         {
+    //             ifs >> double_para[11];
+    //         }
+    //         else if (strcmp("matrixtype", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.matrixType;
+    //             ifs >> int_para[3];
+    //             //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
+    //         }
+    //         else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isSymbolicFactorize;
+    //             ifs >> int_para[4];
+    //             //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
+    //         }
+    //         else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isConstructCommPattern;
+    //             ifs >> int_para[5];
+    //             //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
+    //         }
+    //         else if (strcmp("solver", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.solver;
+    //             ifs >> int_para[6];
+    //             //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
+    //         }
+    //         else if (strcmp("symmetricstorage", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.symmetricStorage;
+    //             ifs >> int_para[7];
+    //             //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
+    //         }
+    //         else if (strcmp("ordering", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.ordering;
+    //             ifs >> int_para[8];
+    //             //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
+    //         }
+    //         else if (strcmp("rowordering", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.rowOrdering;
+    //             ifs >> int_para[9];
+    //             //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
+    //         }
+    //         else if (strcmp("npsymbfact", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.npSymbFact;
+    //             ifs >> int_para[10];
+    //             //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
+    //         }
+    //         else if (strcmp("symmetric", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.symmetric;
+    //             ifs >> int_para[11];
+    //             //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
+    //         }
+    //         else if (strcmp("transpose", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.transpose;
+    //             ifs >> int_para[12];
+    //             //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
+    //         }
+    //         else if (strcmp("method", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.method;
+    //             ifs >> int_para[13];
+    //             //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
+    //         }
+    //         else if (strcmp("npoints", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.nPoints;
+    //             ifs >> int_para[14];
+    //             //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
+    //         }
+    //         else if (strcmp("verbosity", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.verbosity;
+    //             ifs >> int_para[15];
+    //             //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
+    //         }
+    //         else if (strcmp("numprocessperpole", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.verbosity;
+    //             ifs >> int_para[16];
+    //             //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
+    //         }
+    //         else
+    //         {
+    //             if (key[0] == '#' || key[0] == '/')
+    //             {
+    //                 ifs.getline(unused_string, LINE_LINGTH);
+    //             }
+    //             else
+    //             {
+    //                 std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
+    //                 return 1;
+    //             }
+    //         }
+    //     }
+    // }
+
+    // // broadcast all options
+    // MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
+    // MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
 
     // setup PEXSI options from int_para and double_para
     options.numPole = int_para[0];
@@ -458,14 +491,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
 //}
 #endif
-    if (myid % (pexsi_prow * pexsi_pcol) == 0)
-    {
-        outputFileIndex = myid / (pexsi_prow * pexsi_pcol);
-    }
-    else
-    {
-        outputFileIndex = -1;
-    }
+    outputFileIndex = -1;
     // OUT(ofs_running, "checkpoint04");
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
     if (comm_PEXSI != MPI_COMM_NULL)
@@ -523,7 +549,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint12");
-    transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
+    DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
     // MPI_Barrier(MPI_COMM_WORLD);
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint13");
@@ -542,11 +568,11 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         PPEXSILoadRealHSMatrix(plan,
                                options,
                                size,
-                               DST_Matrix.nnz,
-                               DST_Matrix.nnzLocal,
-                               DST_Matrix.numColLocal,
-                               DST_Matrix.colptrLocal,
-                               DST_Matrix.rowindLocal,
+                               DST_Matrix.get_nnz(),
+                               DST_Matrix.get_nnzlocal(),
+                               DST_Matrix.get_numcol_local(),
+                               DST_Matrix.get_colptr_local(),
+                               DST_Matrix.get_rowind_local(),
                                HnzvalLocal,
                                isSIdentity,
                                SnzvalLocal,
@@ -600,9 +626,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
             delete[] EDMnzvalLocal;
         if (FDMnzvalLocal != nullptr)
             delete[] FDMnzvalLocal;
-        DMnzvalLocal = new double[DST_Matrix.nnzLocal];
-        EDMnzvalLocal = new double[DST_Matrix.nnzLocal];
-        FDMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        DMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
+        EDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
+        FDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
         if (myid < numProcessPerPole)
         {
             PPEXSIRetrieveRealDFTMatrix(plan,
@@ -633,8 +659,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     {
         delete[] DM;
         delete[] EDM;
-        DM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
-        EDM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
+        DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
+        EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
     }
 #ifdef _DEBUG
     // OUT(ofs_running, "checkpoint19");
@@ -644,7 +670,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
 #endif
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
-    transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
+    DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
 
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index cc079cf208..1edc3a092a 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -22,6 +22,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/parallel_common.h"
 #include "module_base/timer.h"
+#include "module_base/tool_quit.h"
 #include "version.h"
 Input INPUT;
 
@@ -635,6 +636,34 @@ void Input::Default(void)
     qo_thr = 1e-6;
     qo_screening_coeff = {};
 
+    //==========================================================
+    // variables for PEXSI
+    //==========================================================
+    pexsi_npole = 54;
+    pexsi_inertia = 1;
+    pexsi_nmax = 80;
+    // pexsi_symbolic = 1;
+    pexsi_comm = 1;
+    pexsi_storage = 1;
+    pexsi_ordering = 0;
+    pexsi_row_ordering = 1;
+    pexsi_nproc = 1;
+    pexsi_symm = 1;
+    pexsi_trans = 0;
+    pexsi_method = 1;
+    pexsi_nproc_pole = 1;
+    // pexsi_spin = 2;
+    pexsi_temp = 0.0001;
+    pexsi_gap = 0;
+    pexsi_delta_e = 20.0;
+    pexsi_mu_lower = -10;
+    pexsi_mu_upper = 10;
+    pexsi_mu = 0.0;
+    pexsi_mu_thr = 0.05;
+    pexsi_mu_expand = 0.3;
+    pexsi_mu_guard = 0.2;
+    pexsi_elec_thr = 0.001;
+    pexsi_zero_thr = 1e-10;
     return;
 }
 
@@ -2290,6 +2319,9 @@ bool Input::Read(const std::string& fn)
         {
             read_value(ifs, sc_file);
         }
+        //----------------------------------------------------------------------------------
+        //    Quasiatomic orbital
+        //----------------------------------------------------------------------------------
         else if (strcmp("qo_switch", word) == 0){
             read_bool(ifs, qo_switch);
         }
@@ -2305,6 +2337,106 @@ bool Input::Read(const std::string& fn)
         else if (strcmp("qo_screening_coeff", word) == 0){
             read_value2stdvector(ifs, qo_screening_coeff);
         }
+        //----------------------------------------------------------------------------------
+        //    PEXSI
+        //----------------------------------------------------------------------------------
+        else if (strcmp("pexsi_npole", word) == 0){
+            read_value(ifs, pexsi_npole);
+        }
+        else if (strcmp("pexsi_inertia", word) == 0){
+            read_value(ifs, pexsi_inertia);
+        }
+        else if (strcmp("pexsi_nmax", word) == 0) {
+            read_value(ifs, pexsi_nmax);
+        }
+        // else if (strcmp("pexsi_symbolic", word) == 0)
+        // {
+        //     read_value(ifs, pexsi_symbolic);
+        // }
+        else if (strcmp("pexsi_comm", word) == 0)
+        {
+            read_value(ifs, pexsi_comm);
+        }
+        else if (strcmp("pexsi_storage", word) == 0)
+        {
+            read_value(ifs, pexsi_storage);
+        }
+        else if (strcmp("pexsi_ordering", word) == 0)
+        {
+            read_value(ifs, pexsi_ordering);
+        }
+        else if (strcmp("pexsi_row_ordering", word) == 0)
+        {
+            read_value(ifs, pexsi_row_ordering);
+        }
+        else if (strcmp("pexsi_nproc", word) == 0)
+        {
+            read_value(ifs, pexsi_nproc);
+        }
+        else if (strcmp("pexsi_symm", word) == 0)
+        {
+            read_value(ifs, pexsi_symm);
+        }
+        else if (strcmp("pexsi_trans", word) == 0)
+        {
+            read_value(ifs, pexsi_trans);
+        }
+        else if (strcmp("pexsi_method", word) == 0)
+        {
+            read_value(ifs, pexsi_method);
+        }
+        else if (strcmp("pexsi_nproc_pole", word) == 0)
+        {
+            read_value(ifs, pexsi_nproc_pole);
+        }
+        // else if (strcmp("pexsi_spin", word) == 0)
+        // {
+        //     read_value(ifs, pexsi_spin);
+        // }
+        else if (strcmp("pexsi_temp", word) == 0)
+        {
+            read_value(ifs, pexsi_temp);
+        }
+        else if (strcmp("pexsi_gap", word) == 0)
+        {
+            read_value(ifs, pexsi_gap);
+        }
+        else if (strcmp("pexsi_delta_e", word) == 0)
+        {
+            read_value(ifs, pexsi_delta_e);
+        }
+        else if (strcmp("pexsi_mu_lower", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_lower);
+        }
+        else if (strcmp("pexsi_mu_upper", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_upper);
+        }
+        else if (strcmp("pexsi_mu", word) == 0)
+        {
+            read_value(ifs, pexsi_mu);
+        }
+        else if (strcmp("pexsi_mu_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_thr);
+        }
+        else if (strcmp("pexsi_mu_expand", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_expand);
+        }
+        else if (strcmp("pexsi_mu_guard", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_guard);
+        }
+        else if (strcmp("pexsi_elec_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_elec_thr);
+        }
+        else if (strcmp("pexsi_zero_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_zero_thr);
+        }
         else
         {
             // xiaohui add 2015-09-15
@@ -3625,6 +3757,34 @@ void Input::Bcast()
     Parallel_Common::bcast_bool(qo_switch);
     Parallel_Common::bcast_string(qo_basis);
     Parallel_Common::bcast_double(qo_thr);
+    //==========================================================
+    // PEXSI
+    //==========================================================
+    Parallel_Common::bcast_int(pexsi_npole);
+    Parallel_Common::bcast_int(pexsi_inertia);
+    Parallel_Common::bcast_int(pexsi_nmax);
+    // Parallel_Common::bcast_int(pexsi_symbolic);
+    Parallel_Common::bcast_int(pexsi_comm);
+    Parallel_Common::bcast_int(pexsi_storage);
+    Parallel_Common::bcast_int(pexsi_ordering);
+    Parallel_Common::bcast_int(pexsi_row_ordering);
+    Parallel_Common::bcast_int(pexsi_nproc);
+    Parallel_Common::bcast_int(pexsi_symm);
+    Parallel_Common::bcast_int(pexsi_trans);
+    Parallel_Common::bcast_int(pexsi_method);
+    Parallel_Common::bcast_int(pexsi_nproc_pole);
+    // Parallel_Common::bcast_double(pexsi_spin);
+    Parallel_Common::bcast_double(pexsi_temp);
+    Parallel_Common::bcast_double(pexsi_gap);
+    Parallel_Common::bcast_double(pexsi_delta_e);
+    Parallel_Common::bcast_double(pexsi_mu_lower);
+    Parallel_Common::bcast_double(pexsi_mu_upper);
+    Parallel_Common::bcast_double(pexsi_mu);
+    Parallel_Common::bcast_double(pexsi_mu_thr);
+    Parallel_Common::bcast_double(pexsi_mu_expand);
+    Parallel_Common::bcast_double(pexsi_mu_guard);
+    Parallel_Common::bcast_double(pexsi_elec_thr);
+    Parallel_Common::bcast_double(pexsi_zero_thr);
     /* broadcasting std::vector is sometime a annorying task... */
     if (ntype != 0) /* ntype has been broadcasted before */
     {
@@ -3922,10 +4082,11 @@ void Input::Check(void)
         }
         else if (ks_solver == "pexsi")
         {
-#ifndef __MPI
-            ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version.");
-#else
+#ifdef __PEXSI
             GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
+#else
+            ModuleBase::WARNING_QUIT("Input",
+                "Can not use genelpa if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx.");
 #endif
 
 
diff --git a/source/module_io/input.h b/source/module_io/input.h
index b4e983abad..d4b0bccdad 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -599,6 +599,34 @@ class Input
     double qo_thr = 1e-6;
     std::vector<std::string> qo_strategy = {};
     std::vector<double> qo_screening_coeff = {};
+    //==========================================================
+    // variables for PEXSI
+    //==========================================================
+    int pexsi_npole = 54;
+    int pexsi_inertia = 1;
+    int pexsi_nmax = 80;
+    // int pexsi_symbolic = 1;
+    int pexsi_comm = 1;
+    int pexsi_storage = 1;
+    int pexsi_ordering = 0;
+    int pexsi_row_ordering = 1;
+    int pexsi_nproc = 1;
+    int pexsi_symm = 1;
+    int pexsi_trans = 0;
+    int pexsi_method = 1;
+    int pexsi_nproc_pole = 1;
+    // double pexsi_spin = 2;
+    double pexsi_temp = 0.0001;
+    double pexsi_gap = 0;
+    double pexsi_delta_e = 20.0;
+    double pexsi_mu_lower = -10;
+    double pexsi_mu_upper = 10;
+    double pexsi_mu = 0.0;
+    double pexsi_mu_thr = 0.05;
+    double pexsi_mu_expand = 0.3;
+    double pexsi_mu_guard = 0.2;
+    double pexsi_elec_thr = 0.001;
+    double pexsi_zero_thr = 1e-10;
     
   private:
     //==========================================================
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index a52245d05c..89aff00eed 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -765,6 +765,35 @@ void Input_Conv::Convert(void)
     GlobalV::qo_strategy = INPUT.qo_strategy;
     GlobalV::qo_thr = INPUT.qo_thr;
     GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff;
+
+    //-----------------------------------------------
+    // PEXSI related parameters
+    //-----------------------------------------------
+    GlobalV::pexsi_npole = INPUT.pexsi_npole;
+    GlobalV::pexsi_inertia = INPUT.pexsi_inertia;
+    GlobalV::pexsi_nmax = INPUT.pexsi_nmax;
+    // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic;
+    GlobalV::pexsi_comm = INPUT.pexsi_comm;
+    GlobalV::pexsi_storage = INPUT.pexsi_storage;
+    GlobalV::pexsi_ordering = INPUT.pexsi_ordering;
+    GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering;
+    GlobalV::pexsi_nproc = INPUT.pexsi_nproc;
+    GlobalV::pexsi_symm = INPUT.pexsi_symm;
+    GlobalV::pexsi_trans = INPUT.pexsi_trans;
+    GlobalV::pexsi_method = INPUT.pexsi_method;
+    GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
+    // GlobalV::pexsi_spin = INPUT.pexsi_spin;
+    GlobalV::pexsi_temp = INPUT.pexsi_temp;
+    GlobalV::pexsi_gap = INPUT.pexsi_gap;
+    GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e;
+    GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower;
+    GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper;
+    GlobalV::pexsi_mu = INPUT.pexsi_mu;
+    GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr;
+    GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand;
+    GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard;
+    GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr;
+    GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr;
     ModuleBase::timer::tick("Input_Conv", "Convert");
     return;
 }
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index 6003a4fdb0..2c30d8ff47 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -493,7 +493,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function");
-  
+
+    ofs << "\n#Parameters (24.PEXSI)" << std::endl;
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure.");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0");
+
     ofs.close();
     return;
 }
\ No newline at end of file

From 1e428d27ba2b63d8531f6dd51bc1cf8fcb2abe4a Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Fri, 26 Jan 2024 15:38:55 +0800
Subject: [PATCH 13/44] Configure Makefile Compiling, fix typos

---
 source/Makefile            | 17 +++++++++--------
 source/Makefile.Objects    | 18 ++++++++++--------
 source/Makefile.vars       | 20 +++++---------------
 source/module_io/input.cpp |  2 +-
 4 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/source/Makefile b/source/Makefile
index 0dcd329161..7bd81f26d9 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -7,7 +7,7 @@ include Makefile.vars
 INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container
 
 LIBS = -lm -lpthread
-OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic 
+OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES}
 HONG = -D__LCAO
 HONG += -D__ELPA
 ifeq ($(OPENMP), ON)
@@ -75,7 +75,7 @@ else
     FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
     FFTW_LIB_DIR     = ${FFTW_DIR}/lib
     HONG  += -D__FFTW3
-    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl
+    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
     INCLUDES += -I${FFTW_INCLUDE_DIR}
     
     #==========================
@@ -140,12 +140,6 @@ ifdef LIBTORCH_DIR
   endif
 endif
 
-ifdef PEXSI_DIR
-    INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE}
-    LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB}
-    HONG += -D__PEXSI
-endif
-
 ifdef DeePMD_DIR
     HONG  += -D__DPMD -DHIGH_PREC 
     OPTS  += -Wl,--no-as-needed
@@ -175,6 +169,13 @@ ifdef DeePMD_DIR
     INCLUDES += -I${TensorFlow_INCLUDE_DIR}
 endif
 
+ifdef PEXSI_DIR
+    OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI}
+    INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include
+    LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis
+    HONG += -D__PEXSI
+endif
+
 include Makefile.Objects
 
 #==========================
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 2a69761da3..71e637a80b 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -30,7 +30,7 @@ VPATH=./src_global:\
 ./module_hsolver:\
 ./module_hsolver/kernels:\
 ./module_hsolver/genelpa:\
-./module_hsolver/pexsi:\
+./module_hsolver/module_pexsi:\
 ./module_elecstate:\
 ./module_elecstate/kernels:\
 ./module_elecstate/potentials:\
@@ -102,6 +102,7 @@ ${OBJS_VDW}\
 ${OBJS_DFTU}\
 ${OBJS_DELTASPIN}\
 ${OBJS_TENSOR}\
+${OBJS_HSOLVER_PEXSI}\
 
 OBJS_MAIN=main.o\
     driver.o\
@@ -290,13 +291,7 @@ OBJS_HSOLVER=diago_cg.o\
     diago_iter_assist.o\
     math_kernel_op.o\
     dngvd_op.o\
-    diago_pexsi.o\
-    DistBCDMatrix.o\
-    DistCCSMatrix.o\
-    DistMatrixTransformer.o\
-    pexsi_solver.o\
-    simplePEXSI.o\
-
+    
 OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       diago_blas.o\
       diago_elpa.o\
@@ -305,6 +300,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       elpa_new_complex.o\
       utils.o\
 
+OBJS_HSOLVER_PEXSI=diago_pexsi.o\
+      pexsi_solver.o\
+      simple_pexsi.o\
+      dist_bcd_matrix.o\
+      dist_ccs_matrix.o\
+      dist_matrix_transformer.o\
+      
 OBJS_MD=fire.o\
     langevin.o\
     md_base.o\
diff --git a/source/Makefile.vars b/source/Makefile.vars
index 860bbdd806..477b0a251d 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -33,15 +33,6 @@ ELPA_DIR      = /root/lib/ELPA
 ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/
 
 CEREAL_DIR    = /root/lib/cereal
-DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0
-DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include
-DSUPERLU_LIB    = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a
-
-SCOTCH_INCLUDE  = -I/usr/local/include
-PTSCOTCH_DIR    = /root/workspace/scotch_6.0.0
-PTSCOTCH_LIB    = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a
-SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a
-
 
 
 ##-------------------  FOR GNU COMPILER  ------------------------------
@@ -68,14 +59,9 @@ SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc
 ## To use LIBXC:  set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7)
 ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR
 ## To use LibRI:  set LIBRI_DIR and LIBCOMM_DIR
-## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a
+## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR
 ##---------------------------------------------------------------------
 
-PEXSI_DIR = /root/workspace/pexsi_v2.0.0
-PEXSI_LIB_DIR = ${PEXSI_DIR}/src
-PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
-
-
 # LIBTORCH_DIR  = /usr/local
 # LIBNPY_DIR    = /usr/local
 
@@ -87,6 +73,10 @@ PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
 # LIBRI_DIR     = /public/software/LibRI
 # LIBCOMM_DIR   = /public/software/LibComm
 
+# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi
+# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu
+# PARMETIS_DIR    = /home/rhx/projects/pexsi-build/parmetis
+
 ##---------------------------------------------------------------------
 # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile
 # DEBUG = OFF
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 1edc3a092a..9d4b984728 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -4086,7 +4086,7 @@ void Input::Check(void)
             GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
 #else
             ModuleBase::WARNING_QUIT("Input",
-                "Can not use genelpa if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx.");
+                "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx.");
 #endif
 
 

From 1264b6475e73485d9c3f4aaca8157c44899a430f Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Fri, 26 Jan 2024 15:59:34 +0800
Subject: [PATCH 14/44] Fix Makefile Intel toolchains compile errors

---
 source/module_hsolver/module_pexsi/simple_pexsi.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 2d1705557c..df72a061c5 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -2,6 +2,7 @@
 // the H and S matrices are given by 2D block cyclic distribution
 // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution
 // #include "mpi.h"
+#ifdef __PEXSI
 #include <mpi.h>
 
 #include <cfloat>
@@ -728,4 +729,5 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file

From a6941467a5295cb5c584a193e5bffd3e13e1eff4 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Fri, 26 Jan 2024 16:14:15 +0800
Subject: [PATCH 15/44] Fix even more PEXSI related Makefile compiling issues

---
 source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp        | 4 +++-
 source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp        | 4 +++-
 .../module_hsolver/module_pexsi/dist_matrix_transformer.cpp   | 2 ++
 source/module_hsolver/module_pexsi/pexsi_solver.cpp           | 4 +++-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
index cf815bd4ae..e498b83a2e 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "dist_bcd_matrix.h"
 
 #include <mpi.h>
@@ -110,4 +111,5 @@ int DistBCDMatrix::pnum(const int prow, const int pcol)
 {
     return this->prowpcol2pnum[prow * this->npcols + pcol];
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
index 365622d249..ddd02aaa9a 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "dist_ccs_matrix.h"
 
 #include <mpi.h>
@@ -114,4 +115,5 @@ DistCCSMatrix::~DistCCSMatrix()
     delete[] colptrLocal;
     delete[] rowindLocal;
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index 18fe445043..ef6c6fec72 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "dist_matrix_transformer.h"
 
 #include <mpi.h>
@@ -1601,3 +1602,4 @@ MPI_Barrier(COMM_TRANS);
 }
 
 } // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 2d6f2674d9..1be66abf59 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "pexsi_solver.h"
 
 #include <mpi.h>
@@ -89,4 +90,5 @@ const double PEXSI_Solver::get_totalEnergyS() const
     return totalEnergyS;
 }
 
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file

From cd3a02880fa2a045cd3f06237fffbb99ae3e1440 Mon Sep 17 00:00:00 2001
From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com>
Date: Sun, 28 Jan 2024 14:00:20 +0800
Subject: [PATCH 16/44] Modify inputs and update to latest version (#2)

* run INPUT.Default() in every process in InputParaTest (#3490)

Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com>

* add blas support for FindLAPACK.cmake (#3497)

* more unittest of QO: towards orbital selection (#3499)

* Fix: fix bug in mulliken charge calculation (#3503)

* fix phase

* fix case test

* Refactor: namespace Conv_Coulomb_Pot_K (#3446)

* Refactor: namespace Conv_Coulomb_Pot_K

* Refactor: namespace Conv_Coulomb_Pot_K

---------

Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>

* enable the computation of all zeros in one function call (#3449)

Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>

* replace ios.eof() by ios.good() to avoid meeting badbit and failbit in reading STRU (#3506)

* Build: add ccache to accelerate the testing process (#3509)

* Build: add ccache to accelerate the testing process

* Update test.yml

* Update test.yml

* Update test.yml

* Docs: to avoid the misunderstanding in docs (#3518)

* to avoid the misunderstanding in docs

* Update docs/quick_start/hands_on.md

Co-authored-by: Chun Cai <amoycaic@gmail.com>

---------

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Docs: fix a missing depencency in conda build env (#3508)

* Feature: Add ENABLE_RAPIDJSON option to control the output of abacus.json (#3519)

Add ENABLE_RAPIDJSON option to control the output of abacus.json

* Feature: add python wrapper for math sphbes (#3475)

* recommit for review

* add python wrapper

* remove timer since performace tests add

* Feature: support segment split in kline mode in KPT file and `out_band` band output precision control, `8` as default (#3493)

* add precision control

* correct serial version of nscf_band function

* fix issue 3482

* update unit and integrated test

* update document

* correct unittest and make compatible with false and true

* fix: bug in Autotest.sh when result.ref has no totaltimeref (#3523)

* Fix : unit test of module_xc (#3524)

* Fix: omit small magnetic moments to avoid numerical instability (#3530)

* update deltalambda

* avoid numerical error in orbMulP

* add constrain on Mi

* change case reference value

* Fix: fix multiple compiler warnings (#3515)

* Fix: add noreturn attribute to warning_quit

* Add type conversion

* fix string literal

* fix small number trunctuation

* Fix system call returned value not checked

* fix missing braket

* Refactor parameter_pool.cpp and parameter_pool.h

* remove duplicated return statements

* Change WARNING_QUIT occurances in tests

* Add warning message to help debug UT

* output the default precision flag (#3496)

Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com>

* Build: Improving CMake performance for finding LibXC and ELPA (#3478)

* Fix for finding LibXC and ELPA

* For compatibility to previous routines

* syntax fix for FindELPA.cmake

* Update cmake/FindELPA.cmake

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Using CMake interface as default for finding LibXC

* update docs

* fix for FindLibxc: changing imcompatible if statement

* fix for FindLibxc: changing imcompatible if statement

* fix for FindLibxc: changing imcompatible if statement

* update docs for installing pkg-config

* Update FindLibxc.cmake

* Update FindLibxc.cmake

* remove previous LibXC routine in CMakeLists.txt

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Update easy_install.md with Makefile-built LibXC supported

* Update easy_install.md to include different behavior in different version on finding ELPA

---------

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Docs: correct some docs about mp2 smearing method (#3533)

* correct some docs about mp2 smearing method

* add docs about mv method

* Feature : printing band density (#3501)

Co-authored-by: wenfei-li <liwenfei@gmail.com>
Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>

* add some docs for PR#3501 (#3537)

* Feature: enable restart charge density mixing during SCF (#3542)

* add a new parameter mixing_restart

* do not update rho if iter==mixing_restart

* do not update rho if iter==mixing_restart-1

* reset mix and rho_mdata if iter==mixing_restart

* fix SCF exit directly since drho=0 if iter=GlobalV::MIXING_RESTART

* re-set_mixing in eachiterinit for PW and LCAO

* enable SCF restarts in esolver_ks::RUN

* add some UnitTests

* add some Docs

* new inputs added

* Update input-main.md (#3551)

Solve the format problem mentioned in issue 3543

* Build: fix compatibility issue against toolchain install (#3540)

* Fix for finding LibXC and ELPA

* For compatibility to previous routines

* syntax fix for FindELPA.cmake

* Update cmake/FindELPA.cmake

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Using CMake interface as default for finding LibXC

* update docs

* fix for FindLibxc: changing imcompatible if statement

* fix for FindLibxc: changing imcompatible if statement

* fix for FindLibxc: changing imcompatible if statement

* update docs for installing pkg-config

* Update FindLibxc.cmake

* Update FindLibxc.cmake

* remove previous LibXC routine in CMakeLists.txt

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Update easy_install.md with Makefile-built LibXC supported

* Update easy_install.md to include different behavior in different version on finding ELPA

* fix compatibility issue against toolchain

* Change default ELPA install routine to old one

---------

Co-authored-by: Chun Cai <amoycaic@gmail.com>

* Test: Configure performance tests for math libraries (#3511)

* add performace test of sphbes functions.

* fix benchmark cmake errors

* add dependencies for docker

* update docs

* add performance tests for sphbes

* add google benchmark

* rewrite benchmark tests in fixtures

* disable internal testing in benchmark

* merge benchmark into integration test

---------

Co-authored-by: StarGrys <771582678@qq.com>

* Configure Makefile Compiling, fix typos

* Fix Makefile Intel toolchains compile errors

* Fix even more PEXSI related Makefile compiling issues

* Update hsolver_pw.cpp (#3556)

when use_uspp==false, overlap matrix should be E.

* Fix: cuda build target (#3276)

* Fix: cuda buid target

* Update CMakeLists.txt

---------

Co-authored-by: Denghui Lu <denghuilu@pku.edu.cn>

---------

Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>
Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com>
Co-authored-by: Haozhi Han <haozhi.han@outlook.com>
Co-authored-by: Zhao Tianqi <hongriTianqi@users.noreply.github.com>
Co-authored-by: PeizeLin <78645006+PeizeLin@users.noreply.github.com>
Co-authored-by: jinzx10 <jzx016@hotmail.com>
Co-authored-by: Chun Cai <amoycaic@gmail.com>
Co-authored-by: Peng Xingliang <91927439+pxlxingliang@users.noreply.github.com>
Co-authored-by: Jie Li <76780849+jieli-matrix@users.noreply.github.com>
Co-authored-by: Wenfei Li <38569667+wenfei-li@users.noreply.github.com>
Co-authored-by: Denghui Lu <denghuilu@pku.edu.cn>
Co-authored-by: YI Zeping <18586016708@163.com>
Co-authored-by: wenfei-li <liwenfei@gmail.com>
Co-authored-by: jingan-181 <78459531+jingan-181@users.noreply.github.com>
Co-authored-by: StarGrys <771582678@qq.com>
Co-authored-by: Haozhi Han <haozhi.han@stu.pku.edu.cn>
---
 .github/workflows/test.yml                    |  18 +-
 CMakeLists.txt                                |  77 +-
 Dockerfile.cuda                               |   2 +-
 Dockerfile.gnu                                |   2 +-
 Dockerfile.intel                              |   2 +-
 cmake/FindELPA.cmake                          |  44 +-
 cmake/FindLAPACK.cmake                        |   2 +-
 cmake/FindLibxc.cmake                         |  36 +
 cmake/FindPEXSI.cmake                         |   8 +-
 deps/libpaw_interface                         |   2 +-
 docs/advanced/input_files/input-main.md       |  37 +-
 docs/advanced/install.md                      |  10 +
 docs/quick_start/easy_install.md              |   9 +-
 docs/quick_start/hands_on.md                  |   2 +-
 python/pyabacus/CMakeLists.txt                |  11 +-
 python/pyabacus/src/py_abacus.cpp             |  13 +
 python/pyabacus/src/py_math_base.cpp          |  63 ++
 python/pyabacus/src/py_numerical_radial.cpp   |   4 +-
 python/pyabacus/src/pyabacus/__init__.py      |   5 +-
 python/pyabacus/tests/test_base_math.py       |  15 +
 python/pyabacus/tests/test_nr.py              |  25 -
 source/Makefile                               |  17 +-
 source/Makefile.Objects                       |  18 +-
 source/Makefile.vars                          |  20 +-
 source/module_base/global_variable.cpp        |  30 +
 source/module_base/global_variable.h          |  28 +
 source/module_base/math_sphbes.cpp            |  56 +-
 source/module_base/math_sphbes.h              |  13 +-
 source/module_base/para_json.cpp              | 977 ++++++++++++++++++
 source/module_base/para_json.h                | 560 ++++++++++
 source/module_base/test/CMakeLists.txt        |  14 +
 .../module_base/test/complexmatrix_test.cpp   |  22 +-
 .../module_base/test/inverse_matrix_test.cpp  |   2 +-
 source/module_base/test/math_sphbes_test.cpp  |  16 +-
 source/module_base/test/math_ylmreal_test.cpp | 364 +++----
 source/module_base/test/para_json_test.cpp    |  68 ++
 source/module_base/test/perf_sphbes_test.cpp  |  72 ++
 source/module_base/tool_quit.h                |   8 +-
 source/module_cell/klist.cpp                  |  44 +-
 source/module_cell/klist.h                    |   1 +
 .../test/sltk_atom_input_test.cpp             |   2 +-
 source/module_cell/read_atoms.cpp             | 189 ++--
 source/module_elecstate/occupy.cpp            | 414 +-------
 source/module_esolver/esolver_ks.cpp          |  19 +-
 source/module_esolver/esolver_ks_lcao.cpp     |  27 +-
 source/module_esolver/esolver_ks_pw.cpp       |  71 +-
 .../module_xc/test/test_xc.cpp                |   4 +-
 .../module_xc/test/test_xc1.cpp               |   2 +-
 .../module_xc/test/test_xc2.cpp               |  10 +-
 .../module_xc/test/test_xc4.cpp               |   4 +-
 .../module_xc/test/test_xc5.cpp               |  65 --
 .../module_xc/test/xc3_mock.h                 |  16 +-
 .../module_deltaspin/cal_mw.cpp               |   2 +-
 .../module_deltaspin/cal_mw_helper.cpp        |  14 +-
 .../module_deltaspin/lambda_loop.cpp          |   4 +-
 .../module_tddft/test/tddft_test.cpp          |   3 +-
 source/module_hsolver/diago_pexsi.cpp         |  44 +-
 source/module_hsolver/diago_pexsi.h           |   2 +-
 source/module_hsolver/hsolver_pw.cpp          |  22 +-
 .../module_pexsi/CMakeLists.txt               |   2 +-
 .../module_pexsi/dist_bcd_matrix.cpp          |   4 +-
 .../module_pexsi/dist_bcd_matrix.h            |  23 +
 .../module_pexsi/dist_ccs_matrix.cpp          |   4 +-
 .../module_pexsi/dist_ccs_matrix.h            |  40 +
 .../module_pexsi/dist_matrix_transformer.cpp  | 233 +++--
 .../module_pexsi/dist_matrix_transformer.h    |  64 +-
 .../module_pexsi/pexsi_solver.cpp             |  28 +-
 .../module_pexsi/pexsi_solver.h               |   6 +-
 .../module_pexsi/simple_pexsi.cpp             | 490 ++++-----
 source/module_io/input.cpp                    | 230 ++++-
 source/module_io/input.h                      |  43 +-
 source/module_io/input_conv.cpp               |  30 +
 source/module_io/mulliken_charge.cpp          |   4 +-
 source/module_io/nscf_band.cpp                |  41 +-
 source/module_io/nscf_band.h                  |   1 +
 source/module_io/parameter_pool.cpp           |  39 +-
 source/module_io/parameter_pool.h             |   8 +-
 source/module_io/test/input_conv_test.cpp     |   1 +
 source/module_io/test/input_test.cpp          |  14 +-
 source/module_io/test/input_test_para.cpp     |   9 +-
 source/module_io/test/support/INPUT           |   2 +-
 source/module_io/test/support/witestfile      |   2 +-
 source/module_io/test/to_qo_test.cpp          |  34 +-
 source/module_io/test/write_input_test.cpp    |  11 +-
 .../module_io/test_serial/nscf_band_test.cpp  |  11 +-
 source/module_io/write_input.cpp              |  33 +-
 source/module_ri/Exx_LRI.hpp                  |  10 +-
 source/module_ri/LRI_CV_Tools.hpp             |  11 +-
 .../module_ri/conv_coulomb_pot_k-template.h   |  51 -
 source/module_ri/conv_coulomb_pot_k.cpp       | 181 ++--
 source/module_ri/conv_coulomb_pot_k.h         |  47 +-
 source/module_ri/conv_coulomb_pot_k.hpp       |  37 +
 source/module_ri/exx_lip.cpp                  |  14 +-
 .../107_PW_OBOD_MemSaver/refBANDS_1.dat       |  12 +-
 .../107_PW_OB_outputbands/refBANDS_1.dat      |  12 +-
 .../204_NO_KP_NC_deltaspin/mulliken.txt.ref   | 168 +--
 .../204_NO_KP_NC_deltaspin/result.ref         |   6 +-
 tests/integrate/207_NO_KP_OB/refBANDS_1.dat   |  12 +-
 tests/integrate/Autotest.sh                   |  10 +-
 toolchain/README.md                           |  15 +-
 toolchain/install_abacus_toolchain.sh         |  14 +-
 toolchain/scripts/common_vars.sh              |   3 +-
 toolchain/scripts/stage4/install_rapidjson.sh |  93 ++
 toolchain/scripts/stage4/install_stage4.sh    |   1 +
 toolchain/scripts/tool_kit.sh                 |   9 +-
 toolchain/toolchain_gnu.sh                    |   1 +
 toolchain/toolchain_intel-mpich.sh            |   1 +
 toolchain/toolchain_intel.sh                  |   1 +
 108 files changed, 4025 insertions(+), 1722 deletions(-)
 create mode 100644 cmake/FindLibxc.cmake
 create mode 100644 python/pyabacus/src/py_abacus.cpp
 create mode 100644 python/pyabacus/src/py_math_base.cpp
 create mode 100644 python/pyabacus/tests/test_base_math.py
 delete mode 100644 python/pyabacus/tests/test_nr.py
 create mode 100644 source/module_base/para_json.cpp
 create mode 100644 source/module_base/para_json.h
 create mode 100644 source/module_base/test/para_json_test.cpp
 create mode 100644 source/module_base/test/perf_sphbes_test.cpp
 delete mode 100644 source/module_ri/conv_coulomb_pot_k-template.h
 create mode 100644 source/module_ri/conv_coulomb_pot_k.hpp
 create mode 100755 toolchain/scripts/stage4/install_rapidjson.sh

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 856e56d97a..76f48347a8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,27 +2,37 @@ name: Integration Test and Unit Test
 
 on:
   pull_request:
-  
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
-  
+
 jobs:
   test:
     name: Test
     runs-on: self-hosted
     if: github.repository_owner == 'deepmodeling'
-    container: ghcr.io/deepmodeling/abacus-gnu
+    container:
+      image: ghcr.io/deepmodeling/abacus-gnu
+      volumes:
+        - /tmp/ccache:/github/home/.ccache
     steps:
       - name: Checkout
         uses: actions/checkout@v4
         with:
           submodules: recursive
+
+      - name: Install Ccache
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ccache
+
       - name: Build
         run: |
-          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON
+          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON
           cmake --build build -j8
           cmake --install build
+
       - name: Test
         env:
           GTEST_COLOR: 'yes'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8440662355..73a846304b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,12 +10,6 @@ project(ABACUS
     LANGUAGES CXX
 )
 
-# private options, should not be pushed to master
-# set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
-# set(SuperLU_DIR "~/projects/pexsi-build/superlu")
-# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
-# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001")
-
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
 option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
 option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
@@ -40,7 +34,36 @@ option(DEBUG_INFO "Print message for developers to debug." OFF)
 option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
 option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
+option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
+option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
 option(USE_PEXSI "Enable support for PEXSI." OFF)
+
+
+
+# enable json support
+if(ENABLE_RAPIDJSON)
+  find_package(RapidJSON)
+  if(NOT RapidJSON_FOUND)
+    message(WARNING "Rapidjson is not found, trying downloading from github, or you can install Rapidjson first and reinstall abacus.")
+    include(FetchContent)
+    FetchContent_Declare(
+      rapidjson
+      GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
+      GIT_TAG "origin/master"
+      GIT_SHALLOW TRUE
+      GIT_PROGRESS TRUE
+    )
+    set(RAPIDJSON_BUILD_TESTS OFF CACHE INTERNAL "")
+    set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE INTERNAL "")
+    FetchContent_MakeAvailable(rapidjson)
+    set(RapidJSON_INCLUDE_PATH "${rapidjson_SOURCE_DIR}/include")
+  endif()
+  add_compile_definitions(__RAPIDJSON)
+  add_definitions(-DRAPIDJSON_HAS_CXX11_NOEXCEPT=0)
+  include_directories(${RapidJSON_INCLUDE_PATH})
+endif()
+
+
 if (USE_CUDA)
   set(USE_CUSOLVER_LCAO ON)
 else()
@@ -189,7 +212,7 @@ if(ENABLE_LCAO)
   
   if(USE_PEXSI)
     find_package(PEXSI REQUIRED)
-    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
+    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
     add_compile_definitions(__PEXSI)
   endif()
@@ -414,8 +437,7 @@ endif()
 
 if(ENABLE_DEEPKS)
   # Torch uses outdated components to detech CUDA arch, causing failure on latest CUDA kits.
-  # See above for setting CMAKE_CUDA_ARCHITECTURES
-  set(TORCH_CUDA_ARCH_LIST CMAKE_CUDA_ARCHITECTURES)
+  # Set CMake variable TORCH_CUDA_ARCH_LIST in the form of "major.minor" if required.
   find_package(Torch REQUIRED)
   if(NOT Torch_VERSION VERSION_LESS "2.1.0")
     set_if_higher(CMAKE_CXX_STANDARD 17)
@@ -522,11 +544,8 @@ if(DEFINED Libxc_DIR)
   set(ENABLE_LIBXC ON)
 endif()
 if(ENABLE_LIBXC)
-  find_package(Libxc REQUIRED HINTS
-    ${Libxc_DIR}/share/cmake/Libxc
-    ${Libxc_DIR}/lib/cmake/Libxc
-    ${Libxc_DIR}/lib64/cmake/Libxc
-  )
+  # use `cmake/FindLibxc.cmake` to detect Libxc installation with `pkg-config`
+  find_package(Libxc REQUIRED)
   message(STATUS "Found Libxc: version " ${Libxc_VERSION})
   if(${Libxc_VERSION} VERSION_LESS 5.1.7)
     message(FATAL_ERROR "LibXC >= 5.1.7 is required.")
@@ -580,6 +599,25 @@ if(INFO)
   # modifications on blas_connector and lapack_connector
 endif()
 
+#  Add performance test in abacus
+IF (ENABLE_GOOGLEBENCH)
+  set(BUILD_TESTING ON)
+  find_package(benchmark HINTS ${BENCHMARK_DIR})
+  if(NOT ${benchmark_FOUND})
+    set(BENCHMARK_USE_BUNDLED_GTEST OFF)
+    include(FetchContent)
+    FetchContent_Declare(
+      benchmark
+      GIT_REPOSITORY https://github.com/google/benchmark.git
+      GIT_TAG "origin/main"
+      GIT_SHALLOW TRUE
+      GIT_PROGRESS TRUE 
+    )
+    set(BENCHMARK_ENABLE_TESTING OFF)
+    FetchContent_MakeAvailable(benchmark)
+  endif()
+endif()
+
 IF (BUILD_TESTING)
   set_if_higher(CMAKE_CXX_STANDARD 14) # Required in orbital
   include(CTest)
@@ -609,8 +647,14 @@ IF (BUILD_TESTING)
     endif()
 
     #dependencies & link library
-    target_link_libraries(${UT_TARGET} ${UT_LIBS}
-      Threads::Threads GTest::gtest_main GTest::gmock_main)
+    if(ENABLE_GOOGLEBENCH)
+      target_link_libraries(${UT_TARGET} ${UT_LIBS}
+        Threads::Threads GTest::gtest_main GTest::gmock_main benchmark::benchmark)
+      else()
+        target_link_libraries(${UT_TARGET} ${UT_LIBS}
+          Threads::Threads GTest::gtest_main GTest::gmock_main)
+    endif()
+      
     if(USE_OPENMP)
       target_link_libraries(${UT_TARGET} OpenMP::OpenMP_CXX)
     endif()
@@ -620,6 +664,7 @@ IF (BUILD_TESTING)
       WORKING_DIRECTORY $<TARGET_FILE_DIR:${UT_TARGET}>
     )
   endfunction(AddTest)
+
 endif()
 
 add_subdirectory(source)
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index 719f7c4278..e950f097f9 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -2,7 +2,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
 
 RUN apt update && apt install -y --no-install-recommends \
     libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \
-    libxc-dev libgtest-dev libgmock-dev python3-numpy \
+    libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \
     bc cmake git g++ make bc time sudo unzip vim wget
 
 ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \
diff --git a/Dockerfile.gnu b/Dockerfile.gnu
index 0b6b45d248..060d930563 100644
--- a/Dockerfile.gnu
+++ b/Dockerfile.gnu
@@ -1,7 +1,7 @@
 FROM ubuntu:22.04
 RUN apt update && apt install -y --no-install-recommends \
     libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \
-    libxc-dev libgtest-dev libgmock-dev python3-numpy \
+    libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \
     bc cmake git g++ make bc time sudo unzip vim wget gfortran
 
 ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \
diff --git a/Dockerfile.intel b/Dockerfile.intel
index 6cac8c9f5f..3947f05b9e 100644
--- a/Dockerfile.intel
+++ b/Dockerfile.intel
@@ -2,7 +2,7 @@ FROM ubuntu:22.04
 
 RUN apt-get update && apt-get install -y \
     bc cmake git gnupg gcc g++ python3-numpy sudo wget vim unzip \
-    libcereal-dev libxc-dev libgtest-dev libgmock-dev
+    libcereal-dev libxc-dev libgtest-dev libgmock-dev libbenchmark-dev
 
 # Following steps by https://software.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html .
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
diff --git a/cmake/FindELPA.cmake b/cmake/FindELPA.cmake
index 5769f7248c..4105e47592 100644
--- a/cmake/FindELPA.cmake
+++ b/cmake/FindELPA.cmake
@@ -7,34 +7,50 @@
 #  ELPA_INCLUDE_DIR - Where to find ELPA headers.
 #
 
-find_path(ELPA_INCLUDE_DIR
+find_package(PkgConfig)
+
+find_path(ELPA_INCLUDE_DIRS
     elpa/elpa.h
     HINTS ${ELPA_DIR}
     PATH_SUFFIXES "include" "include/elpa"
     )
 if(USE_OPENMP)
-    find_library(ELPA_LIBRARY
-        NAMES elpa_openmp elpa
-        HINTS ${ELPA_DIR}
-        PATH_SUFFIXES "lib"
-        )
+    find_library(ELPA_LINK_LIBRARIES
+    NAMES elpa_openmp elpa
+    HINTS ${ELPA_DIR}
+    PATH_SUFFIXES "lib"
+    )
 else()
-    find_library(ELPA_LIBRARY
-        NAMES elpa
-        HINTS ${ELPA_DIR}
-        PATH_SUFFIXES "lib"
-        )
+    find_library(ELPA_LINK_LIBRARIES
+    NAMES elpa
+    HINTS ${ELPA_DIR}
+    PATH_SUFFIXES "lib"
+    )
+endif()
+
+if(NOT ELPA_INCLUDE_DIRS AND PKG_CONFIG_FOUND)
+  if(DEFINED ELPA_DIR)
+    string(APPEND CMAKE_PREFIX_PATH ";${ELPA_DIR}")
+  endif()
+  if(USE_OPENMP)
+    pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa_openmp)
+  else()
+    pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa)
+  endif()
+elseif(NOT PKG_CONFIG_FOUND)
+  message(
+    "ELPA : We need pkg-config to get all information about the elpa library")
 endif()
 
 # Handle the QUIET and REQUIRED arguments and
 # set ELPA_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LIBRARY ELPA_INCLUDE_DIR)
+find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LINK_LIBRARIES ELPA_INCLUDE_DIRS)
 
 # Copy the results to the output variables and target.
 if(ELPA_FOUND)
-    set(ELPA_LIBRARIES ${ELPA_LIBRARY})
-    set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIR})
+    list(GET ELPA_LINK_LIBRARIES 0 ELPA_LIBRARY)
+    set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIRS})
 
     if(NOT TARGET ELPA::ELPA)
         add_library(ELPA::ELPA UNKNOWN IMPORTED)
diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake
index 4f4bfbc425..c240d5facf 100644
--- a/cmake/FindLAPACK.cmake
+++ b/cmake/FindLAPACK.cmake
@@ -6,7 +6,7 @@
 #
 
 find_library(LAPACK_LIBRARY
-    NAMES openblas
+    NAMES openblas blas
     HINTS ${LAPACK_DIR}
     PATH_SUFFIXES "lib"
 )
diff --git a/cmake/FindLibxc.cmake b/cmake/FindLibxc.cmake
new file mode 100644
index 0000000000..4a3c04cba7
--- /dev/null
+++ b/cmake/FindLibxc.cmake
@@ -0,0 +1,36 @@
+include(FindPackageHandleStandardArgs)
+
+if(DEFINED Libxc_DIR)
+  string(APPEND CMAKE_PREFIX_PATH ";${Libxc_DIR}")
+endif()
+# Using CMake interface as default.
+# NO REQUIRED here, otherwhile it would throw error
+# with no LibXC found.
+find_package(Libxc HINTS
+    ${Libxc_DIR}/share/cmake/Libxc
+    ${Libxc_DIR}/lib/cmake/Libxc
+    ${Libxc_DIR}/lib64/cmake/Libxc
+  )
+if(NOT TARGET Libxc::xc)
+  find_package(PkgConfig REQUIRED)
+  pkg_search_module(Libxc REQUIRED IMPORTED_TARGET GLOBAL libxc)
+  find_package_handle_standard_args(Libxc DEFAULT_MSG Libxc_LINK_LIBRARIES Libxc_INCLUDE_DIRS)
+endif()
+
+
+# Copy the results to the output variables and target.
+# if find_package() above works, Libxc::xc would be present and
+# below would be skipped.
+if(Libxc_FOUND AND NOT TARGET Libxc::xc)
+	set(Libxc_LIBRARY ${Libxc_LINK_LIBRARIES})
+	set(Libxc_LIBRARIES ${Libxc_LIBRARY})
+	set(Libxc_INCLUDE_DIR ${Libxc_INCLUDE_DIRS})
+	add_library(Libxc::xc UNKNOWN IMPORTED)
+	set_target_properties(Libxc::xc PROPERTIES
+		IMPORTED_LOCATION "${Libxc_LIBRARY}"
+		INTERFACE_INCLUDE_DIRECTORIES "${Libxc_INCLUDE_DIR}")
+endif()
+
+set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${Libxc_INCLUDE_DIR})
+
+mark_as_advanced(Libxc_INCLUDE_DIR Libxc_LIBRARY)
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index 22fe4dd01c..062764acce 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY
     PATH_SUFFIXES "lib"
 )
 
-find_library(SuperLU_LIBRARY
+find_library(SuperLU_DIST_LIBRARY
     NAMES libsuperlu_dist.a
-    HINTS ${SuperLU_DIR}
+    HINTS ${SuperLU_DIST_DIR}
     PATH_SUFFIXES "lib"
 )
 
 # Handle the QUIET and REQUIRED arguments and
 # set Cereal_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY)
+find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY)
 
 
 # Copy the results to the output variables and target.
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY)
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY)
 
diff --git a/deps/libpaw_interface b/deps/libpaw_interface
index 893cfe5b88..c211c0ab33 160000
--- a/deps/libpaw_interface
+++ b/deps/libpaw_interface
@@ -1 +1 @@
-Subproject commit 893cfe5b88c4b640b88a82335474d9f67d4c4cf6
+Subproject commit c211c0ab330adf3cc374f50ab3edee46b174e64c
diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index b9ee122af6..8c675cab92 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -72,6 +72,7 @@
     - [mixing\_beta](#mixing_beta)
     - [mixing\_beta\_mag](#mixing_beta_mag)
     - [mixing\_ndim](#mixing_ndim)
+    - [mixing\_restart](#mixing_restart)
     - [mixing\_gg0](#mixing_gg0)
     - [mixing\_gg0\_mag](#mixing_gg0_mag)
     - [mixing\_gg0\_min](#mixing_gg0_min)
@@ -145,6 +146,8 @@
     - [out\_app\_flag](#out_app_flag)
     - [out\_ndigits](#out_ndigits)
     - [out\_interval](#out_interval)
+    - [band\_print\_num](#band_print_num)
+    - [bands\_to\_print](#bands_to_print)
     - [out\_element\_info](#out_element_info)
     - [restart\_save](#restart_save)
     - [restart\_load](#restart_load)
@@ -945,6 +948,8 @@ calculations.
   - **fixed**: fixed occupations (available for non-coductors only)
   - **gauss** or **gaussian**: Gaussian smearing method.
   - **mp**: methfessel-paxton smearing method; recommended for metals.
+  - **mp2**: 2-nd methfessel-paxton smearing method; recommended for metals.
+  - **mv** or **cold**: marzari-vanderbilt smearing method.
   - **fd**: Fermi-Dirac smearing method: $f=1/\{1+\exp[(E-\mu)/kT]\}$ and smearing_sigma below is the temperature $T$ (in Ry).
 - **Default**: gauss
 
@@ -1001,6 +1006,13 @@ We recommend the following options:
   For systems that are difficult to converge, one could try increasing the value of 'mixing_ndim' to enhance the stability of the self-consistent field (SCF) calculation.
 - **Default**: 8
 
+### mixing_restart
+
+- **Type**: Integer
+- **Description**: At `mixing_restart`-th iteration, SCF will restart by using output charge density from perivos iteration as input charge density directly, and start a new mixing. `mixing_restart=0|1` means SCF starts from scratch.
+  
+- **Default**: 0
+
 ### mixing_gg0
 
 - **Type**: Real
@@ -1494,8 +1506,8 @@ These variables are used to control the output of properties.
 
 ### out_band
 
-- **Type**: Boolean
-- **Description**: Whether to output the band structure (in eV). For more information, refer to the [band.md](../elec_properties/band.md)
+- **Type**: Boolean Integer(optional)
+- **Description**: Whether to output the band structure (in eV), optionally output precision can be set by a second parameter, default is 8. For more information, refer to the [band.md](../elec_properties/band.md)
 - **Default**: False
 
 ### out_proj_band
@@ -1599,6 +1611,20 @@ These variables are used to control the output of properties.
 - **Description**: Control the interval for printing Mulliken population analysis, $r(R)$, $H(R)$, $S(R)$, $T(R)$, $dH(R)$, $H(k)$, $S(k)$ and $wfc(k)$ matrices during molecular dynamics calculations. Check input parameters [out_mul](#out_mul), [out_mat_r](#out_mat_r), [out_mat_hs2](#out_mat_hs2), [out_mat_t](#out_mat_t), [out_mat_dh](#out_mat_dh), [out_mat_hs](#out_mat_hs) and [out_wfc_lcao](#out_wfc_lcao) for more information, respectively.
 - **Default**: 1
 
+### band_print_num
+
+- **Type**: Integer
+- **Availability**: PW basis
+- **Description**: If you want to plot a partial charge density contributed from some chosen bands. `band_print_num` define the number of band list. The result can be found in "band*.cube".
+- **Default**: 0
+
+### bands_to_print
+
+- **Type**: vector
+- **Availability**: band_print_num > 0
+- **Description**: define which band you want to choose for partial charge density.
+- **Default**: []
+
 ### out_element_info
 
 - **Type**: Boolean
@@ -2776,9 +2802,9 @@ These variables are used to control berry phase and wannier90 interface paramete
 
 - **Type**: String
 - **Description**: the spin direction for the Wannier function calculation when nspin is set to 2
-  - "up": Calculate spin up for the Wannier function.
-  - "down": Calculate spin down for the Wannier function.
-- **Default**: "up"
+  - `up`: Calculate spin up for the Wannier function.
+  - `down`: Calculate spin down for the Wannier function.
+- **Default**: `up`
 
 ### out_wannier_mmn
 
@@ -2818,6 +2844,7 @@ These variables are used to control berry phase and wannier90 interface paramete
 - **Description**: write the "UNK.*" file in ASCII format or binary format.
   - 0: write the "UNK.*" file in binary format.
   - 1: write the "UNK.*" file in ASCII format (text file format).
+- **Default**: 1
 
 [back to top](#full-list-of-input-keywords)
 
diff --git a/docs/advanced/install.md b/docs/advanced/install.md
index e929fac34c..d6201a060f 100644
--- a/docs/advanced/install.md
+++ b/docs/advanced/install.md
@@ -69,6 +69,16 @@ After building and installing, unit tests can be performed with `ctest`.
 
 To run a subset of unit test, use `ctest -R <test-match-pattern>` to perform tests with name matched by given pattern.
 
+## Build Performance Tests
+
+To build performance tests for ABACUS, define `ENABLE_GOOGLEBENCH` flag. You can also specify the path to a local installation of [Google Benchmark](https://github.com/google/benchmark.git) by setting `BENCHMARK_DIR` flags. If not found locally, the configuration process will try to download it automatically.
+
+```bash
+cmake -B build -DENABLE_GOOGLEBENCH=1
+```
+
+Google Benchmark requires Google Test to build and run the tests. When setting `ENABLE_GOOGLEBENCH` to ON, `BUILD_TESTING` is automatically enabled. After building and installing, performance tests can be executed with `ctest`.
+
 ## Build with CUDA support
 
 ### Extra prerequisites
diff --git a/docs/quick_start/easy_install.md b/docs/quick_start/easy_install.md
index 957b9d3262..4089e303a3 100644
--- a/docs/quick_start/easy_install.md
+++ b/docs/quick_start/easy_install.md
@@ -28,7 +28,7 @@ These requirements support the calculation of plane-wave basis in ABACUS. For LC
 Some of these packages can be installed with popular package management system, such as `apt` and `yum`:
 
 ```bash
-sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git
+sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git pkgconf
 ```
 
 > Installing ELPA by apt only matches requirements on Ubuntu 22.04. For earlier linux distributions, you should build ELPA from source.
@@ -111,12 +111,12 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
   - `LAPACK_DIR`: Path to OpenBLAS library `libopenblas.so`(including BLAS and LAPACK)
   - `SCALAPACK_DIR`: Path to ScaLAPACK library `libscalapack.so`
   - `ELPA_DIR`: Path to ELPA install directory; should be the folder containing 'include' and 'lib'.
-  > Note: If you install ELPA from source, please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa`. This is a known behavior of ELPA.
+  > Note: In ABACUS v3.5.1 or earlier, if you install ELPA from source , please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa` to help the build system find ELPA headers.
 
   - `FFTW3_DIR`: Path to FFTW3.
   - `CEREAL_INCLUDE_DIR`: Path to the parent folder of `cereal/cereal.hpp`. Will download from GitHub if absent.
   - `Libxc_DIR`: (Optional) Path to Libxc.
-  > Note: Building Libxc from source with Makefile does NOT support using it in CMake here. Please compile Libxc with CMake instead.
+  > Note: In ABACUS v3.5.1 or earlier, Libxc built from source with Makefile is NOT supported; please compile Libxc with CMake instead.
   - `LIBRI_DIR`: (Optional) Path to LibRI.
   - `LIBCOMM_DIR`: (Optional) Path to LibComm.
 
@@ -126,6 +126,7 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
   - `ENABLE_LIBRI=OFF`: [Enable LibRI](../advanced/install.md#add-libri-support) to suppport variety of functionals. If `LIBRI_DIR` and `LIBCOMM_DIR` is defined, `ENABLE_LIBRI` will set to 'ON'.
   - `USE_OPENMP=ON`: Enable OpenMP support. Building ABACUS without OpenMP is not fully tested yet.
   - `BUILD_TESTING=OFF`: [Build unit tests](../advanced/install.md#build-unit-tests).
+  - `ENABLE_GOOGLEBENCH=OFF`: [Build performance tests](../advanced/install.md#build-performance-tests)
   - `ENABLE_MPI=ON`: Enable MPI parallel compilation. If set to `OFF`, a serial version of ABACUS with PW basis only will be compiled. Currently serial version of ABACUS with LCAO basis is not supported yet, so `ENABLE_LCAO` will be automatically set to `OFF`.
   - `ENABLE_COVERAGE=OFF`: Build ABACUS executable supporting [coverage analysis](../CONTRIBUTING.md#generating-code-coverage-report). This feature has a drastic impact on performance.
   - `ENABLE_ASAN=OFF`: Build with Address Sanitizer. This feature would help detecting memory problems.
@@ -229,7 +230,7 @@ conda create -n abacus_env abacus -c conda-forge
 conda activate abacus_env
 export CMAKE_PREFIX_PATH=$CONDA_PREFIX:$CMAKE_PREFIX_PATH
 
-# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft -c conda-forge` to switch implementation.
+# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft mkl-devel -c conda-forge` to switch implementation.
 export MKLROOT=$CONDA_PREFIX # If Intel MKL is required.
 
 export CMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`:$CMAKE_PREFIX_PATH # If DEEPKS support is required;
diff --git a/docs/quick_start/hands_on.md b/docs/quick_start/hands_on.md
index d63c6b0232..2e0e768169 100644
--- a/docs/quick_start/hands_on.md
+++ b/docs/quick_start/hands_on.md
@@ -57,7 +57,7 @@ basis_type              lcao
 calculation             scf		# this is the key parameter telling abacus to do a scf calculation
 ```
 
-The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir`, and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir`. The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm).
+The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir` defined in `INPUT` (the default directory is "./"), and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir` also defined in `INPUT` (the default directory is "./"). The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm).
 
 The final mandatory input file is called `KPT`, which sets the reciprocal space k-mesh. Below is an example:
 
diff --git a/python/pyabacus/CMakeLists.txt b/python/pyabacus/CMakeLists.txt
index 399bd4fe57..0effbe83f2 100644
--- a/python/pyabacus/CMakeLists.txt
+++ b/python/pyabacus/CMakeLists.txt
@@ -12,9 +12,14 @@ set(BASE_PATH "${PROJECT_SOURCE_DIR}/../../source/module_base")
 set(ABACUS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../source")
 include_directories(${BASE_PATH} ${ABACUS_SOURCE_DIR})
 list(APPEND _sources
-    ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h
-    ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp
-    ${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp)
+    #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h
+    #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp
+    ${ABACUS_SOURCE_DIR}/module_base/constants.h
+    ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.h
+    ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.cpp
+    ${PROJECT_SOURCE_DIR}/src/py_abacus.cpp
+    #${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp
+    ${PROJECT_SOURCE_DIR}/src/py_math_base.cpp)
 python_add_library(_core MODULE ${_sources} WITH_SOABI)
 target_link_libraries(_core PRIVATE pybind11::headers)
 target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
diff --git a/python/pyabacus/src/py_abacus.cpp b/python/pyabacus/src/py_abacus.cpp
new file mode 100644
index 0000000000..34b354dc6b
--- /dev/null
+++ b/python/pyabacus/src/py_abacus.cpp
@@ -0,0 +1,13 @@
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+void bind_numerical_radial(py::module& m);
+void bind_math_base(py::module& m);
+
+PYBIND11_MODULE(_core, m)
+{
+    // bind_numerical_radial(m);
+    bind_math_base(m);
+}
\ No newline at end of file
diff --git a/python/pyabacus/src/py_math_base.cpp b/python/pyabacus/src/py_math_base.cpp
new file mode 100644
index 0000000000..4378690897
--- /dev/null
+++ b/python/pyabacus/src/py_math_base.cpp
@@ -0,0 +1,63 @@
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+
+#include "module_base/math_sphbes.h"
+
+namespace py = pybind11;
+using namespace pybind11::literals;
+template <typename... Args>
+using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
+
+void bind_math_base(py::module& m)
+{
+    py::module module_base = m.def_submodule("ModuleBase");
+
+    py::class_<ModuleBase::Sphbes>(module_base, "Sphbes")
+        .def(py::init<>())
+        .def_static("sphbesj", overload_cast_<const int, const double>()(&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a)
+        .def_static("dsphbesj", overload_cast_<const int, const double>()(&ModuleBase::Sphbes::dsphbesj), "l"_a, "x"_a)
+        .def_static("sphbesj",
+                    [](const int n, py::array_t<double> r, const double q, const int l, py::array_t<double> jl) {
+                        py::buffer_info r_info = r.request();
+                        if (r_info.ndim != 1)
+                        {
+                            throw std::runtime_error("r array must be 1-dimensional");
+                        }
+                        py::buffer_info jl_info = jl.request();
+                        if (jl_info.ndim != 1)
+                        {
+                            throw std::runtime_error("jl array must be 1-dimensional");
+                        }
+                        ModuleBase::Sphbes::sphbesj(n,
+                                                    static_cast<const double* const>(r_info.ptr),
+                                                    q,
+                                                    l,
+                                                    static_cast<double* const>(jl_info.ptr));
+                    })
+        .def_static("dsphbesj",
+                    [](const int n, py::array_t<double> r, const double q, const int l, py::array_t<double> djl) {
+                        py::buffer_info r_info = r.request();
+                        if (r_info.ndim != 1)
+                        {
+                            throw std::runtime_error("r array must be 1-dimensional");
+                        }
+                        py::buffer_info djl_info = djl.request();
+                        if (djl_info.ndim != 1)
+                        {
+                            throw std::runtime_error("djl array must be 1-dimensional");
+                        }
+                        ModuleBase::Sphbes::dsphbesj(n,
+                                                     static_cast<const double* const>(r_info.ptr),
+                                                     q,
+                                                     l,
+                                                     static_cast<double* const>(djl_info.ptr));
+                    })
+        .def_static("sphbes_zeros", [](const int l, const int n, py::array_t<double> zeros) {
+            py::buffer_info zeros_info = zeros.request();
+            if (zeros_info.ndim != 1)
+            {
+                throw std::runtime_error("zeros array must be 1-dimensional");
+            }
+            ModuleBase::Sphbes::sphbes_zeros(l, n, static_cast<double* const>(zeros_info.ptr));
+        });
+}
\ No newline at end of file
diff --git a/python/pyabacus/src/py_numerical_radial.cpp b/python/pyabacus/src/py_numerical_radial.cpp
index 296229b3d1..ebda8f080b 100644
--- a/python/pyabacus/src/py_numerical_radial.cpp
+++ b/python/pyabacus/src/py_numerical_radial.cpp
@@ -8,7 +8,7 @@ using namespace pybind11::literals;
 template <typename... Args>
 using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
 
-PYBIND11_MODULE(_core, m)
+void bind_numerical_radial(py::module& m)
 {
     // Create the submodule for NumericalRadial
     py::module m_numerical_radial = m.def_submodule("NumericalRadial");
@@ -165,4 +165,4 @@ PYBIND11_MODULE(_core, m)
         .def_property_readonly("kgrid", overload_cast_<int>()(&NumericalRadial::kgrid, py::const_))
         .def_property_readonly("rvalue", overload_cast_<int>()(&NumericalRadial::rvalue, py::const_))
         .def_property_readonly("kvalue", overload_cast_<int>()(&NumericalRadial::kvalue, py::const_));
-}
+}
\ No newline at end of file
diff --git a/python/pyabacus/src/pyabacus/__init__.py b/python/pyabacus/src/pyabacus/__init__.py
index cda9318053..94d8c0d5b8 100644
--- a/python/pyabacus/src/pyabacus/__init__.py
+++ b/python/pyabacus/src/pyabacus/__init__.py
@@ -1,3 +1,4 @@
 from __future__ import annotations
-from ._core import __doc__, __version__, NumericalRadial
-__all__ = ["__doc__", "__version__", "NumericalRadial"]
\ No newline at end of file
+# from ._core import __doc__, __version__, NumericalRadial, ModuleBase
+from ._core import ModuleBase
+__all__ = ["ModuleBase"]
\ No newline at end of file
diff --git a/python/pyabacus/tests/test_base_math.py b/python/pyabacus/tests/test_base_math.py
new file mode 100644
index 0000000000..97d5118bac
--- /dev/null
+++ b/python/pyabacus/tests/test_base_math.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+import pyabacus as m
+import numpy as np
+
+
+def test_version():
+    assert m.__version__ == "0.0.1"
+
+def test_sphbes():
+    s = m.ModuleBase.Sphbes()
+    # test for sphbesj
+    assert s.sphbesj(1, 0.0) == 0.0
+    assert s.sphbesj(0, 0.0) == 1.0
+
diff --git a/python/pyabacus/tests/test_nr.py b/python/pyabacus/tests/test_nr.py
deleted file mode 100644
index 4986331b25..0000000000
--- a/python/pyabacus/tests/test_nr.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-import pyabacus as m
-
-
-def test_version():
-    assert m.__version__ == "0.0.1"
-
-def test_attributes():
-    chi = m.NumericalRadial()
-    # string
-    assert chi.symbol == ''
-    # integer
-    assert chi.itype == 0
-    assert chi.izeta == 0
-    assert chi.l == -1
-    assert chi.nr == 0
-    assert chi.nk == 0
-    # float
-    assert chi.rcut == 0.0
-    assert chi.kcut == 0.0
-    assert chi.pr == 0.0
-    assert chi.pk == 0.0
-    # bool
-    assert chi.is_fft_compliant == False
diff --git a/source/Makefile b/source/Makefile
index 0dcd329161..7bd81f26d9 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -7,7 +7,7 @@ include Makefile.vars
 INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container
 
 LIBS = -lm -lpthread
-OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic 
+OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES}
 HONG = -D__LCAO
 HONG += -D__ELPA
 ifeq ($(OPENMP), ON)
@@ -75,7 +75,7 @@ else
     FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
     FFTW_LIB_DIR     = ${FFTW_DIR}/lib
     HONG  += -D__FFTW3
-    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl
+    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
     INCLUDES += -I${FFTW_INCLUDE_DIR}
     
     #==========================
@@ -140,12 +140,6 @@ ifdef LIBTORCH_DIR
   endif
 endif
 
-ifdef PEXSI_DIR
-    INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE}
-    LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB}
-    HONG += -D__PEXSI
-endif
-
 ifdef DeePMD_DIR
     HONG  += -D__DPMD -DHIGH_PREC 
     OPTS  += -Wl,--no-as-needed
@@ -175,6 +169,13 @@ ifdef DeePMD_DIR
     INCLUDES += -I${TensorFlow_INCLUDE_DIR}
 endif
 
+ifdef PEXSI_DIR
+    OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI}
+    INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include
+    LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis
+    HONG += -D__PEXSI
+endif
+
 include Makefile.Objects
 
 #==========================
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 2a69761da3..71e637a80b 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -30,7 +30,7 @@ VPATH=./src_global:\
 ./module_hsolver:\
 ./module_hsolver/kernels:\
 ./module_hsolver/genelpa:\
-./module_hsolver/pexsi:\
+./module_hsolver/module_pexsi:\
 ./module_elecstate:\
 ./module_elecstate/kernels:\
 ./module_elecstate/potentials:\
@@ -102,6 +102,7 @@ ${OBJS_VDW}\
 ${OBJS_DFTU}\
 ${OBJS_DELTASPIN}\
 ${OBJS_TENSOR}\
+${OBJS_HSOLVER_PEXSI}\
 
 OBJS_MAIN=main.o\
     driver.o\
@@ -290,13 +291,7 @@ OBJS_HSOLVER=diago_cg.o\
     diago_iter_assist.o\
     math_kernel_op.o\
     dngvd_op.o\
-    diago_pexsi.o\
-    DistBCDMatrix.o\
-    DistCCSMatrix.o\
-    DistMatrixTransformer.o\
-    pexsi_solver.o\
-    simplePEXSI.o\
-
+    
 OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       diago_blas.o\
       diago_elpa.o\
@@ -305,6 +300,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       elpa_new_complex.o\
       utils.o\
 
+OBJS_HSOLVER_PEXSI=diago_pexsi.o\
+      pexsi_solver.o\
+      simple_pexsi.o\
+      dist_bcd_matrix.o\
+      dist_ccs_matrix.o\
+      dist_matrix_transformer.o\
+      
 OBJS_MD=fire.o\
     langevin.o\
     md_base.o\
diff --git a/source/Makefile.vars b/source/Makefile.vars
index 860bbdd806..477b0a251d 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -33,15 +33,6 @@ ELPA_DIR      = /root/lib/ELPA
 ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/
 
 CEREAL_DIR    = /root/lib/cereal
-DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0
-DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include
-DSUPERLU_LIB    = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a
-
-SCOTCH_INCLUDE  = -I/usr/local/include
-PTSCOTCH_DIR    = /root/workspace/scotch_6.0.0
-PTSCOTCH_LIB    = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a
-SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a
-
 
 
 ##-------------------  FOR GNU COMPILER  ------------------------------
@@ -68,14 +59,9 @@ SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libsc
 ## To use LIBXC:  set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7)
 ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR
 ## To use LibRI:  set LIBRI_DIR and LIBCOMM_DIR
-## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a
+## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR
 ##---------------------------------------------------------------------
 
-PEXSI_DIR = /root/workspace/pexsi_v2.0.0
-PEXSI_LIB_DIR = ${PEXSI_DIR}/src
-PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
-
-
 # LIBTORCH_DIR  = /usr/local
 # LIBNPY_DIR    = /usr/local
 
@@ -87,6 +73,10 @@ PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
 # LIBRI_DIR     = /public/software/LibRI
 # LIBCOMM_DIR   = /public/software/LibComm
 
+# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi
+# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu
+# PARMETIS_DIR    = /home/rhx/projects/pexsi-build/parmetis
+
 ##---------------------------------------------------------------------
 # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile
 # DEBUG = OFF
diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 696bcd6088..6b7015dc25 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -248,6 +248,7 @@ std::string of_kernel_file = "WTkernel.txt";
 std::string MIXING_MODE = "broyden";
 double MIXING_BETA = 0.7;
 int MIXING_NDIM = 8;
+int MIXING_RESTART = 0;
 double MIXING_GG0 = 1.00;
 double MIXING_BETA_MAG = 1.6;
 double MIXING_GG0_MAG = 1.00;
@@ -300,4 +301,33 @@ std::string qo_basis = "hydrogen";
 std::vector<std::string> qo_strategy = {};
 double qo_thr = 1.0e-6;
 std::vector<double> qo_screening_coeff = {};
+
+//==========================================================
+// PEXSI related
+//==========================================================
+int pexsi_npole = 54;
+int pexsi_inertia = 1;
+int pexsi_nmax = 80;
+// int pexsi_symbolic = 1;
+int pexsi_comm = 1;
+int pexsi_storage = 1;
+int pexsi_ordering = 0;
+int pexsi_row_ordering = 1;
+int pexsi_nproc = 1;
+int pexsi_symm = 1;
+int pexsi_trans = 0;
+int pexsi_method = 1;
+int pexsi_nproc_pole = 1;
+// double pexsi_spin = 2;
+double pexsi_temp = 0.0001;
+double pexsi_gap = 0;
+double pexsi_delta_e = 20.0;
+double pexsi_mu_lower = -10;
+double pexsi_mu_upper = 10;
+double pexsi_mu = 0.0;
+double pexsi_mu_thr = 0.05;
+double pexsi_mu_expand = 0.3;
+double pexsi_mu_guard = 0.2;
+double pexsi_elec_thr = 0.001;
+double pexsi_zero_thr = 1e-10;
 } // namespace GlobalV
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index b1fbb1748d..9808ca080b 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -277,6 +277,7 @@ extern std::string of_kernel_file; // The name of WT kernel file.
 extern std::string MIXING_MODE;
 extern double MIXING_BETA;
 extern int MIXING_NDIM;
+extern int MIXING_RESTART;
 extern double MIXING_GG0;
 extern bool MIXING_TAU;
 extern double MIXING_BETA_MAG;
@@ -328,5 +329,32 @@ extern std::string qo_basis;
 extern std::vector<std::string> qo_strategy;
 extern double qo_thr;
 extern std::vector<double> qo_screening_coeff;
+
+// PEXSI related
+extern int pexsi_npole;
+extern int pexsi_inertia;
+extern int pexsi_nmax;
+// extern int pexsi_symbolic;
+extern int pexsi_comm;
+extern int pexsi_storage;
+extern int pexsi_ordering;
+extern int pexsi_row_ordering;
+extern int pexsi_nproc;
+extern int pexsi_symm;
+extern int pexsi_trans;
+extern int pexsi_method;
+extern int pexsi_nproc_pole;
+// extern double pexsi_spin;
+extern double pexsi_temp;
+extern double pexsi_gap;
+extern double pexsi_delta_e;
+extern double pexsi_mu_lower;
+extern double pexsi_mu_upper;
+extern double pexsi_mu;
+extern double pexsi_mu_thr;
+extern double pexsi_mu_expand;
+extern double pexsi_mu_guard;
+extern double pexsi_elec_thr;
+extern double pexsi_zero_thr;
 } // namespace GlobalV
 #endif
diff --git a/source/module_base/math_sphbes.cpp b/source/module_base/math_sphbes.cpp
index 5e7f41de54..73e0127e6b 100644
--- a/source/module_base/math_sphbes.cpp
+++ b/source/module_base/math_sphbes.cpp
@@ -1,7 +1,7 @@
 #include "math_sphbes.h"
-#include "timer.h"
 #include "constants.h"
 #include <algorithm>
+#include <iostream>
 
 #include <cassert>
 
@@ -425,7 +425,6 @@ void Sphbes::Spherical_Bessel
     double *jl		 // jl(1:msh) = j_l(q*r(i)),spherical bessel function
 )
 {
-    ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
     double x1=0.0;
 
     int i=0;
@@ -598,7 +597,6 @@ void Sphbes::Spherical_Bessel
         }
     }
 
-    ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
     return;
 }
 
@@ -613,7 +611,6 @@ void Sphbes::Spherical_Bessel
 	double *sjp
 )
 {
-	ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
 
 	//calculate jlx first
 	Spherical_Bessel (msh, r, q, l, sj);
@@ -634,7 +631,6 @@ void Sphbes::dSpherical_Bessel_dx
     double *djl		 // jl(1:msh) = j_l(q*r(i)),spherical bessel function
 )
 {
-    ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq");
     if (l < 0 )
     {
 		std::cout << "We temporarily only calculate derivative of l >= 0." << std::endl;
@@ -682,7 +678,6 @@ void Sphbes::dSpherical_Bessel_dx
         }
         delete[] jl;
     }
-    ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq");
     return;
 }
 
@@ -808,7 +803,7 @@ void Sphbes::dsphbesj(const int n,
     }
 }
 
-void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros)
+void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const bool return_all)
 {
     assert( n > 0 );
     assert( l >= 0 );
@@ -818,10 +813,22 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros)
     // This property enables us to use bracketing method recursively
     // to find all zeros of j_l from the zeros of j_0.
 
-    // if l is odd , j_0 --> j_1 --> j_3 --> j_5 --> ...
-    // if l is even, j_0 --> j_2 --> j_4 --> j_6 --> ...
-
-    int nz = n + (l+1)/2; // number of effective zeros in buffer
+    // If return_all is true, zeros of j_0, j_1, ..., j_l will all be returned
+    // such that zeros[l*n+i] is the i-th zero of j_l. As such, it is required
+    // that the array "zeros" has a size of (l+1)*n.
+    //
+    // If return_all is false, only the zeros of j_l will be returned
+    // and "zeros" is merely required to have a size of n.
+    // Note that in this case the bracketing method can be applied with a stride
+    // of 2 instead of 1:
+    // j_0 --> j_1 --> j_3 --> j_5 --> ... --> j_l  (odd  l)
+    // j_0 --> j_2 --> j_4 --> j_6 --> ... --> j_l  (even l)
+
+    // Every recursion step reduces the number of zeros by 1.
+    // If return_all is true, one needs to start with n+l zeros of j_0
+    // to ensure n zeros of j_l; otherwise with a stride of 2 one only
+    // needs to start with n+(l+1)/2 zeros of j_0
+    int nz = n + ( return_all ? l : (l+1)/2 );
     double* buffer = new double[nz];
 
     // zeros of j_0 = sin(x)/x is just n*pi
@@ -831,27 +838,34 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros)
         buffer[i] = (i+1) * PI;
     }
 
-    int ll = 1;
+    int ll; // active l
     auto jl = [&ll] (double x) { return sphbesj(ll, x); };
-
-    if (l % 2 == 1)
+    int stride;
+    std::function<void()> copy_if_needed;
+    int offset = 0; // keeps track of the position in zeros for next copy (used when return_all == true)
+    if (return_all)
     {
-        for (int i = 0; i < nz-1; i++)
-        {
-            buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50);
-        }
-        --nz;
+        copy_if_needed = [&](){ std::copy(buffer, buffer + n, zeros + offset); offset += n; };
+        stride = 1;
+        ll = 1;
+    }
+    else
+    {
+        copy_if_needed = [](){};
+        stride = 2;
+        ll = 2 - l % 2;
     }
 
-    for (ll = 2 + l%2; ll <= l; ll += 2, --nz)
+    for (; ll <= l; ll += stride, --nz)
     {
+        copy_if_needed();
         for (int i = 0; i < nz-1; i++)
         {
             buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50);
         }
     }
 
-    std::copy(buffer, buffer + n, zeros);
+    std::copy(buffer, buffer + n, zeros + offset);
     delete[] buffer;
 }
 
diff --git a/source/module_base/math_sphbes.h b/source/module_base/math_sphbes.h
index c654847a5d..7aa9c78a48 100644
--- a/source/module_base/math_sphbes.h
+++ b/source/module_base/math_sphbes.h
@@ -126,13 +126,18 @@ class Sphbes
      * This function computes the first n positive zeros of the l-th order
      * spherical Bessel function of the first kind. 
      *
-     * @param[in]   l       order of the spherical Bessel function
-     * @param[in]   n       number of zeros to be computed
-     * @param[out]  zeros   on exit, contains the first n positive zeros in ascending order
+     * @param[in]   l           (maximum) order of the spherical Bessel function
+     * @param[in]   n           number of zeros to be computed (for each j_l if return_all is true)
+     * @param[out]  zeros       on exit, contains the positive zeros.
+     * @param[in]   return_all  if true, return all zeros from j_0 to j_l such that zeros[l*n+i]
+     *                          is the i-th zero of j_l. If false, return only the first n zeros of j_l.
+     *
+     * @note The size of array "zeros" must be at least (l+1)*n if return_all is true, and n otherwise.
      */
     static void sphbes_zeros(const int l,
                              const int n,
-                             double* const zeros
+                             double* const zeros,
+                             bool return_all = false
     );
 
 private:
diff --git a/source/module_base/para_json.cpp b/source/module_base/para_json.cpp
new file mode 100644
index 0000000000..1f042271f8
--- /dev/null
+++ b/source/module_base/para_json.cpp
@@ -0,0 +1,977 @@
+#include "para_json.h"
+#include "module_base/global_variable.h"
+
+#ifdef __RAPIDJSON
+
+namespace Para_Json
+{
+    int test=4;
+    // @param doc: the output json file
+    rapidjson::Document doc;
+    rapidjson::Value abacus(rapidjson::kObjectType);
+  
+    // @param general_info ：
+    rapidjson::Value general_info(rapidjson::kObjectType);
+    rapidjson::Value version;
+     
+    rapidjson::Value commit;
+    rapidjson::Value begin_time;
+    rapidjson::Value begin_date;
+    rapidjson::Value device_g;
+    // @param general_info -- parallel：
+    rapidjson::Value parallel(rapidjson::kObjectType);
+    rapidjson::Value drank;
+    rapidjson::Value dsize;
+    rapidjson::Value dcolor ;
+    // @param general_info -- path
+    rapidjson::Value path(rapidjson::kObjectType);
+    rapidjson::Value global_out_dir;
+    rapidjson::Value global_in_card;
+    rapidjson::Value pseudo_dir_path ;
+    rapidjson::Value orbital_dir_path;
+
+    
+    // @param reading_information：
+    rapidjson::Value readin_info(rapidjson::kObjectType);
+    // @param reading_information -- input_file：
+    rapidjson::Value input_file(rapidjson::kObjectType);
+
+
+    // @param reading_information -- input_file -- system_variables：
+    rapidjson::Value input_suffix;
+    rapidjson::Value ntype;
+    rapidjson::Value calculation;
+    rapidjson::Value esolver_type;
+    rapidjson::Value symmetry;
+    rapidjson::Value symmetry_precfield;
+    rapidjson::Value symmetry_autoclose;
+    rapidjson::Value kpar;
+    rapidjson::Value bndpar;
+    rapidjson::Value latname;
+    rapidjson::Value init_wfc;
+    rapidjson::Value init_chg;
+    rapidjson::Value init_vel;
+    rapidjson::Value nelec;
+    rapidjson::Value nupdown;
+    rapidjson::Value dft_functional;
+    rapidjson::Value xc_temperature;
+    rapidjson::Value pseudo_rcut(rapidjson::kNumberType );
+    rapidjson::Value pseudo_mesh;
+    rapidjson::Value mem_saver;
+    rapidjson::Value diago_proc;
+    rapidjson::Value nbspline;
+    rapidjson::Value kspacing(rapidjson::kArrayType);
+    rapidjson::Value min_dist_coef(rapidjson::kNumberType);
+    rapidjson::Value device;
+    // @param reading_information -- input_file -- files_related
+    rapidjson::Value stru_file;
+    rapidjson::Value kpoint_file;
+    rapidjson::Value pseudo_dir;
+    rapidjson::Value orbital_dir;
+    rapidjson::Value read_file_dir;
+    rapidjson::Value wannier_card;
+
+    // @param reading_information -- input_file -- planewave_related
+    rapidjson::Value ecutwfc;
+    rapidjson::Value nx;
+    rapidjson::Value ny;
+    rapidjson::Value nz;
+    rapidjson::Value pw_seed;
+    rapidjson::Value pw_diag_thr;
+    rapidjson::Value pw_diag_nmax;
+    rapidjson::Value pw_diag_ndim;
+    // @param reading_information -- input_file -- numerical_atomic_orbitals_related
+    rapidjson::Value nb2d;
+    rapidjson::Value lmaxmax;
+    rapidjson::Value lcao_ecut;
+    rapidjson::Value lcao_dk;
+    rapidjson::Value lcao_dr;
+    rapidjson::Value lcao_rmax;
+    rapidjson::Value search_radius;
+    rapidjson::Value search_pbc;
+    rapidjson::Value bx;
+    rapidjson::Value by;
+    rapidjson::Value bz;
+    // @param reading_information -- input_file -- electronic_structure
+    rapidjson::Value basis_type;
+    rapidjson::Value ks_solver;
+    rapidjson::Value nbands;
+    rapidjson::Value nbands_istate;
+    rapidjson::Value nspin;
+    rapidjson::Value smearing_method;
+    rapidjson::Value smearing_sigma;
+    rapidjson::Value smearing_sigma_temp;
+    rapidjson::Value mixing_type;
+    rapidjson::Value mixing_beta;
+    rapidjson::Value mixing_ndim;
+    rapidjson::Value mixing_gg0;
+    rapidjson::Value mixing_tau;
+    rapidjson::Value mixing_dftu;
+    rapidjson::Value gamma_only;
+    rapidjson::Value printe;
+    rapidjson::Value scf_nmax;
+    rapidjson::Value scf_thr;
+    rapidjson::Value scf_thr_type;
+    rapidjson::Value chg_extrap;
+    rapidjson::Value lspinorb;
+    rapidjson::Value noncolin;
+    rapidjson::Value soc_lambda;
+    // @param reading_information -- input_file -- electronic_structure_SDFT
+    rapidjson::Value method_sto;
+    rapidjson::Value nbands_sto;
+    rapidjson::Value nche_sto(rapidjson::kNumberType);
+    rapidjson::Value emin_sto;
+    rapidjson::Value emax_sto;
+    rapidjson::Value seed_sto;
+    rapidjson::Value initsto_freq;
+    rapidjson::Value npart_sto;
+    // @param reading_information -- input_file -- geometry_relaxation
+    rapidjson::Value relax_method;
+    rapidjson::Value relax_new;
+    rapidjson::Value relax_scale_force;
+    rapidjson::Value relax_nmax;
+    rapidjson::Value relax_cg_thr;
+    rapidjson::Value cal_force;
+    rapidjson::Value force_thr;
+    rapidjson::Value force_thr_ev;
+    rapidjson::Value force_thr_ev2;
+    rapidjson::Value relax_bfgs_w1;
+    rapidjson::Value relax_bfgs_w2;
+    rapidjson::Value relax_bfgs_rmax;
+    rapidjson::Value relax_bfgs_rmin;
+    rapidjson::Value relax_bfgs_init;
+    rapidjson::Value cal_stress;
+    rapidjson::Value stress_thr;
+    rapidjson::Value press1;
+    rapidjson::Value press2;
+    rapidjson::Value press3;
+    rapidjson::Value fixed_axes;
+    rapidjson::Value fixed_ibrav;
+    rapidjson::Value fixed_atoms;
+    rapidjson::Value cell_factor;
+
+    // @param reading_information -- input_file -- output_information_related
+    rapidjson::Value out_mul;
+    rapidjson::Value out_freq_elec;
+    rapidjson::Value out_freq_ion;
+    rapidjson::Value out_chg;
+    rapidjson::Value out_pot;
+    rapidjson::Value out_dm;
+    rapidjson::Value out_dm1;
+    rapidjson::Value out_wfc_pw;
+    rapidjson::Value out_wfc_r;
+    rapidjson::Value out_wfc_lcao;
+    rapidjson::Value out_dos;
+    rapidjson::Value out_band;
+    rapidjson::Value out_proj_band;
+    rapidjson::Value out_stru;
+    rapidjson::Value out_bandgap;
+    rapidjson::Value out_level;
+    rapidjson::Value out_alllog;
+    rapidjson::Value out_mat_hs;
+    rapidjson::Value out_mat_r;
+    rapidjson::Value out_mat_hs2;
+    rapidjson::Value out_mat_t;
+    rapidjson::Value out_mat_dh;
+    rapidjson::Value out_app_flag;
+    rapidjson::Value out_interval;
+    rapidjson::Value out_element_info;
+    rapidjson::Value restart_save;
+    rapidjson::Value restart_load;
+    rapidjson::Value rpa;
+
+    // @param reading_information -- input_file -- density_of_states
+    rapidjson::Value dos_edelta_ev;
+    rapidjson::Value dos_sigma;
+    rapidjson::Value dos_scale;
+    rapidjson::Value dos_emin_ev;
+    rapidjson::Value dos_emax_ev;
+    rapidjson::Value dos_nche;
+    // @param reading_information -- input_file -- naos
+    rapidjson::Value bessel_nao_ecut;
+    rapidjson::Value bessel_nao_tolerence;
+    rapidjson::Value bessel_nao_rcut;
+    rapidjson::Value bessel_nao_smooth;
+    rapidjson::Value bessel_nao_sigma;
+    // @param reading_information -- input_file -- deepks
+    rapidjson::Value input_file_out_labels;
+    rapidjson::Value input_file_scf;
+    rapidjson::Value input_file_model;
+    rapidjson::Value bessel_descriptor_lmax;
+    rapidjson::Value bessel_descriptor_ecut;
+    rapidjson::Value bessel_descriptor_tolerence;
+    rapidjson::Value bessel_descriptor_rcut;
+    rapidjson::Value bessel_descriptor_smooth;
+    rapidjson::Value bessel_descriptor_sigma;
+    rapidjson::Value input_file_bandgap;
+    rapidjson::Value input_file_out_unittest;
+    // @param reading_information -- input_file -- ofdft
+    rapidjson::Value of_kinetic;
+    rapidjson::Value of_method;
+    rapidjson::Value of_conv;
+    rapidjson::Value of_tole;
+    rapidjson::Value of_tolp;
+    rapidjson::Value of_tf_weight;
+    rapidjson::Value of_vw_weight;
+    rapidjson::Value of_wt_alpha;
+    rapidjson::Value of_wt_beta;
+    rapidjson::Value of_wt_rho0;
+    rapidjson::Value of_hold_rho0;
+    rapidjson::Value of_lkt_a;
+    rapidjson::Value of_read_kernel;
+    rapidjson::Value of_kernel_file;
+    rapidjson::Value of_full_pw;
+    rapidjson::Value of_full_pw_dim;
+
+    // @param reading_information -- input_file -- electric_field_and_dipole_correction
+    rapidjson::Value efield_flag;
+    rapidjson::Value dip_cor_flag;
+    rapidjson::Value efield_dir;
+    rapidjson::Value efield_pos_max;
+    rapidjson::Value efield_pos_dec;
+    rapidjson::Value efield_amp;
+    // @param reading_information -- input_file -- gate_field 
+    rapidjson::Value gate_flag;
+    rapidjson::Value zgate;
+    rapidjson::Value block;
+    rapidjson::Value block_down;
+    rapidjson::Value block_up;
+    rapidjson::Value block_height;
+    // @param reading_information -- input_file -- exact_exchange
+    rapidjson::Value exx_hybrid_alpha;
+    rapidjson::Value exx_hse_omega;
+    rapidjson::Value exx_separate_loop;
+    rapidjson::Value exx_hybrid_step;
+    rapidjson::Value exx_mixing_beta;
+    rapidjson::Value exx_lambda;
+    rapidjson::Value exx_pca_threshold;
+    rapidjson::Value exx_c_threshold;
+    rapidjson::Value exx_v_threshold;
+    rapidjson::Value exx_dm_threshold;
+    rapidjson::Value exx_c_grad_threshold;
+    rapidjson::Value exx_v_grad_threshold;
+    rapidjson::Value exx_schwarz_threshold;
+    rapidjson::Value exx_cauchy_threshold;
+    rapidjson::Value exx_cauchy_force_threshold;
+    rapidjson::Value exx_cauchy_stress_threshold;
+    rapidjson::Value exx_ccp_threshold;
+    rapidjson::Value exx_ccp_rmesh_times;
+    rapidjson::Value exx_distribute_type;
+    rapidjson::Value exx_opt_orb_lmax;
+    rapidjson::Value exx_opt_orb_ecut;
+    rapidjson::Value exx_opt_orb_tolerence;
+    rapidjson::Value exx_real_number;
+
+    // @param reading_information -- input_file -- molecular_dynamics
+    rapidjson::Value md_type;
+    rapidjson::Value md_nstep;
+    rapidjson::Value md_dt;
+    rapidjson::Value md_thermostat;
+    rapidjson::Value md_tlast;
+    rapidjson::Value md_tfirst;
+    rapidjson::Value md_restart;
+    rapidjson::Value md_restartfreq;
+    rapidjson::Value md_dumpfreq;
+    rapidjson::Value dump_force;
+    rapidjson::Value dump_vel;
+    rapidjson::Value dump_virial;
+    rapidjson::Value md_seed;
+    rapidjson::Value md_tfreq;
+    rapidjson::Value md_tchain;
+    rapidjson::Value md_pmode;
+    rapidjson::Value md_prec_level;
+    rapidjson::Value ref_cell_factor;
+    rapidjson::Value md_pcouple;
+    rapidjson::Value md_pfirst;
+    rapidjson::Value md_plast;
+    rapidjson::Value md_pfreq;
+    rapidjson::Value md_pchain;
+    rapidjson::Value lj_rcut;
+    rapidjson::Value lj_epsilon;
+    rapidjson::Value lj_sigma;
+    rapidjson::Value pot_file;
+    rapidjson::Value msst_direction;
+    rapidjson::Value msst_vel;
+    rapidjson::Value msst_vis;
+    rapidjson::Value msst_tscale;
+    rapidjson::Value msst_qmass;
+    rapidjson::Value md_damp;
+    rapidjson::Value md_tolerance;
+    rapidjson::Value md_nraise;
+    rapidjson::Value cal_syns;
+    rapidjson::Value dmax;
+
+    // @param reading_information -- input_file -- dft_plus_u
+    rapidjson::Value orbital_corr(rapidjson::kArrayType);
+    rapidjson::Value hubbard_u(rapidjson::kArrayType);
+    rapidjson::Value yukawa_potential;
+    rapidjson::Value yukawa_lambda;
+    rapidjson::Value omc;
+
+    // @param reading_information -- input_file -- vdw_correction
+    rapidjson::Value vdw_method;
+    rapidjson::Value vdw_s6;
+    rapidjson::Value vdw_s8;
+    rapidjson::Value vdw_a1;
+    rapidjson::Value vdw_a2;
+    rapidjson::Value vdw_d;
+    rapidjson::Value vdw_abc;
+    rapidjson::Value vdw_C6_file;
+    rapidjson::Value vdw_C6_unit;
+    rapidjson::Value vdw_R0_file;
+    rapidjson::Value vdw_R0_unit;
+    rapidjson::Value vdw_cutoff_type;
+    rapidjson::Value vdw_cutoff_radius;
+    rapidjson::Value vdw_radius_unit;
+    rapidjson::Value vdw_cutoff_period(rapidjson::kArrayType);
+    rapidjson::Value vdw_cn_thr;
+    rapidjson::Value vdw_cn_thr_unit;
+
+    // @param reading_information -- input_file -- berry_phase_and_wannier90_interface
+    rapidjson::Value berry_phase;
+    rapidjson::Value gdir;
+    rapidjson::Value towannier90;
+    rapidjson::Value nnkpfile;
+    rapidjson::Value wannier_spin;
+
+    // @param reading_information -- input_file -- tddft
+    rapidjson::Value td_edm;
+    rapidjson::Value td_print_eij;
+    rapidjson::Value td_propagator;
+    rapidjson::Value td_vext;
+    rapidjson::Value td_vext_dire;
+    rapidjson::Value td_stype;
+    rapidjson::Value td_ttype;
+    rapidjson::Value td_tstart;
+    rapidjson::Value td_tend;
+    rapidjson::Value td_lcut1;
+    rapidjson::Value td_lcut2;
+    rapidjson::Value td_gauss_freq;
+    rapidjson::Value td_gauss_phase;
+    rapidjson::Value td_gauss_sigma;
+    rapidjson::Value td_gauss_t0;
+    rapidjson::Value td_gauss_amp;
+    rapidjson::Value td_trape_freq;
+    rapidjson::Value td_trape_phase;
+    rapidjson::Value td_trape_t1;
+    rapidjson::Value td_trape_t2;
+    rapidjson::Value td_trape_t3;
+    rapidjson::Value td_trape_amp;
+    rapidjson::Value td_trigo_freq1;
+    rapidjson::Value td_trigo_freq2;
+    rapidjson::Value td_trigo_phase1;
+    rapidjson::Value td_trigo_phase2;
+    rapidjson::Value td_trigo_amp;
+    rapidjson::Value td_heavi_t0;
+    rapidjson::Value td_heavi_amp;
+    rapidjson::Value td_out_dipole;
+    rapidjson::Value td_out_efield;
+    rapidjson::Value ocp;
+    rapidjson::Value ocp_set;
+
+    // @param reading_information -- input_file -- debuging_related
+    rapidjson::Value t_in_h;
+    rapidjson::Value vl_in_h;
+    rapidjson::Value vnl_in_h;
+    rapidjson::Value vh_in_h;
+    rapidjson::Value vion_in_h;
+    rapidjson::Value test_force;
+    rapidjson::Value test_stress;
+    rapidjson::Value colour;
+    rapidjson::Value test_skip_ewald;
+
+    // @param reading_information -- input_file -- electronic_conductivities
+    rapidjson::Value cal_cond;
+    rapidjson::Value cond_nche;
+    rapidjson::Value cond_dw;
+    rapidjson::Value cond_wcut;
+    rapidjson::Value cond_dt;
+    rapidjson::Value cond_dtbatch;
+    rapidjson::Value cond_fwhm;
+    rapidjson::Value cond_nonlocal;
+    // @param reading_information -- input_file -- implicit_solvation_model
+    rapidjson::Value imp_sol;
+    rapidjson::Value eb_k;
+    rapidjson::Value tau;
+    rapidjson::Value sigma_k;
+    rapidjson::Value nc_k;
+
+    // @param reading_information -- stru_infos：
+    rapidjson::Value stru_infos(rapidjson::kObjectType);
+    // rapidjson::Value ATOMIC_SPECIES(rapidjson::kArrayType);
+    // rapidjson::Value NUMERICAL_ORBITAL;
+    // rapidjson::Value LATTICE_CONSTANT(rapidjson::kArrayType);
+    // rapidjson::Value ATOMIC_POSITIONS(rapidjson::kArrayType);
+
+    // @param reading_information -- KPT_infos
+    rapidjson::Value KPT_infos(rapidjson::kObjectType);
+    // rapidjson::Value total_number;
+    // rapidjson::Value mode;
+    // rapidjson::Value vectors(rapidjson::kArrayType);
+
+    // @param reading_information -- orb_infos
+    rapidjson::Value orb_infos(rapidjson::kObjectType);
+
+    // @param reading_information -- pp
+    rapidjson::Value pp(rapidjson::kObjectType);
+
+    // @param init
+    rapidjson::Value init(rapidjson::kObjectType);
+    // @param init -- general
+    // rapidjson::Value calculation;
+    // rapidjson::Value esolver_type;
+    // rapidjson::Value basis_type;
+    // rapidjson::Value gamma_only;
+    // rapidjson::Value ks_solver;
+    // rapidjson::Value ntype;
+    // rapidjson::Value nspin;
+    // rapidjson::Value ecutwfc;
+    // rapidjson::Value scf_thr;
+    // rapidjson::Value scf_nmax;
+
+    // @param init -- symmetry
+    // rapidjson::Value symmetry(rapidjson::kObjectType);
+    // rapidjson::Value BRAVAIS_TYPE;
+    // rapidjson::Value BRAVAIS_LATTICE_NAME;
+    // rapidjson::Value IBRAV;
+    // rapidjson::Value LATTICE_CONSTANT_A;
+    // rapidjson::Value right_hand_lattice;
+
+    // @param init -- Kpoints
+    rapidjson::Value kpoints(rapidjson::kObjectType);
+    rapidjson::Value nkstot;
+    rapidjson::Value nkstot_ibz;
+    rapidjson::Value coordinates(rapidjson::kArrayType);
+    rapidjson::Value weight(rapidjson::kArrayType);
+
+    // @param init -- grid
+    rapidjson::Value grid(rapidjson::kObjectType);
+    rapidjson::Value energy_cutoff_for_wavefunc;
+    rapidjson::Value fft_grid_for_wave_functions(rapidjson::kArrayType);
+    rapidjson::Value number_of_plane_waves;
+    rapidjson::Value number_of_sticks;
+
+    // @param init -- Smearing
+    // rapidjson::Value smearing_method;
+    // rapidjson::Value smearing_sigma;
+
+    // @param init -- mixing
+    rapidjson::Value mixing;
+
+
+    // @param output
+    rapidjson::Value output(rapidjson::kArrayType);
+
+
+
+    // @param final_stru
+    rapidjson::Value final_stru(rapidjson::kObjectType);
+    rapidjson::Value cell;
+    rapidjson::Value coordinate;
+
+
+
+    /**
+     *  The functions below initialize the json output parameter 
+     *  tree to connect the nodes of the module
+    */
+
+    /**
+     * @brief   add Top stage：parameter in Abacus:
+     */
+    void Init_json_abacus()
+    {
+
+
+        // add First stage：parameter in abcus:
+
+        abacus.AddMember("general_info", general_info, doc.GetAllocator());
+
+        abacus.AddMember("readin_info", readin_info, doc.GetAllocator());
+        
+        abacus.AddMember("init", init, doc.GetAllocator());
+
+        abacus.AddMember("output", output, doc.GetAllocator());
+
+        abacus.AddMember("final_stru", final_stru, doc.GetAllocator());
+
+        doc.SetObject();
+        // abacus.SetObject();
+        doc.AddMember("ABACUS", abacus, doc.GetAllocator());
+        /**
+         * .
+         * .
+         * .
+         * .
+         * .
+         * .
+         * .
+         * */
+    }
+    /**
+     * @brief   add Second stage：parameter in Abacus - general_info:
+     */
+    void Init_json_abacus_generalInfo(){
+        general_info.AddMember("version", version, doc.GetAllocator());
+
+        general_info.AddMember("commit", commit, doc.GetAllocator());      
+
+        general_info.AddMember("begin_time", begin_time, doc.GetAllocator());      
+
+        general_info.AddMember("begin_date", begin_date, doc.GetAllocator());     
+
+        general_info.AddMember("device", device_g, doc.GetAllocator());                
+
+
+        
+        parallel.AddMember("drank", drank, doc.GetAllocator());
+
+        parallel.AddMember("dsize", dsize, doc.GetAllocator());
+                        
+        parallel.AddMember("dcolor", dcolor, doc.GetAllocator());
+    
+
+        // add Third stage：parameter in parallel:
+        general_info.AddMember("parallel", parallel, doc.GetAllocator());
+                
+    }
+    /**
+     * @brief   delete null node 
+     */
+    void RemoveNullValues(rapidjson::Value& parent) {
+        if (parent.IsObject()) {
+            for (rapidjson::Value::MemberIterator itr = parent.MemberBegin(); itr != parent.MemberEnd(); ) {
+                if (itr->value.IsNull()) {
+                    itr = parent.EraseMember(itr);
+                } else {
+                    // delet son null node
+                    RemoveNullValues(itr->value);
+                    ++itr;
+                }
+            }
+        } else if (parent.IsArray()) {
+            for (int i = 0; i < parent.Size(); ) {
+                if (parent[i].IsNull()) {
+                    parent.Erase(parent.Begin() + i);
+                } else {
+                    // delet son null node
+                    RemoveNullValues(parent[i]);
+                    ++i;
+                }
+            }
+        }
+    }
+
+    /**
+     * @brief   add Second stage：parameter in Abacus - readin_info:
+     */
+    void Init_json_abacus_readinInfo(){
+        //add Third stage：parameter in system_variables:
+        input_file.AddMember("suffix", input_suffix, doc.GetAllocator());
+        input_file.AddMember("ntype", ntype, doc.GetAllocator());
+        input_file.AddMember("calculation", calculation, doc.GetAllocator());
+        input_file.AddMember("esolver_type", esolver_type, doc.GetAllocator());
+        input_file.AddMember("symmetry", symmetry, doc.GetAllocator());
+        input_file.AddMember("symmetry_precfield", symmetry_precfield, doc.GetAllocator());
+        input_file.AddMember("symmetry_autoclose", symmetry_autoclose, doc.GetAllocator());
+        input_file.AddMember("kpar", kpar, doc.GetAllocator());
+        input_file.AddMember("bndpar", bndpar, doc.GetAllocator());
+        input_file.AddMember("latname", latname, doc.GetAllocator());
+        input_file.AddMember("init_wfc", init_wfc, doc.GetAllocator());
+        input_file.AddMember("init_chg", init_chg, doc.GetAllocator());
+        input_file.AddMember("init_vel", init_vel, doc.GetAllocator());
+        input_file.AddMember("nelec", nelec, doc.GetAllocator());
+        input_file.AddMember("nupdown", nupdown, doc.GetAllocator());
+        input_file.AddMember("dft_functional", dft_functional, doc.GetAllocator());
+        input_file.AddMember("xc_temperature", xc_temperature, doc.GetAllocator());
+        input_file.AddMember("pseudo_rcut", pseudo_rcut, doc.GetAllocator());
+        input_file.AddMember("pseudo_mesh", pseudo_mesh, doc.GetAllocator());
+        input_file.AddMember("mem_saver", mem_saver, doc.GetAllocator());
+        input_file.AddMember("diago_proc", diago_proc, doc.GetAllocator());
+        input_file.AddMember("nbspline", nbspline, doc.GetAllocator());
+        input_file.AddMember("kspacing", kspacing, doc.GetAllocator());
+        input_file.AddMember("min_dist_coef", min_dist_coef, doc.GetAllocator());
+        input_file.AddMember("device", device, doc.GetAllocator());
+
+        //add Third stage：parameter in files_related:
+        input_file.AddMember("stru_file", stru_file, doc.GetAllocator());
+        input_file.AddMember("kpoint_file", kpoint_file, doc.GetAllocator());
+        input_file.AddMember("pseudo_dir", pseudo_dir, doc.GetAllocator());
+        input_file.AddMember("orbital_dir", orbital_dir, doc.GetAllocator());
+        input_file.AddMember("read_file_dir", read_file_dir, doc.GetAllocator());
+        input_file.AddMember("wannier_card", wannier_card, doc.GetAllocator());
+    
+        //add Third stage：parameter in planewave_related:
+        input_file.AddMember("ecutwfc", ecutwfc, doc.GetAllocator());
+        input_file.AddMember("nx", nx, doc.GetAllocator());
+        input_file.AddMember("ny", ny, doc.GetAllocator());
+        input_file.AddMember("nz", nz, doc.GetAllocator());
+        input_file.AddMember("pw_seed", pw_seed, doc.GetAllocator());
+        input_file.AddMember("pw_diag_thr", pw_diag_thr, doc.GetAllocator());
+        input_file.AddMember("pw_diag_nmax", pw_diag_nmax, doc.GetAllocator());
+        input_file.AddMember("pw_diag_ndim", pw_diag_ndim, doc.GetAllocator());    
+    
+    
+        //add Third stage：parameter in numerical_atomic_orbitals_related:
+        input_file.AddMember("nb2d", nb2d, doc.GetAllocator());
+        input_file.AddMember("lmaxmax", lmaxmax, doc.GetAllocator());
+        input_file.AddMember("lcao_ecut", lcao_ecut, doc.GetAllocator());
+        input_file.AddMember("lcao_dk", lcao_dk, doc.GetAllocator());
+        input_file.AddMember("lcao_dr", lcao_dr, doc.GetAllocator());
+        input_file.AddMember("lcao_rmax", lcao_rmax, doc.GetAllocator());
+        input_file.AddMember("search_radius", search_radius, doc.GetAllocator());
+        input_file.AddMember("search_pbc", search_pbc, doc.GetAllocator());
+        input_file.AddMember("bx", bx, doc.GetAllocator());
+        input_file.AddMember("by", by, doc.GetAllocator());
+        input_file.AddMember("bz", bz, doc.GetAllocator());        
+    
+        //add Third stage：parameter in electronic_structure:
+        input_file.AddMember("basis_type", basis_type, doc.GetAllocator());
+        input_file.AddMember("ks_solver", ks_solver, doc.GetAllocator());
+        input_file.AddMember("nbands", nbands, doc.GetAllocator());
+        input_file.AddMember("nbands_istate", nbands_istate, doc.GetAllocator());
+        input_file.AddMember("nspin", nspin, doc.GetAllocator());
+        input_file.AddMember("smearing_method", smearing_method, doc.GetAllocator());
+        input_file.AddMember("smearing_sigma", smearing_sigma, doc.GetAllocator());
+        input_file.AddMember("smearing_sigma_temp", smearing_sigma_temp, doc.GetAllocator());
+        input_file.AddMember("mixing_type", mixing_type, doc.GetAllocator());
+        input_file.AddMember("mixing_beta", mixing_beta, doc.GetAllocator());
+        input_file.AddMember("mixing_ndim", mixing_ndim, doc.GetAllocator());
+        input_file.AddMember("mixing_gg0", mixing_gg0, doc.GetAllocator());
+        input_file.AddMember("mixing_tau", mixing_tau, doc.GetAllocator());
+        input_file.AddMember("mixing_dftu", mixing_dftu, doc.GetAllocator());
+        input_file.AddMember("gamma_only", gamma_only, doc.GetAllocator());
+        input_file.AddMember("printe", printe, doc.GetAllocator());
+        input_file.AddMember("scf_nmax", scf_nmax, doc.GetAllocator());
+        input_file.AddMember("scf_thr", scf_thr, doc.GetAllocator());
+        input_file.AddMember("scf_thr_type", scf_thr_type, doc.GetAllocator());
+        input_file.AddMember("chg_extrap", chg_extrap, doc.GetAllocator());
+        input_file.AddMember("lspinorb", lspinorb, doc.GetAllocator());
+        input_file.AddMember("noncolin", noncolin, doc.GetAllocator());
+        input_file.AddMember("soc_lambda", soc_lambda, doc.GetAllocator());    
+
+
+        //add Third stage：parameter in electronic_structure_SDFT:
+        input_file.AddMember("method_sto", method_sto, doc.GetAllocator());
+        input_file.AddMember("nbands_sto", nbands_sto, doc.GetAllocator());
+        input_file.AddMember("nche_sto", nche_sto, doc.GetAllocator());
+        input_file.AddMember("emin_sto", emin_sto, doc.GetAllocator());
+        input_file.AddMember("emax_sto", emax_sto, doc.GetAllocator());
+        input_file.AddMember("seed_sto", seed_sto, doc.GetAllocator());
+        input_file.AddMember("initsto_freq", initsto_freq, doc.GetAllocator());
+        input_file.AddMember("npart_sto", npart_sto, doc.GetAllocator());
+        
+        
+        //add Third stage：parameter in geometry_relaxation:
+        input_file.AddMember("relax_method", relax_method, doc.GetAllocator());
+        input_file.AddMember("relax_new", relax_new, doc.GetAllocator());
+        input_file.AddMember("relax_scale_force", relax_scale_force, doc.GetAllocator());
+        input_file.AddMember("relax_nmax", relax_nmax, doc.GetAllocator());
+        input_file.AddMember("relax_cg_thr", relax_cg_thr, doc.GetAllocator());
+        input_file.AddMember("cal_force", cal_force, doc.GetAllocator());
+        input_file.AddMember("force_thr", force_thr, doc.GetAllocator());
+        input_file.AddMember("force_thr_ev", force_thr_ev, doc.GetAllocator());
+        input_file.AddMember("force_thr_ev2", force_thr_ev2, doc.GetAllocator());
+        input_file.AddMember("relax_bfgs_w1", relax_bfgs_w1, doc.GetAllocator());
+        input_file.AddMember("relax_bfgs_w2", relax_bfgs_w2, doc.GetAllocator());
+        input_file.AddMember("relax_bfgs_rmax", relax_bfgs_rmax, doc.GetAllocator());
+        input_file.AddMember("relax_bfgs_rmin", relax_bfgs_rmin, doc.GetAllocator());
+        input_file.AddMember("relax_bfgs_init", relax_bfgs_init, doc.GetAllocator());
+        input_file.AddMember("cal_stress", cal_stress, doc.GetAllocator());
+        input_file.AddMember("stress_thr", stress_thr, doc.GetAllocator());
+        input_file.AddMember("press1", press1, doc.GetAllocator());
+        input_file.AddMember("press2", press2, doc.GetAllocator());
+        input_file.AddMember("press3", press3, doc.GetAllocator());
+        input_file.AddMember("fixed_axes", fixed_axes, doc.GetAllocator());
+        input_file.AddMember("fixed_ibrav", fixed_ibrav, doc.GetAllocator());
+        input_file.AddMember("fixed_atoms", fixed_atoms, doc.GetAllocator());
+        input_file.AddMember("cell_factor", cell_factor, doc.GetAllocator());
+        
+        
+        //add Third stage：parameter in output_information_related:
+        input_file.AddMember("out_mul", out_mul, doc.GetAllocator());
+        input_file.AddMember("out_freq_elec", out_freq_elec, doc.GetAllocator());
+        input_file.AddMember("out_freq_ion", out_freq_ion, doc.GetAllocator());        
+        input_file.AddMember("out_chg", out_chg, doc.GetAllocator());
+        input_file.AddMember("out_pot", out_pot, doc.GetAllocator());
+        input_file.AddMember("out_dm", out_dm, doc.GetAllocator());
+        input_file.AddMember("out_dm1", out_dm1, doc.GetAllocator());
+        input_file.AddMember("out_wfc_pw", out_wfc_pw, doc.GetAllocator());
+        input_file.AddMember("out_wfc_r", out_wfc_r, doc.GetAllocator());
+        input_file.AddMember("out_wfc_lcao", out_wfc_lcao, doc.GetAllocator());
+        input_file.AddMember("out_dos", out_dos, doc.GetAllocator());
+        input_file.AddMember("out_band", out_band, doc.GetAllocator());
+        input_file.AddMember("out_proj_band", out_proj_band, doc.GetAllocator());
+        input_file.AddMember("out_stru", out_stru, doc.GetAllocator());
+        input_file.AddMember("out_bandgap", out_bandgap, doc.GetAllocator());
+        input_file.AddMember("out_level", out_level, doc.GetAllocator());
+        input_file.AddMember("out_alllog", out_alllog, doc.GetAllocator());
+        input_file.AddMember("out_mat_hs", out_mat_hs, doc.GetAllocator());
+        input_file.AddMember("out_mat_r", out_mat_r, doc.GetAllocator());
+        input_file.AddMember("out_mat_hs2", out_mat_hs2, doc.GetAllocator());
+        input_file.AddMember("out_mat_t", out_mat_t, doc.GetAllocator());
+        input_file.AddMember("out_mat_dh", out_mat_dh, doc.GetAllocator());
+        input_file.AddMember("out_app_flag", out_app_flag, doc.GetAllocator());
+        input_file.AddMember("out_interval", out_interval, doc.GetAllocator());
+        input_file.AddMember("out_element_info", out_element_info, doc.GetAllocator());
+        input_file.AddMember("restart_save", restart_save, doc.GetAllocator());
+        input_file.AddMember("restart_load", restart_load, doc.GetAllocator());
+        input_file.AddMember("rpa", rpa, doc.GetAllocator());
+
+        //add Third stage：parameter in density_of_states:
+        input_file.AddMember("dos_edelta_ev", dos_edelta_ev, doc.GetAllocator());
+        input_file.AddMember("dos_sigma", dos_sigma, doc.GetAllocator());
+        input_file.AddMember("dos_scale", dos_scale, doc.GetAllocator());
+        input_file.AddMember("dos_emin_ev", dos_emin_ev, doc.GetAllocator());
+        input_file.AddMember("dos_emax_ev", dos_emax_ev, doc.GetAllocator());
+        input_file.AddMember("dos_nche", dos_nche, doc.GetAllocator());
+        
+        //add Third stage：parameter in naos:
+        input_file.AddMember("bessel_nao_ecut", bessel_nao_ecut, doc.GetAllocator());
+        input_file.AddMember("bessel_nao_tolerence", bessel_nao_tolerence, doc.GetAllocator());
+        input_file.AddMember("bessel_nao_rcut", bessel_nao_rcut, doc.GetAllocator());
+        input_file.AddMember("bessel_nao_smooth", bessel_nao_smooth, doc.GetAllocator());
+        input_file.AddMember("bessel_nao_sigma", bessel_nao_sigma, doc.GetAllocator());
+        
+        //add Third stage：parameter in deepks:
+        input_file.AddMember("input_file_out_labels", input_file_out_labels, doc.GetAllocator());
+        input_file.AddMember("input_file_scf", input_file_scf, doc.GetAllocator());
+        input_file.AddMember("input_file_model", input_file_model, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_lmax", bessel_descriptor_lmax, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_ecut", bessel_descriptor_ecut, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_tolerence", bessel_descriptor_tolerence, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_rcut", bessel_descriptor_rcut, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_smooth", bessel_descriptor_smooth, doc.GetAllocator());
+        input_file.AddMember("bessel_descriptor_sigma", bessel_descriptor_sigma, doc.GetAllocator());
+        input_file.AddMember("input_file_bandgap", input_file_bandgap, doc.GetAllocator());
+        input_file.AddMember("input_file_out_unittest", input_file_out_unittest, doc.GetAllocator());
+        
+        //add Third stage：parameter in ofdft:
+        input_file.AddMember("of_kinetic", of_kinetic, doc.GetAllocator());
+        input_file.AddMember("of_method", of_method, doc.GetAllocator());
+        input_file.AddMember("of_conv", of_conv, doc.GetAllocator());
+        input_file.AddMember("of_tole", of_tole, doc.GetAllocator());
+        input_file.AddMember("of_tolp", of_tolp, doc.GetAllocator());
+        input_file.AddMember("of_tf_weight", of_tf_weight, doc.GetAllocator());
+        input_file.AddMember("of_vw_weight", of_vw_weight, doc.GetAllocator());
+        input_file.AddMember("of_wt_alpha", of_wt_alpha, doc.GetAllocator());
+        input_file.AddMember("of_wt_beta", of_wt_beta, doc.GetAllocator());
+        input_file.AddMember("of_wt_rho0", of_wt_rho0, doc.GetAllocator());
+        input_file.AddMember("of_hold_rho0", of_hold_rho0, doc.GetAllocator());
+        input_file.AddMember("of_lkt_a", of_lkt_a, doc.GetAllocator());
+        input_file.AddMember("of_read_kernel", of_read_kernel, doc.GetAllocator());
+        input_file.AddMember("of_kernel_file", of_kernel_file, doc.GetAllocator());
+        input_file.AddMember("of_full_pw", of_full_pw, doc.GetAllocator());
+        input_file.AddMember("of_full_pw_dim", of_full_pw_dim, doc.GetAllocator());
+        
+        
+        //add Third stage：parameter in electric_field_and_dipole_correction:
+        input_file.AddMember("efield_flag", efield_flag, doc.GetAllocator());
+        input_file.AddMember("dip_cor_flag", dip_cor_flag, doc.GetAllocator());
+        input_file.AddMember("efield_dir", efield_dir, doc.GetAllocator());
+        input_file.AddMember("efield_pos_max", efield_pos_max, doc.GetAllocator());
+        input_file.AddMember("efield_pos_dec", efield_pos_dec, doc.GetAllocator());
+        input_file.AddMember("efield_amp", efield_amp, doc.GetAllocator());
+        
+        //add Third stage：parameter in gate_field:
+        input_file.AddMember("gate_flag", gate_flag, doc.GetAllocator());
+        input_file.AddMember("zgate", zgate, doc.GetAllocator());
+        input_file.AddMember("block", block, doc.GetAllocator());
+        input_file.AddMember("block_down", block_down, doc.GetAllocator());
+        input_file.AddMember("block_up", block_up, doc.GetAllocator());
+        input_file.AddMember("block_height", block_height, doc.GetAllocator());
+    
+        //add Third stage：parameter in exact_exchange:
+        input_file.AddMember("exx_hybrid_alpha", exx_hybrid_alpha, doc.GetAllocator());
+        input_file.AddMember("exx_hse_omega", exx_hse_omega, doc.GetAllocator());
+        input_file.AddMember("exx_separate_loop", exx_separate_loop, doc.GetAllocator());
+        input_file.AddMember("exx_hybrid_step", exx_hybrid_step, doc.GetAllocator());
+        input_file.AddMember("exx_mixing_beta", exx_mixing_beta, doc.GetAllocator());
+        input_file.AddMember("exx_lambda", exx_lambda, doc.GetAllocator());
+        input_file.AddMember("exx_pca_threshold", exx_pca_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_c_threshold", exx_c_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_v_threshold", exx_v_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_dm_threshold", exx_dm_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_c_grad_threshold", exx_c_grad_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_v_grad_threshold", exx_v_grad_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_schwarz_threshold", exx_schwarz_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_cauchy_threshold", exx_cauchy_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_cauchy_force_threshold", exx_cauchy_force_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_cauchy_stress_threshold", exx_cauchy_stress_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_ccp_threshold", exx_ccp_threshold, doc.GetAllocator());
+        input_file.AddMember("exx_ccp_rmesh_times", exx_ccp_rmesh_times, doc.GetAllocator());
+        input_file.AddMember("exx_distribute_type", exx_distribute_type, doc.GetAllocator());
+        input_file.AddMember("exx_opt_orb_lmax", exx_opt_orb_lmax, doc.GetAllocator());
+        input_file.AddMember("exx_opt_orb_ecut", exx_opt_orb_ecut, doc.GetAllocator());
+        input_file.AddMember("exx_opt_orb_tolerence", exx_opt_orb_tolerence, doc.GetAllocator());
+        input_file.AddMember("exx_real_number", exx_real_number, doc.GetAllocator());
+        
+        
+        //add Third stage：parameter in molecular_dynamics:
+        input_file.AddMember("md_type", md_type, doc.GetAllocator());
+        input_file.AddMember("md_nstep", md_nstep, doc.GetAllocator());
+        input_file.AddMember("md_dt", md_dt, doc.GetAllocator());
+        input_file.AddMember("md_thermostat", md_thermostat, doc.GetAllocator());
+        input_file.AddMember("md_tlast", md_tlast, doc.GetAllocator());
+        input_file.AddMember("md_tfirst", md_tfirst, doc.GetAllocator());
+        input_file.AddMember("md_restart", md_restart, doc.GetAllocator());
+        input_file.AddMember("md_restartfreq", md_restartfreq, doc.GetAllocator());
+        input_file.AddMember("md_dumpfreq", md_dumpfreq, doc.GetAllocator());
+        input_file.AddMember("dump_force", dump_force, doc.GetAllocator());
+        input_file.AddMember("dump_vel", dump_vel, doc.GetAllocator());
+        input_file.AddMember("dump_virial", dump_virial, doc.GetAllocator());
+        input_file.AddMember("md_seed", md_seed, doc.GetAllocator());
+        input_file.AddMember("md_tfreq", md_tfreq, doc.GetAllocator());
+        input_file.AddMember("md_tchain", md_tchain, doc.GetAllocator());
+        input_file.AddMember("md_pmode", md_pmode, doc.GetAllocator());
+        input_file.AddMember("md_prec_level", md_prec_level, doc.GetAllocator());
+        input_file.AddMember("ref_cell_factor", ref_cell_factor, doc.GetAllocator());
+        input_file.AddMember("md_pcouple", md_pcouple, doc.GetAllocator());
+        input_file.AddMember("md_pfirst", md_pfirst, doc.GetAllocator());
+        input_file.AddMember("md_plast", md_plast, doc.GetAllocator());
+        input_file.AddMember("md_pfreq", md_pfreq, doc.GetAllocator());
+        input_file.AddMember("md_pchain", md_pchain, doc.GetAllocator());
+        input_file.AddMember("lj_rcut", lj_rcut, doc.GetAllocator());
+        input_file.AddMember("lj_epsilon", lj_epsilon, doc.GetAllocator());
+        input_file.AddMember("lj_sigma", lj_sigma, doc.GetAllocator());
+        input_file.AddMember("pot_file", pot_file, doc.GetAllocator());
+        input_file.AddMember("msst_direction", msst_direction, doc.GetAllocator());
+        input_file.AddMember("msst_vel", msst_vel, doc.GetAllocator());
+        input_file.AddMember("msst_vis", msst_vis, doc.GetAllocator());
+        input_file.AddMember("msst_tscale", msst_tscale, doc.GetAllocator());
+        input_file.AddMember("msst_qmass", msst_qmass, doc.GetAllocator());
+        input_file.AddMember("md_damp", md_damp, doc.GetAllocator());
+        input_file.AddMember("md_tolerance", md_tolerance, doc.GetAllocator());
+        input_file.AddMember("md_nraise", md_nraise, doc.GetAllocator());
+        input_file.AddMember("cal_syns", cal_syns, doc.GetAllocator());
+        input_file.AddMember("dmax", dmax, doc.GetAllocator());
+
+        //add Third stage：parameter in dft_plus_u:
+        input_file.AddMember("orbital_corr", orbital_corr, doc.GetAllocator());
+        input_file.AddMember("hubbard_u", hubbard_u, doc.GetAllocator());
+        input_file.AddMember("yukawa_potential", yukawa_potential, doc.GetAllocator());
+        input_file.AddMember("yukawa_lambda", yukawa_lambda, doc.GetAllocator());
+        input_file.AddMember("omc", omc, doc.GetAllocator());
+
+        //add Third stage：parameter in vdw_correction:
+        input_file.AddMember("vdw_method", vdw_method, doc.GetAllocator());
+        input_file.AddMember("vdw_s6", vdw_s6, doc.GetAllocator());
+        input_file.AddMember("vdw_s8", vdw_s8, doc.GetAllocator());
+        input_file.AddMember("vdw_a1", vdw_a1, doc.GetAllocator());
+        input_file.AddMember("vdw_a2", vdw_a2, doc.GetAllocator());
+        input_file.AddMember("vdw_d", vdw_d, doc.GetAllocator());
+        input_file.AddMember("vdw_abc", vdw_abc, doc.GetAllocator());
+        input_file.AddMember("vdw_C6_file", vdw_C6_file, doc.GetAllocator());
+        input_file.AddMember("vdw_C6_unit", vdw_C6_unit, doc.GetAllocator());
+        input_file.AddMember("vdw_R0_file", vdw_R0_file, doc.GetAllocator());
+        input_file.AddMember("vdw_R0_unit", vdw_R0_unit, doc.GetAllocator());
+        input_file.AddMember("vdw_cutoff_type", vdw_cutoff_type, doc.GetAllocator());
+        input_file.AddMember("vdw_cutoff_radius", vdw_cutoff_radius, doc.GetAllocator());
+        input_file.AddMember("vdw_radius_unit", vdw_radius_unit, doc.GetAllocator());
+        input_file.AddMember("vdw_cutoff_period", vdw_cutoff_period, doc.GetAllocator());
+        input_file.AddMember("vdw_cn_thr", vdw_cn_thr, doc.GetAllocator());
+        input_file.AddMember("vdw_cn_thr_unit", vdw_cn_thr_unit, doc.GetAllocator());
+
+        //add Third stage：parameter in berry_phase_and_wannier90_interface:
+        input_file.AddMember("berry_phase", berry_phase, doc.GetAllocator());
+        input_file.AddMember("gdir", gdir, doc.GetAllocator());
+        input_file.AddMember("towannier90", towannier90, doc.GetAllocator());
+        input_file.AddMember("nnkpfile", nnkpfile, doc.GetAllocator());
+        input_file.AddMember("wannier_spin", wannier_spin, doc.GetAllocator());    
+    
+        //add Third stage：parameter in tddft:
+        input_file.AddMember("td_edm", td_edm, doc.GetAllocator());
+        input_file.AddMember("td_print_eij", td_print_eij, doc.GetAllocator());
+        input_file.AddMember("td_propagator", td_propagator, doc.GetAllocator());
+        input_file.AddMember("td_vext", td_vext, doc.GetAllocator());
+        input_file.AddMember("td_vext_dire", td_vext_dire, doc.GetAllocator());
+        input_file.AddMember("td_stype", td_stype, doc.GetAllocator());
+        input_file.AddMember("td_ttype", td_ttype, doc.GetAllocator());
+        input_file.AddMember("td_tstart", td_tstart, doc.GetAllocator());
+        input_file.AddMember("td_tend", td_tend, doc.GetAllocator());
+        input_file.AddMember("td_lcut1", td_lcut1, doc.GetAllocator());
+        input_file.AddMember("td_lcut2", td_lcut2, doc.GetAllocator());
+        input_file.AddMember("td_gauss_freq", td_gauss_freq, doc.GetAllocator());
+        input_file.AddMember("td_gauss_phase", td_gauss_phase, doc.GetAllocator());
+        input_file.AddMember("td_gauss_sigma", td_gauss_sigma, doc.GetAllocator());
+        input_file.AddMember("td_gauss_t0", td_gauss_t0, doc.GetAllocator());
+        input_file.AddMember("td_gauss_amp", td_gauss_amp, doc.GetAllocator());
+        input_file.AddMember("td_trape_freq", td_trape_freq, doc.GetAllocator());
+        input_file.AddMember("td_trape_phase", td_trape_phase, doc.GetAllocator());
+        input_file.AddMember("td_trape_t1", td_trape_t1, doc.GetAllocator());
+        input_file.AddMember("td_trape_t2", td_trape_t2, doc.GetAllocator());
+        input_file.AddMember("td_trape_t3", td_trape_t3, doc.GetAllocator());
+        input_file.AddMember("td_trape_amp", td_trape_amp, doc.GetAllocator());
+        input_file.AddMember("td_trigo_freq1", td_trigo_freq1, doc.GetAllocator());
+        input_file.AddMember("td_trigo_freq2", td_trigo_freq2, doc.GetAllocator());
+        input_file.AddMember("td_trigo_phase1", td_trigo_phase1, doc.GetAllocator());
+        input_file.AddMember("td_trigo_phase2", td_trigo_phase2, doc.GetAllocator());
+        input_file.AddMember("td_trigo_amp", td_trigo_amp, doc.GetAllocator());
+        input_file.AddMember("td_heavi_t0", td_heavi_t0, doc.GetAllocator());
+        input_file.AddMember("td_heavi_amp", td_heavi_amp, doc.GetAllocator());
+        input_file.AddMember("td_out_dipole", td_out_dipole, doc.GetAllocator());
+        input_file.AddMember("td_out_efield", td_out_efield, doc.GetAllocator());
+        input_file.AddMember("ocp", ocp, doc.GetAllocator());
+        input_file.AddMember("ocp_set", ocp_set, doc.GetAllocator());
+
+        //add Third stage：parameter in debuging_related:
+        input_file.AddMember("t_in_h", t_in_h, doc.GetAllocator());
+        input_file.AddMember("vl_in_h", vl_in_h, doc.GetAllocator());
+        input_file.AddMember("vnl_in_h", vnl_in_h, doc.GetAllocator());
+        input_file.AddMember("vh_in_h", vh_in_h, doc.GetAllocator());
+        input_file.AddMember("vion_in_h", vion_in_h, doc.GetAllocator());
+        input_file.AddMember("test_force", test_force, doc.GetAllocator());
+        input_file.AddMember("test_stress", test_stress, doc.GetAllocator());
+        input_file.AddMember("colour", colour, doc.GetAllocator());
+        input_file.AddMember("test_skip_ewald", test_skip_ewald, doc.GetAllocator());
+
+        //add Third stage：parameter in electronic_conductivities:
+        input_file.AddMember("cal_cond", cal_cond, doc.GetAllocator());
+        input_file.AddMember("cond_nche", cond_nche, doc.GetAllocator());
+        input_file.AddMember("cond_dw", cond_dw, doc.GetAllocator());
+        input_file.AddMember("cond_wcut", cond_wcut, doc.GetAllocator());
+        input_file.AddMember("cond_dt", cond_dt, doc.GetAllocator());
+        input_file.AddMember("cond_dtbatch", cond_dtbatch, doc.GetAllocator());
+        input_file.AddMember("cond_fwhm", cond_fwhm, doc.GetAllocator());
+        input_file.AddMember("cond_nonlocal", cond_nonlocal, doc.GetAllocator());
+
+        //add Third stage：parameter in implicit_solvation_model:
+        input_file.AddMember("imp_sol", imp_sol, doc.GetAllocator());
+        input_file.AddMember("eb_k", eb_k, doc.GetAllocator());
+        input_file.AddMember("tau", tau, doc.GetAllocator());
+        input_file.AddMember("sigma_k", sigma_k, doc.GetAllocator());
+        input_file.AddMember("nc_k", nc_k, doc.GetAllocator());
+
+
+        RemoveNullValues(input_file);
+
+
+        // after add child_node's node in readin_info, add child node
+        // add parameters in readin_info:
+        readin_info.AddMember("input_file", input_file, doc.GetAllocator());
+
+    }
+
+
+    void Finish_json_tree(){
+        // Converts a json object to a string
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        doc.Accept(writer);
+
+        // Output the json string to a file
+        std::string json_path;
+        json_path.append("abacus.json");
+
+        std::ofstream ofs(json_path);
+        ofs << buffer.GetString() << std::endl;
+        ofs.close();
+    }
+
+
+
+
+}
+
+
+#endif
\ No newline at end of file
diff --git a/source/module_base/para_json.h b/source/module_base/para_json.h
new file mode 100644
index 0000000000..0e829dc80e
--- /dev/null
+++ b/source/module_base/para_json.h
@@ -0,0 +1,560 @@
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#ifdef __RAPIDJSON
+
+#include <rapidjson/document.h>
+#include <rapidjson/writer.h>
+#include <rapidjson/stringbuffer.h>
+
+
+/**
+ * @brief   This namespace is used to store the json object of the 
+ *          abacus parameter and its handlers. Used to read the parameters 
+ *          at run time and finally organize them into json format files
+ * 
+*/
+namespace Para_Json
+{
+
+    extern int test;
+    // @param doc: the output json file
+    extern rapidjson::Document doc;
+    extern rapidjson::Value abacus;
+
+    // @param general_info ：
+    extern rapidjson::Value general_info;
+    extern rapidjson::Value version;
+    extern rapidjson::Value commit;
+    extern rapidjson::Value begin_time;
+    extern rapidjson::Value begin_date;
+    extern rapidjson::Value device_g;
+    // @param general_info -- parallel：
+    extern rapidjson::Value parallel;
+    extern rapidjson::Value drank;
+    extern rapidjson::Value dsize;
+    extern rapidjson::Value dcolor ;
+    // @param general_info -- path
+    extern rapidjson::Value path;
+    extern rapidjson::Value global_out_dir;
+    extern rapidjson::Value global_in_card;
+    extern rapidjson::Value pseudo_dir_path ;
+    extern rapidjson::Value orbital_dir_path;
+
+    
+    // @param reading_information：
+    extern rapidjson::Value readin_info;
+    // @param reading_information -- input_para：
+    
+    // @param reading_information -- input_para -- system_variables：
+    extern rapidjson::Value system_variables;
+
+    extern rapidjson::Value input_file;
+    extern rapidjson::Value input_suffix;
+    extern rapidjson::Value ntype;
+    extern rapidjson::Value calculation;
+    extern rapidjson::Value esolver_type;
+    extern rapidjson::Value symmetry;
+    extern rapidjson::Value symmetry_precfield;
+    extern rapidjson::Value symmetry_autoclose;
+    extern rapidjson::Value kpar;
+    extern rapidjson::Value bndpar;
+    extern rapidjson::Value latname;
+    extern rapidjson::Value init_wfc;
+    extern rapidjson::Value init_chg;
+    extern rapidjson::Value init_vel;
+    extern rapidjson::Value nelec;
+    extern rapidjson::Value nupdown;
+    extern rapidjson::Value dft_functional;
+    extern rapidjson::Value xc_temperature;
+    extern rapidjson::Value pseudo_rcut;
+    extern rapidjson::Value pseudo_mesh;
+    extern rapidjson::Value mem_saver;
+    extern rapidjson::Value diago_proc;
+    extern rapidjson::Value nbspline;
+    extern rapidjson::Value kspacing;
+    extern rapidjson::Value min_dist_coef;
+    extern rapidjson::Value device;
+    // @param reading_information -- input_para -- files_related
+
+    extern rapidjson::Value stru_file;
+    extern rapidjson::Value kpoint_file;
+    extern rapidjson::Value pseudo_dir;
+    extern rapidjson::Value orbital_dir;
+    extern rapidjson::Value read_file_dir;
+    extern rapidjson::Value wannier_card;
+    // @param reading_information -- input_para -- planewave_related
+
+    extern rapidjson::Value ecutwfc;
+    extern rapidjson::Value nx;
+    extern rapidjson::Value ny;
+    extern rapidjson::Value nz;
+    extern rapidjson::Value pw_seed;
+    extern rapidjson::Value pw_diag_thr;
+    extern rapidjson::Value pw_diag_nmax;
+    extern rapidjson::Value pw_diag_ndim;
+    // @param reading_information -- input_para -- numerical_atomic_orbitals_related
+    
+    extern rapidjson::Value nb2d;
+    extern rapidjson::Value lmaxmax;
+    extern rapidjson::Value lcao_ecut;
+    extern rapidjson::Value lcao_dk;
+    extern rapidjson::Value lcao_dr;
+    extern rapidjson::Value lcao_rmax;
+    extern rapidjson::Value search_radius;
+    extern rapidjson::Value search_pbc;
+    extern rapidjson::Value bx;
+    extern rapidjson::Value by;
+    extern rapidjson::Value bz;
+    // @param reading_information -- input_para -- electronic_structure
+    
+    extern rapidjson::Value basis_type;
+    extern rapidjson::Value ks_solver;
+    extern rapidjson::Value nbands;
+    extern rapidjson::Value nbands_istate;
+    extern rapidjson::Value nspin;
+    extern rapidjson::Value smearing_method;
+    extern rapidjson::Value smearing_sigma;
+    extern rapidjson::Value smearing_sigma_temp;
+    extern rapidjson::Value mixing_type;
+    extern rapidjson::Value mixing_beta;
+    extern rapidjson::Value mixing_ndim;
+    extern rapidjson::Value mixing_gg0;
+    extern rapidjson::Value mixing_tau;
+    extern rapidjson::Value mixing_dftu;
+    extern rapidjson::Value gamma_only;
+    extern rapidjson::Value printe;
+    extern rapidjson::Value scf_nmax;
+    extern rapidjson::Value scf_thr;
+    extern rapidjson::Value scf_thr_type;
+    extern rapidjson::Value chg_extrap;
+    extern rapidjson::Value lspinorb;
+    extern rapidjson::Value noncolin;
+    extern rapidjson::Value soc_lambda;
+    // @param reading_information -- input_para -- electronic_structure_SDFT
+
+    extern rapidjson::Value method_sto;
+    extern rapidjson::Value nbands_sto;
+    extern rapidjson::Value nche_sto;
+    extern rapidjson::Value emin_sto;
+    extern rapidjson::Value emax_sto;
+    extern rapidjson::Value seed_sto;
+    extern rapidjson::Value initsto_freq;
+    extern rapidjson::Value npart_sto;
+    // @param reading_information -- input_para -- geometry_relaxation
+
+    extern rapidjson::Value relax_method;
+    extern rapidjson::Value relax_new;
+    extern rapidjson::Value relax_scale_force;
+    extern rapidjson::Value relax_nmax;
+    extern rapidjson::Value relax_cg_thr;
+    extern rapidjson::Value cal_force;
+    extern rapidjson::Value force_thr;
+    extern rapidjson::Value force_thr_ev;
+    extern rapidjson::Value force_thr_ev2;
+    extern rapidjson::Value relax_bfgs_w1;
+    extern rapidjson::Value relax_bfgs_w2;
+    extern rapidjson::Value relax_bfgs_rmax;
+    extern rapidjson::Value relax_bfgs_rmin;
+    extern rapidjson::Value relax_bfgs_init;
+    extern rapidjson::Value cal_stress;
+    extern rapidjson::Value stress_thr;
+    extern rapidjson::Value press1;
+    extern rapidjson::Value press2;
+    extern rapidjson::Value press3;
+    extern rapidjson::Value fixed_axes;
+    extern rapidjson::Value fixed_ibrav;
+    extern rapidjson::Value fixed_atoms;
+    extern rapidjson::Value cell_factor;
+
+    // @param reading_information -- input_para -- output_information_related
+
+    extern rapidjson::Value out_mul;
+    extern rapidjson::Value out_freq_elec;
+    extern rapidjson::Value out_freq_ion;
+    extern rapidjson::Value out_chg;
+    extern rapidjson::Value out_pot;
+    extern rapidjson::Value out_dm;
+    extern rapidjson::Value out_dm1;
+    extern rapidjson::Value out_wfc_pw;
+    extern rapidjson::Value out_wfc_r;
+    extern rapidjson::Value out_wfc_lcao;
+    extern rapidjson::Value out_dos;
+    extern rapidjson::Value out_band;
+    extern rapidjson::Value out_proj_band;
+    extern rapidjson::Value out_stru;
+    extern rapidjson::Value out_bandgap;
+    extern rapidjson::Value out_level;
+    extern rapidjson::Value out_alllog;
+    extern rapidjson::Value out_mat_hs;
+    extern rapidjson::Value out_mat_r;
+    extern rapidjson::Value out_mat_hs2;
+    extern rapidjson::Value out_mat_t;
+    extern rapidjson::Value out_mat_dh;
+    extern rapidjson::Value out_app_flag;
+    extern rapidjson::Value out_interval;
+    extern rapidjson::Value out_element_info;
+    extern rapidjson::Value restart_save;
+    extern rapidjson::Value restart_load;
+    extern rapidjson::Value rpa;
+
+    // @param reading_information -- input_para -- density_of_states
+
+    extern rapidjson::Value dos_edelta_ev;
+    extern rapidjson::Value dos_sigma;
+    extern rapidjson::Value dos_scale;
+    extern rapidjson::Value dos_emin_ev;
+    extern rapidjson::Value dos_emax_ev;
+    extern rapidjson::Value dos_nche;
+    // @param reading_information -- input_para -- naos
+    extern rapidjson::Value bessel_nao_ecut;
+    extern rapidjson::Value bessel_nao_tolerence;
+    extern rapidjson::Value bessel_nao_rcut;
+    extern rapidjson::Value bessel_nao_smooth;
+    extern rapidjson::Value bessel_nao_sigma;
+    // @param reading_information -- input_para -- deepks
+
+    extern rapidjson::Value deepks_out_labels;
+    extern rapidjson::Value deepks_scf;
+    extern rapidjson::Value deepks_model;
+    extern rapidjson::Value bessel_descriptor_lmax;
+    extern rapidjson::Value bessel_descriptor_ecut;
+    extern rapidjson::Value bessel_descriptor_tolerence;
+    extern rapidjson::Value bessel_descriptor_rcut;
+    extern rapidjson::Value bessel_descriptor_smooth;
+    extern rapidjson::Value bessel_descriptor_sigma;
+    extern rapidjson::Value deepks_bandgap;
+    extern rapidjson::Value deepks_out_unittest;
+    // @param reading_information -- input_para -- ofdft
+    extern rapidjson::Value of_kinetic;
+    extern rapidjson::Value of_method;
+    extern rapidjson::Value of_conv;
+    extern rapidjson::Value of_tole;
+    extern rapidjson::Value of_tolp;
+    extern rapidjson::Value of_tf_weight;
+    extern rapidjson::Value of_vw_weight;
+    extern rapidjson::Value of_wt_alpha;
+    extern rapidjson::Value of_wt_beta;
+    extern rapidjson::Value of_wt_rho0;
+    extern rapidjson::Value of_hold_rho0;
+    extern rapidjson::Value of_lkt_a;
+    extern rapidjson::Value of_read_kernel;
+    extern rapidjson::Value of_kernel_file;
+    extern rapidjson::Value of_full_pw;
+    extern rapidjson::Value of_full_pw_dim;
+
+    // @param reading_information -- input_para -- electric_field_and_dipole_correction
+    
+    extern rapidjson::Value efield_flag;
+    extern rapidjson::Value dip_cor_flag;
+    extern rapidjson::Value efield_dir;
+    extern rapidjson::Value efield_pos_max;
+    extern rapidjson::Value efield_pos_dec;
+    extern rapidjson::Value efield_amp;
+    // @param reading_information -- input_para -- gate_field 
+    
+    extern rapidjson::Value gate_flag;
+    extern rapidjson::Value zgate;
+    extern rapidjson::Value block;
+    extern rapidjson::Value block_down;
+    extern rapidjson::Value block_up;
+    extern rapidjson::Value block_height;
+    // @param reading_information -- input_para -- exact_exchange
+    extern rapidjson::Value exx_hybrid_alpha;
+    extern rapidjson::Value exx_hse_omega;
+    extern rapidjson::Value exx_separate_loop;
+    extern rapidjson::Value exx_hybrid_step;
+    extern rapidjson::Value exx_mixing_beta;
+    extern rapidjson::Value exx_lambda;
+    extern rapidjson::Value exx_pca_threshold;
+    extern rapidjson::Value exx_c_threshold;
+    extern rapidjson::Value exx_v_threshold;
+    extern rapidjson::Value exx_dm_threshold;
+    extern rapidjson::Value exx_c_grad_threshold;
+    extern rapidjson::Value exx_v_grad_threshold;
+    extern rapidjson::Value exx_schwarz_threshold;
+    extern rapidjson::Value exx_cauchy_threshold;
+    extern rapidjson::Value exx_cauchy_force_threshold;
+    extern rapidjson::Value exx_cauchy_stress_threshold;
+    extern rapidjson::Value exx_ccp_threshold;
+    extern rapidjson::Value exx_ccp_rmesh_times;
+    extern rapidjson::Value exx_distribute_type;
+    extern rapidjson::Value exx_opt_orb_lmax;
+    extern rapidjson::Value exx_opt_orb_ecut;
+    extern rapidjson::Value exx_opt_orb_tolerence;
+    extern rapidjson::Value exx_real_number;
+
+    // @param reading_information -- input_para -- molecular_dynamics
+    extern rapidjson::Value md_type;
+    extern rapidjson::Value md_nstep;
+    extern rapidjson::Value md_dt;
+    extern rapidjson::Value md_thermostat;
+    extern rapidjson::Value md_tlast;
+    extern rapidjson::Value md_tfirst;
+    extern rapidjson::Value md_restart;
+    extern rapidjson::Value md_restartfreq;
+    extern rapidjson::Value md_dumpfreq;
+    extern rapidjson::Value dump_force;
+    extern rapidjson::Value dump_vel;
+    extern rapidjson::Value dump_virial;
+    extern rapidjson::Value md_seed;
+    extern rapidjson::Value md_tfreq;
+    extern rapidjson::Value md_tchain;
+    extern rapidjson::Value md_pmode;
+    extern rapidjson::Value md_prec_level;
+    extern rapidjson::Value ref_cell_factor;
+    extern rapidjson::Value md_pcouple;
+    extern rapidjson::Value md_pfirst;
+    extern rapidjson::Value md_plast;
+    extern rapidjson::Value md_pfreq;
+    extern rapidjson::Value md_pchain;
+    extern rapidjson::Value lj_rcut;
+    extern rapidjson::Value lj_epsilon;
+    extern rapidjson::Value lj_sigma;
+    extern rapidjson::Value pot_file;
+    extern rapidjson::Value msst_direction;
+    extern rapidjson::Value msst_vel;
+    extern rapidjson::Value msst_vis;
+    extern rapidjson::Value msst_tscale;
+    extern rapidjson::Value msst_qmass;
+    extern rapidjson::Value md_damp;
+    extern rapidjson::Value md_tolerance;
+    extern rapidjson::Value md_nraise;
+    extern rapidjson::Value cal_syns;
+    extern rapidjson::Value dmax;
+
+    // @param reading_information -- input_para -- dft_plus_u
+    extern rapidjson::Value orbital_corr;
+    extern rapidjson::Value hubbard_u;
+    extern rapidjson::Value yukawa_potential;
+    extern rapidjson::Value yukawa_lambda;
+    extern rapidjson::Value omc;
+
+    // @param reading_information -- input_para -- vdw_correction
+    extern rapidjson::Value vdw_method;
+    extern rapidjson::Value vdw_s6;
+    extern rapidjson::Value vdw_s8;
+    extern rapidjson::Value vdw_a1;
+    extern rapidjson::Value vdw_a2;
+    extern rapidjson::Value vdw_d;
+    extern rapidjson::Value vdw_abc;
+    extern rapidjson::Value vdw_C6_file;
+    extern rapidjson::Value vdw_C6_unit;
+    extern rapidjson::Value vdw_R0_file;
+    extern rapidjson::Value vdw_R0_unit;
+    extern rapidjson::Value vdw_cutoff_type;
+    extern rapidjson::Value vdw_cutoff_radius;
+    extern rapidjson::Value vdw_radius_unit;
+    extern rapidjson::Value vdw_cutoff_period;
+    extern rapidjson::Value vdw_cn_thr;
+    extern rapidjson::Value vdw_cn_thr_unit;
+
+    // @param reading_information -- input_para -- berry_phase_and_wannier90_interface
+    extern rapidjson::Value berry_phase;
+    extern rapidjson::Value gdir;
+    extern rapidjson::Value towannier90;
+    extern rapidjson::Value nnkpfile;
+    extern rapidjson::Value wannier_spin;
+
+    // @param reading_information -- input_para -- tddft
+    extern rapidjson::Value td_edm;
+    extern rapidjson::Value td_print_eij;
+    extern rapidjson::Value td_propagator;
+    extern rapidjson::Value td_vext;
+    extern rapidjson::Value td_vext_dire;
+    extern rapidjson::Value td_stype;
+    extern rapidjson::Value td_ttype;
+    extern rapidjson::Value td_tstart;
+    extern rapidjson::Value td_tend;
+    extern rapidjson::Value td_lcut1;
+    extern rapidjson::Value td_lcut2;
+    extern rapidjson::Value td_gauss_freq;
+    extern rapidjson::Value td_gauss_phase;
+    extern rapidjson::Value td_gauss_sigma;
+    extern rapidjson::Value td_gauss_t0;
+    extern rapidjson::Value td_gauss_amp;
+    extern rapidjson::Value td_trape_freq;
+    extern rapidjson::Value td_trape_phase;
+    extern rapidjson::Value td_trape_t1;
+    extern rapidjson::Value td_trape_t2;
+    extern rapidjson::Value td_trape_t3;
+    extern rapidjson::Value td_trape_amp;
+    extern rapidjson::Value td_trigo_freq1;
+    extern rapidjson::Value td_trigo_freq2;
+    extern rapidjson::Value td_trigo_phase1;
+    extern rapidjson::Value td_trigo_phase2;
+    extern rapidjson::Value td_trigo_amp;
+    extern rapidjson::Value td_heavi_t0;
+    extern rapidjson::Value td_heavi_amp;
+    extern rapidjson::Value td_out_dipole;
+    extern rapidjson::Value td_out_efield;
+    extern rapidjson::Value ocp;
+    extern rapidjson::Value ocp_set;
+
+    // @param reading_information -- input_para -- debuging_related
+    extern rapidjson::Value t_in_h;
+    extern rapidjson::Value vl_in_h;
+    extern rapidjson::Value vnl_in_h;
+    extern rapidjson::Value vh_in_h;
+    extern rapidjson::Value vion_in_h;
+    extern rapidjson::Value test_force;
+    extern rapidjson::Value test_stress;
+    extern rapidjson::Value colour;
+    extern rapidjson::Value test_skip_ewald;
+
+    // @param reading_information -- input_para -- electronic_conductivities
+    extern rapidjson::Value cal_cond;
+    extern rapidjson::Value cond_nche;
+    extern rapidjson::Value cond_dw;
+    extern rapidjson::Value cond_wcut;
+    extern rapidjson::Value cond_dt;
+    extern rapidjson::Value cond_dtbatch;
+    extern rapidjson::Value cond_fwhm;
+    extern rapidjson::Value cond_nonlocal;
+
+    // @param reading_information -- input_para -- implicit_solvation_model
+    extern rapidjson::Value imp_sol;
+    extern rapidjson::Value eb_k;
+    extern rapidjson::Value tau;
+    extern rapidjson::Value sigma_k;
+    extern rapidjson::Value nc_k;
+
+    // @param reading_information -- stru_infos：
+    extern rapidjson::Value stru_infos;
+    // extern rapidjson::Value ATOMIC_SPECIES;
+    // extern rapidjson::Value NUMERICAL_ORBITAL;
+    // extern rapidjson::Value LATTICE_CONSTANT;
+    // extern rapidjson::Value ATOMIC_POSITIONS;
+
+    // @param reading_information -- KPT_infos
+    extern rapidjson::Value KPT_infos;
+    // extern rapidjson::Value total_number;
+    // extern rapidjson::Value mode;
+    // extern rapidjson::Value vectors;
+
+    // @param reading_information -- orb_infos
+    extern rapidjson::Value orb_infos;
+
+    // @param reading_information -- pp
+    extern rapidjson::Value pp;
+
+    // @param init
+    extern rapidjson::Value init;
+    // @param init -- general
+    // extern rapidjson::Value calculation;
+    // extern rapidjson::Value esolver_type;
+    // extern rapidjson::Value basis_type;
+    // extern rapidjson::Value gamma_only;
+    // extern rapidjson::Value ks_solver;
+    // extern rapidjson::Value ntype;
+    // extern rapidjson::Value nspin;
+    // extern rapidjson::Value ecutwfc;
+    // extern rapidjson::Value scf_thr;
+    // extern rapidjson::Value scf_nmax;
+
+    // @param init -- symmetry
+    // extern rapidjson::Value symmetry;
+    // extern rapidjson::Value BRAVAIS_TYPE;
+    // extern rapidjson::Value BRAVAIS_LATTICE_NAME;
+    // extern rapidjson::Value IBRAV;
+    // extern rapidjson::Value LATTICE_CONSTANT_A;
+    // extern rapidjson::Value right_hand_lattice;
+
+    // @param init -- Kpoints
+    extern rapidjson::Value kpoints;
+    extern rapidjson::Value nkstot;
+    extern rapidjson::Value nkstot_ibz;
+    extern rapidjson::Value coordinates;
+    extern rapidjson::Value weight;
+
+    // @param init -- grid
+    extern rapidjson::Value grid;
+    extern rapidjson::Value energy_cutoff_for_wavefunc;
+    extern rapidjson::Value fft_grid_for_wave_functions;
+    extern rapidjson::Value number_of_plane_waves;
+    extern rapidjson::Value number_of_sticks;
+
+    // @param init -- Smearing
+    // extern rapidjson::Value smearing_method;
+    // extern rapidjson::Value smearing_sigma;
+
+    // @param init -- mixing
+    extern rapidjson::Value mixing;
+
+
+    // @param output
+    extern rapidjson::Value output;
+
+
+
+    // @param final_stru
+    extern rapidjson::Value final_stru;
+    extern rapidjson::Value cell;
+    extern rapidjson::Value coordinate;
+
+
+
+
+    /**
+     *  The functions below initialize the json output parameter 
+     *  tree to connect the nodes of the module
+    */
+
+    /**
+     * @brief   add Top stage：parameter in Abacus:
+     */
+    void Init_json_abacus();
+
+
+    /**
+     * @brief   add Second stage：parameter in Abacus - general_info:
+     */
+    void Init_json_abacus_generalInfo();
+
+
+    /**
+     * @brief   add Second stage：parameter in Abacus - readin_info:
+     */
+    void Init_json_abacus_readinInfo();
+
+
+    /**
+     * @brief   finish json tree build
+     */
+    void Finish_json_tree();
+
+
+
+    /**
+     * @brief   This function is used to populate the template type parameter 
+     *          values into rapidjson's Value object
+     */
+    template <typename T> 
+    void set_json_value(rapidjson::Value &json_v,T *para){
+        if(std::is_same<T,int>::value)
+        {
+            json_v.SetInt(*reinterpret_cast<int*>(para)); 
+        }
+        else if(std::is_same<T,double>::value)
+        {
+            json_v.SetDouble(*reinterpret_cast<double*>(para));
+        }
+        else if(std::is_same<T,bool>::value)
+        {
+            json_v.SetBool(*reinterpret_cast<bool*>(para));
+        }
+        else if(std::is_same<T,std::string>::value)
+        {
+            // json_v.SetString(rapidjson::StringRef((*reinterpret_cast<std::string*>(para)).c_str()));
+
+            json_v.SetString((*reinterpret_cast<std::string*>(para)).c_str(), std::strlen((*reinterpret_cast<std::string*>(para)).c_str()), doc.GetAllocator());
+            //printf("exx_real_number = %s\n",(*reinterpret_cast<std::string*>(para)).c_str());
+        }
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt
index 666152b476..008df422e5 100644
--- a/source/module_base/test/CMakeLists.txt
+++ b/source/module_base/test/CMakeLists.txt
@@ -217,3 +217,17 @@ AddTest(
   SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp
   LIBS ${math_libs} formatter
 )
+if(ENABLE_GOOGLEBENCH)
+  AddTest(
+    TARGET perf_sphbes
+    LIBS formatter
+    SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp 
+  )
+endif()
+
+if(ENABLE_RAPIDJSON)
+  AddTest(
+    TARGET base_para_json_test
+    SOURCES para_json_test.cpp ../para_json.cpp
+  )
+endif()
diff --git a/source/module_base/test/complexmatrix_test.cpp b/source/module_base/test/complexmatrix_test.cpp
index 026aeb40de..0adc52363a 100644
--- a/source/module_base/test/complexmatrix_test.cpp
+++ b/source/module_base/test/complexmatrix_test.cpp
@@ -23,8 +23,8 @@
  *  - set_as_identity_matrix()
  *  - print():Output the elements of this complex matrix greater than threshold.
  *  - checkreal()
- * 
- * Tested relative functions 
+ *
+ * Tested relative functions
  *  - operator "+" "-" "*" between two ComplexMatrix
  *  - operator "*" between a ComplexMatrix and double or complex, and reverse.
  *  - trace()
@@ -35,13 +35,13 @@
  *  - conj()
  *  - scale_accumulate():
  *  - scaled_sum():
- * 
+ *
  */
 
 //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37.
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 inline void EXPECT_COMPLEX_EQ(const std::complex<double>& a,const std::complex<double>& b)
@@ -104,8 +104,8 @@ TEST_F(ComplexMatrixTest,ConstructorCM)
 
 TEST_F(ComplexMatrixTest,ConstructorCMrvalue)
 {
-    ModuleBase::ComplexMatrix cm2(cm22);        
-    ModuleBase::ComplexMatrix cm1(std::move(cm22)); 
+    ModuleBase::ComplexMatrix cm2(cm22);
+    ModuleBase::ComplexMatrix cm1(std::move(cm22));
     EXPECT_EQ(cm1.nr,cm2.nr);
     EXPECT_EQ(cm1.nc,cm2.nc);
     EXPECT_EQ(cm1.size,cm2.size);
@@ -338,15 +338,15 @@ TEST_F(ComplexMatrixTest,OperatorMultMatrix)
     EXPECT_EQ(cm33.nr,3);
     EXPECT_EQ(cm33.nc,3);
     EXPECT_EQ(cm33.size,9);
-    EXPECT_COMPLEX_EQ(cm33(0,0),std::complex<double>{-46.0,72.0  }); 
+    EXPECT_COMPLEX_EQ(cm33(0,0),std::complex<double>{-46.0,72.0  });
     EXPECT_COMPLEX_EQ(cm33(0,1),std::complex<double>{-46.0,118.0 });
     EXPECT_COMPLEX_EQ(cm33(0,2),std::complex<double>{-46.0,164.0 });
     EXPECT_COMPLEX_EQ(cm33(1,0),std::complex<double>{-54.0,84.0  });
     EXPECT_COMPLEX_EQ(cm33(1,1),std::complex<double>{-54.0,138.0 });
-    EXPECT_COMPLEX_EQ(cm33(1,2),std::complex<double>{-54.0,192.0 }); 
+    EXPECT_COMPLEX_EQ(cm33(1,2),std::complex<double>{-54.0,192.0 });
     EXPECT_COMPLEX_EQ(cm33(2,0),std::complex<double>{-62.0,96.0  });
     EXPECT_COMPLEX_EQ(cm33(2,1),std::complex<double>{-62.0,158.0 });
-    EXPECT_COMPLEX_EQ(cm33(2,2),std::complex<double>{-62.0,220.0 }); 
+    EXPECT_COMPLEX_EQ(cm33(2,2),std::complex<double>{-62.0,220.0 });
 
     EXPECT_DEATH(cm22 * cm32,"");
 }
@@ -525,7 +525,7 @@ TEST_F(ComplexMatrixTest,ScaleSumArray)
     cmout = new ModuleBase::ComplexMatrix*[2];
     cmin1 = new ModuleBase::ComplexMatrix*[2];
     cmin2 = new ModuleBase::ComplexMatrix*[2];
-   
+
     cmin1[0] = &cm1;
     cmin1[1] = &cm2;
     cmin2[0] = &cm3;
@@ -563,7 +563,7 @@ TEST_F(ComplexMatrixTest,print)
    EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t"));
    ifs.close();
    remove("printtest1.log");
-// The condition of  std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag 
+// The condition of  std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag
    ofs.open("printtest2.log");
    cm22.print(ofs,1e-10,2);
    ofs.close();
diff --git a/source/module_base/test/inverse_matrix_test.cpp b/source/module_base/test/inverse_matrix_test.cpp
index df68f58a56..a871f906cd 100644
--- a/source/module_base/test/inverse_matrix_test.cpp
+++ b/source/module_base/test/inverse_matrix_test.cpp
@@ -19,7 +19,7 @@
 //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37.
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 TEST(InverseMatrixComplexTest, InverseMatrixComplex)
diff --git a/source/module_base/test/math_sphbes_test.cpp b/source/module_base/test/math_sphbes_test.cpp
index 521d4dc2f4..e72c6e289c 100644
--- a/source/module_base/test/math_sphbes_test.cpp
+++ b/source/module_base/test/math_sphbes_test.cpp
@@ -352,15 +352,27 @@ TEST_F(Sphbes, Zeros)
 
     int lmax = 20;
     int nzeros = 500;
-    double* zeros = new double[nzeros];
+    double* zeros = new double[nzeros*(lmax+1)];
     for (int l = 0; l <= lmax; ++l)
     {
-        ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros);
+        ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros, false);
         for (int i = 0; i < nzeros; ++i)
         {
             EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-14);
         }
     }
+
+
+    ModuleBase::Sphbes::sphbes_zeros(lmax, nzeros, zeros, true);
+    for (int l = 0; l <= lmax; ++l)
+    {
+        for (int i = 0; i < nzeros; ++i)
+        {
+            EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[l*nzeros+i])), 1e-14);
+        }
+    }
+
+    delete[] zeros;
 }
 
 TEST_F(Sphbes, ZerosOld)
diff --git a/source/module_base/test/math_ylmreal_test.cpp b/source/module_base/test/math_ylmreal_test.cpp
index d5e7a504ed..13d0bd2b69 100644
--- a/source/module_base/test/math_ylmreal_test.cpp
+++ b/source/module_base/test/math_ylmreal_test.cpp
@@ -13,16 +13,16 @@
 ***********************************************/
 
 /**
- * For lmax <5 cases, the reference values are calculated by the formula from 
+ * For lmax <5 cases, the reference values are calculated by the formula from
  * https://formulasearchengine.com/wiki/Table_of_spherical_harmonics. Note, these
- * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item 
+ * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item
  * (-1)^m is multiplied.
  * For lmax >=5, the reference values are calculated by YlmReal::Ylm_Real.
  *
  * - Tested functions of class YlmReal
  *      - Ylm_Real
  *      - Ylm_Real2
- *      - rlylm 
+ *      - rlylm
  *      - YlmRealTemplate (double and float)
  *
  * - Tested functions of class Ylm
@@ -30,9 +30,9 @@
  *      - sph_harm
  *      - rl_sph_harm
  *      - grad_rl_sph_harm
- *      - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and  get_ylm_real (Cartesian input) 
+ *      - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and  get_ylm_real (Cartesian input)
  *      - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) and  rlylm (Cartesian input)
- * 
+ *
  */
 
 
@@ -40,7 +40,7 @@
 //mock functions of WARNING_QUIT and WARNING
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
     void WARNING(const std::string &file,const std::string &description) {return ;}
 }
 
@@ -58,7 +58,7 @@ class YlmRealTest : public testing::Test
     ModuleBase::Vector3<double> *g; //vectors of the 4 points
     double *ref;        //reference of Ylm
     double *rly;        //Ylm
-    double (*rlgy)[3];  //the gradient of Ylm  
+    double (*rlgy)[3];  //the gradient of Ylm
     std::vector<double> rlyvector; //Ylm
     std::vector<std::vector<double>> rlgyvector; //the gradient of Ylm
 
@@ -91,101 +91,101 @@ class YlmRealTest : public testing::Test
     double y4m4(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(35./M_PI) * x*y*(x*x - y*y) / (r*r*r*r);}
 
     //the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm
-    //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz 
+    //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz
     double rlgyref[4][64][3] = {
-        {   { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, {-6.30783e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -1.09255e+00}, 
-            { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  1.09255e+00, -0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00, -1.11953e+00}, { 1.37114e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  4.57046e-01,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  1.44531e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.77013e+00,  0.00000e+00, -0.00000e+00}, 
-            { 0.00000e+00, -1.77013e+00,  0.00000e+00}, { 1.26943e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.00714e+00}, 
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.89235e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -9.46175e-01,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 2.50334e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  2.50334e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-2.26473e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -4.52947e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00,  0.00000e+00}, 
-            { 2.44619e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.46771e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00}, 
-            {-1.90708e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, 
-            { 2.76362e+00,  0.00000e+00, -0.00000e+00}, {-0.00000e+00,  9.21205e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.76362e+00}, 
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.02739e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -2.01826e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 4.09910e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  4.09910e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.38995e+00}, { 3.16161e+00,  0.00000e+00, -0.00000e+00}, 
-            { 0.00000e+00,  4.51658e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.31900e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-3.28564e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.40813e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -3.11349e+00}, 
-            {-0.00000e+00, -0.00000e+00,  0.00000e+00}, { 3.63241e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  2.59458e+00,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-4.95014e+00,  0.00000e+00, -0.00000e+00}, 
+        {   { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, {-6.30783e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -1.09255e+00},
+            { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  1.09255e+00, -0.00000e+00},
+            {-0.00000e+00,  0.00000e+00, -1.11953e+00}, { 1.37114e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  4.57046e-01,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  1.44531e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.77013e+00,  0.00000e+00, -0.00000e+00},
+            { 0.00000e+00, -1.77013e+00,  0.00000e+00}, { 1.26943e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.00714e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.89235e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -9.46175e-01,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 2.50334e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  2.50334e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-2.26473e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -4.52947e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00,  0.00000e+00},
+            { 2.44619e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.46771e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00},
+            {-1.90708e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00},
+            { 2.76362e+00,  0.00000e+00, -0.00000e+00}, {-0.00000e+00,  9.21205e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.76362e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.02739e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -2.01826e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 4.09910e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  4.09910e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.38995e+00}, { 3.16161e+00,  0.00000e+00, -0.00000e+00},
+            { 0.00000e+00,  4.51658e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.31900e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-3.28564e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.40813e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -3.11349e+00},
+            {-0.00000e+00, -0.00000e+00,  0.00000e+00}, { 3.63241e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  2.59458e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-4.95014e+00,  0.00000e+00, -0.00000e+00},
             { 0.00000e+00, -4.95014e+00,  0.00000e+00}
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00, -6.30783e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00, -0.00000e+00}, 
-            { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.37114e+00, -0.00000e+00}, 
-            { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.77013e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  1.77013e+00,  0.00000e+00}, { 0.00000e+00,  1.26943e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  2.00714e+00}, { 0.00000e+00,  1.89235e+00, -0.00000e+00}, {-9.46175e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.77013e+00}, { 0.00000e+00,  2.50334e+00, -0.00000e+00}, 
-            {-2.50334e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-4.52947e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -2.26473e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.39677e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-1.46771e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.44619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00}, 
-            { 0.00000e+00, -1.90708e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, 
-            { 0.00000e+00, -2.76362e+00,  0.00000e+00}, { 9.21205e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00,  0.00000e+00}, { 2.01826e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00,  0.00000e+00}, 
-            { 4.09910e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            { 1.40813e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, 
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 2.59458e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.63241e+00, -0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00, -2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.95014e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  4.95014e+00, -0.00000e+00}           
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00, -6.30783e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00, -0.00000e+00},
+            { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.37114e+00, -0.00000e+00},
+            { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.77013e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  1.77013e+00,  0.00000e+00}, { 0.00000e+00,  1.26943e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  2.00714e+00}, { 0.00000e+00,  1.89235e+00, -0.00000e+00}, {-9.46175e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.77013e+00}, { 0.00000e+00,  2.50334e+00, -0.00000e+00},
+            {-2.50334e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-4.52947e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -2.26473e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.39677e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-1.46771e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.44619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00},
+            { 0.00000e+00, -1.90708e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00},
+            { 0.00000e+00, -2.76362e+00,  0.00000e+00}, { 9.21205e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00,  0.00000e+00}, { 2.01826e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00,  0.00000e+00},
+            { 4.09910e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            { 1.40813e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 2.59458e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.63241e+00, -0.00000e+00},
+            { 0.00000e+00,  0.00000e+00, -2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.95014e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  4.95014e+00, -0.00000e+00}
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.26157e+00}, {-1.09255e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.22045e-16}, {-0.00000e+00,  0.00000e+00, -0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  2.23906e+00}, {-1.82818e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.82818e+00,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  8.81212e-16}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.84324e-16,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  5.55112e-17,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.38514e+00}, {-2.67619e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -2.67619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.30756e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-5.52973e-16,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.66533e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.67801e+00}, {-3.62357e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -3.62357e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.87108e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-1.22267e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.68219e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.93038e-32,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -6.16298e-33,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  6.10264e+00}, {-4.66097e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -4.66097e+00,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  8.98664e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-2.30221e-15,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00,  6.93334e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            { 1.77767e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.22209e-32,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  7.64784e+00}, {-5.78122e+00,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -5.78122e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.51096e-14}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-3.91011e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.17757e-15,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.67737e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -5.84671e-32,  0.00000e+00}, 
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.13319e-47,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.26157e+00}, {-1.09255e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.22045e-16}, {-0.00000e+00,  0.00000e+00, -0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  2.23906e+00}, {-1.82818e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.82818e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  8.81212e-16}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.84324e-16,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  5.55112e-17,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.38514e+00}, {-2.67619e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -2.67619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.30756e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-5.52973e-16,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.66533e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.67801e+00}, {-3.62357e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -3.62357e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.87108e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-1.22267e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.68219e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.93038e-32,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -6.16298e-33,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  6.10264e+00}, {-4.66097e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -4.66097e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  8.98664e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-2.30221e-15,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  6.93334e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            { 1.77767e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.22209e-32,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  7.64784e+00}, {-5.78122e+00,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -5.78122e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.51096e-14}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-3.91011e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.17757e-15,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.67737e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -5.84671e-32,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.13319e-47,  0.00000e+00,  0.00000e+00},
             { 0.00000e+00, -1.41649e-48,  0.00000e+00}
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 3.64183e-01,  3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00,  6.30783e-01}, 
-            {-0.00000e+00,  6.30783e-01,  6.30783e-01}, {-6.30783e-01,  6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01,  0.00000e+00}, 
-            {-7.46353e-01, -7.46353e-01,  0.00000e+00}, { 0.00000e+00,  3.04697e-01, -1.21879e+00}, { 3.04697e-01,  0.00000e+00, -1.21879e+00}, 
-            { 9.63537e-01, -9.63537e-01,  4.01253e-16}, { 9.63537e-01,  9.63537e-01,  9.63537e-01}, {-4.44089e-16,  1.18009e+00, -2.22045e-16}, 
-            {-1.18009e+00, -1.11022e-16,  0.00000e+00}, { 4.88603e-01,  4.88603e-01,  1.30294e+00}, {-1.03006e+00, -7.72548e-01,  7.72548e-01}, 
-            {-7.72548e-01, -1.03006e+00,  7.72548e-01}, {-7.28366e-01,  7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, 
-            { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00,  1.92296e-16,  6.81324e-01}, { 9.63537e-01,  9.63537e-01, -1.44756e-16}, 
-            {-9.63537e-01,  9.63537e-01, -5.55112e-17}, { 5.19779e-01,  5.19779e-01, -1.81923e+00}, { 1.40917e+00,  8.05238e-01,  8.05238e-01}, 
-            { 8.05238e-01,  1.40917e+00,  8.05238e-01}, { 0.00000e+00, -4.44089e-16,  3.24739e-16}, {-1.06523e+00, -1.06523e+00,  2.13046e+00}, 
-            {-2.17439e-01,  1.73951e+00,  1.73951e+00}, {-1.73951e+00,  2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, 
-            { 1.84503e+00, -1.84503e+00,  6.58625e-16}, { 1.45863e+00,  1.11022e-15,  0.00000e+00}, {-8.88178e-16,  1.45863e+00,  0.00000e+00}, 
-            {-1.46807e+00, -1.46807e+00,  5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, 
-            { 7.09144e-01, -7.09144e-01,  1.87222e-16}, { 2.12743e+00,  2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, 
-            { 1.02201e-15, -7.09144e-01,  2.12743e+00}, { 1.81260e+00,  1.81260e+00,  2.58943e+00}, {-2.07154e+00,  2.07154e+00, -1.66969e-15}, 
-            {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00,  5.77778e-17}, 
-            { 1.05183e+00,  1.05183e+00,  4.03986e-17}, { 1.27464e+00,  1.27464e+00,  1.69952e+00}, {-1.28472e+00, -1.20442e+00,  1.92707e+00}, 
-            {-1.20442e+00, -1.28472e+00,  1.92707e+00}, {-8.52285e-01,  8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, 
-            {-1.11260e+00, -2.08612e+00,  9.27164e-01}, { 2.08612e+00,  1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, 
-            { 1.23002e+00, -1.23002e+00,  2.28018e-15}, { 3.69007e+00, -1.53753e-01,  1.84503e+00}, {-1.53753e-01,  3.69007e+00,  1.84503e+00}, 
-            {-2.35197e+00,  2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00,  9.77875e-17}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 3.64183e-01,  3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00,  6.30783e-01},
+            {-0.00000e+00,  6.30783e-01,  6.30783e-01}, {-6.30783e-01,  6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01,  0.00000e+00},
+            {-7.46353e-01, -7.46353e-01,  0.00000e+00}, { 0.00000e+00,  3.04697e-01, -1.21879e+00}, { 3.04697e-01,  0.00000e+00, -1.21879e+00},
+            { 9.63537e-01, -9.63537e-01,  4.01253e-16}, { 9.63537e-01,  9.63537e-01,  9.63537e-01}, {-4.44089e-16,  1.18009e+00, -2.22045e-16},
+            {-1.18009e+00, -1.11022e-16,  0.00000e+00}, { 4.88603e-01,  4.88603e-01,  1.30294e+00}, {-1.03006e+00, -7.72548e-01,  7.72548e-01},
+            {-7.72548e-01, -1.03006e+00,  7.72548e-01}, {-7.28366e-01,  7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00},
+            { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00,  1.92296e-16,  6.81324e-01}, { 9.63537e-01,  9.63537e-01, -1.44756e-16},
+            {-9.63537e-01,  9.63537e-01, -5.55112e-17}, { 5.19779e-01,  5.19779e-01, -1.81923e+00}, { 1.40917e+00,  8.05238e-01,  8.05238e-01},
+            { 8.05238e-01,  1.40917e+00,  8.05238e-01}, { 0.00000e+00, -4.44089e-16,  3.24739e-16}, {-1.06523e+00, -1.06523e+00,  2.13046e+00},
+            {-2.17439e-01,  1.73951e+00,  1.73951e+00}, {-1.73951e+00,  2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01},
+            { 1.84503e+00, -1.84503e+00,  6.58625e-16}, { 1.45863e+00,  1.11022e-15,  0.00000e+00}, {-8.88178e-16,  1.45863e+00,  0.00000e+00},
+            {-1.46807e+00, -1.46807e+00,  5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00},
+            { 7.09144e-01, -7.09144e-01,  1.87222e-16}, { 2.12743e+00,  2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00},
+            { 1.02201e-15, -7.09144e-01,  2.12743e+00}, { 1.81260e+00,  1.81260e+00,  2.58943e+00}, {-2.07154e+00,  2.07154e+00, -1.66969e-15},
+            {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00,  5.77778e-17},
+            { 1.05183e+00,  1.05183e+00,  4.03986e-17}, { 1.27464e+00,  1.27464e+00,  1.69952e+00}, {-1.28472e+00, -1.20442e+00,  1.92707e+00},
+            {-1.20442e+00, -1.28472e+00,  1.92707e+00}, {-8.52285e-01,  8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00},
+            {-1.11260e+00, -2.08612e+00,  9.27164e-01}, { 2.08612e+00,  1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00},
+            { 1.23002e+00, -1.23002e+00,  2.28018e-15}, { 3.69007e+00, -1.53753e-01,  1.84503e+00}, {-1.53753e-01,  3.69007e+00,  1.84503e+00},
+            {-2.35197e+00,  2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00,  9.77875e-17},
             { 1.46671e+00,  1.14919e-15,  1.34475e-16}
         }
     };
@@ -206,71 +206,71 @@ class YlmRealTest : public testing::Test
         rlgy = new double[nylm][3];
         rlgyvector.resize(nylm,std::vector<double>(3));
         ref = new double[64*4]{
-            y00(g[0].x, g[0].y, g[0].z),  y00(g[1].x, g[1].y, g[1].z),  y00(g[2].x, g[2].y, g[2].z),  y00(g[3].x, g[3].y, g[3].z),  
-            y10(g[0].x, g[0].y, g[0].z),  y10(g[1].x, g[1].y, g[1].z),  y10(g[2].x, g[2].y, g[2].z),  y10(g[3].x, g[3].y, g[3].z),  
-            y11(g[0].x, g[0].y, g[0].z),  y11(g[1].x, g[1].y, g[1].z),  y11(g[2].x, g[2].y, g[2].z),  y11(g[3].x, g[3].y, g[3].z),  
-            y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), 
-            y20(g[0].x, g[0].y, g[0].z),  y20(g[1].x, g[1].y, g[1].z),  y20(g[2].x, g[2].y, g[2].z),  y20(g[3].x, g[3].y, g[3].z),  
-            y21(g[0].x, g[0].y, g[0].z),  y21(g[1].x, g[1].y, g[1].z),  y21(g[2].x, g[2].y, g[2].z),  y21(g[3].x, g[3].y, g[3].z),  
-            y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z),                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
-            y22(g[0].x, g[0].y, g[0].z),  y22(g[1].x, g[1].y, g[1].z),  y22(g[2].x, g[2].y, g[2].z),  y22(g[3].x, g[3].y, g[3].z),  
-            y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), 
-            y30(g[0].x, g[0].y, g[0].z),  y30(g[1].x, g[1].y, g[1].z),  y30(g[2].x, g[2].y, g[2].z),  y30(g[3].x, g[3].y, g[3].z),  
-            y31(g[0].x, g[0].y, g[0].z),  y31(g[1].x, g[1].y, g[1].z),  y31(g[2].x, g[2].y, g[2].z),  y31(g[3].x, g[3].y, g[3].z),  
-            y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), 
-            y32(g[0].x, g[0].y, g[0].z),  y32(g[1].x, g[1].y, g[1].z),  y32(g[2].x, g[2].y, g[2].z),  y32(g[3].x, g[3].y, g[3].z),  
-            y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), 
-            y33(g[0].x, g[0].y, g[0].z),  y33(g[1].x, g[1].y, g[1].z),  y33(g[2].x, g[2].y, g[2].z),  y33(g[3].x, g[3].y, g[3].z),  
-            y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), 
-            y40(g[0].x, g[0].y, g[0].z),  y40(g[1].x, g[1].y, g[1].z),  y40(g[2].x, g[2].y, g[2].z),  y40(g[3].x, g[3].y, g[3].z),  
-            y41(g[0].x, g[0].y, g[0].z),  y41(g[1].x, g[1].y, g[1].z),  y41(g[2].x, g[2].y, g[2].z),  y41(g[3].x, g[3].y, g[3].z),  
-            y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), 
-            y42(g[0].x, g[0].y, g[0].z),  y42(g[1].x, g[1].y, g[1].z),  y42(g[2].x, g[2].y, g[2].z),  y42(g[3].x, g[3].y, g[3].z),  
-            y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), 
-            y43(g[0].x, g[0].y, g[0].z),  y43(g[1].x, g[1].y, g[1].z),  y43(g[2].x, g[2].y, g[2].z),  y43(g[3].x, g[3].y, g[3].z),  
-            y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), 
-            y44(g[0].x, g[0].y, g[0].z),  y44(g[1].x, g[1].y, g[1].z),  y44(g[2].x, g[2].y, g[2].z),  y44(g[3].x, g[3].y, g[3].z),  
-            y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), 
-              0.000000000000000,    0.000000000000000,    0.935602579627389,    0.090028400200397, 
-             -0.452946651195697,   -0.000000000000000,   -0.000000000000000,   -0.348678494661834, 
-             -0.000000000000000,   -0.452946651195697,   -0.000000000000000,   -0.348678494661834, 
-             -0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
-             -0.000000000000000,   -0.000000000000000,    0.000000000000000,   -0.000000000000000, 
-              0.489238299435250,    0.000000000000000,   -0.000000000000000,   -0.376615818502422, 
-              0.000000000000000,   -0.489238299435250,   -0.000000000000000,    0.376615818502422, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.532615198330370, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
-             -0.656382056840170,   -0.000000000000000,   -0.000000000000000,   -0.168427714314628, 
-             -0.000000000000000,   -0.656382056840170,   -0.000000000000000,   -0.168427714314628, 
-             -0.317846011338142,   -0.317846011338142,    1.017107236282055,    0.226023830284901, 
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103, 
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103, 
-              0.460602629757462,   -0.460602629757462,    0.000000000000000,   -0.000000000000000, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.409424559784410, 
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.136474853261470, 
-             -0.000000000000000,    0.000000000000000,   -0.000000000000000,   -0.136474853261470, 
-             -0.504564900728724,   -0.504564900728724,    0.000000000000000,   -0.598002845308118, 
-             -0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556, 
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556, 
-              0.683184105191914,   -0.683184105191914,    0.000000000000000,   -0.000000000000000, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.202424920056864, 
-              0.000000000000000,    0.000000000000000,    1.092548430592079,   -0.350435072502801, 
-              0.451658037912587,    0.000000000000000,   -0.000000000000000,    0.046358202625865, 
-              0.000000000000000,    0.451658037912587,   -0.000000000000000,    0.046358202625865, 
-              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.492067081245654, 
-             -0.469376801586882,   -0.000000000000000,   -0.000000000000000,    0.187354445356332, 
-             -0.000000000000000,    0.469376801586882,   -0.000000000000000,   -0.187354445356332, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.355076798886913, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
-              0.518915578720260,    0.000000000000000,   -0.000000000000000,   -0.443845998608641, 
-              0.000000000000000,    0.518915578720260,   -0.000000000000000,   -0.443845998608641, 
-              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.452635881587108, 
-             -0.707162732524596,    0.000000000000000,   -0.000000000000000,    0.120972027847095, 
-             -0.000000000000000,    0.707162732524596,   -0.000000000000000,   -0.120972027847095  
-         } ; 
+            y00(g[0].x, g[0].y, g[0].z),  y00(g[1].x, g[1].y, g[1].z),  y00(g[2].x, g[2].y, g[2].z),  y00(g[3].x, g[3].y, g[3].z),
+            y10(g[0].x, g[0].y, g[0].z),  y10(g[1].x, g[1].y, g[1].z),  y10(g[2].x, g[2].y, g[2].z),  y10(g[3].x, g[3].y, g[3].z),
+            y11(g[0].x, g[0].y, g[0].z),  y11(g[1].x, g[1].y, g[1].z),  y11(g[2].x, g[2].y, g[2].z),  y11(g[3].x, g[3].y, g[3].z),
+            y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z),
+            y20(g[0].x, g[0].y, g[0].z),  y20(g[1].x, g[1].y, g[1].z),  y20(g[2].x, g[2].y, g[2].z),  y20(g[3].x, g[3].y, g[3].z),
+            y21(g[0].x, g[0].y, g[0].z),  y21(g[1].x, g[1].y, g[1].z),  y21(g[2].x, g[2].y, g[2].z),  y21(g[3].x, g[3].y, g[3].z),
+            y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z),
+            y22(g[0].x, g[0].y, g[0].z),  y22(g[1].x, g[1].y, g[1].z),  y22(g[2].x, g[2].y, g[2].z),  y22(g[3].x, g[3].y, g[3].z),
+            y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z),
+            y30(g[0].x, g[0].y, g[0].z),  y30(g[1].x, g[1].y, g[1].z),  y30(g[2].x, g[2].y, g[2].z),  y30(g[3].x, g[3].y, g[3].z),
+            y31(g[0].x, g[0].y, g[0].z),  y31(g[1].x, g[1].y, g[1].z),  y31(g[2].x, g[2].y, g[2].z),  y31(g[3].x, g[3].y, g[3].z),
+            y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z),
+            y32(g[0].x, g[0].y, g[0].z),  y32(g[1].x, g[1].y, g[1].z),  y32(g[2].x, g[2].y, g[2].z),  y32(g[3].x, g[3].y, g[3].z),
+            y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z),
+            y33(g[0].x, g[0].y, g[0].z),  y33(g[1].x, g[1].y, g[1].z),  y33(g[2].x, g[2].y, g[2].z),  y33(g[3].x, g[3].y, g[3].z),
+            y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z),
+            y40(g[0].x, g[0].y, g[0].z),  y40(g[1].x, g[1].y, g[1].z),  y40(g[2].x, g[2].y, g[2].z),  y40(g[3].x, g[3].y, g[3].z),
+            y41(g[0].x, g[0].y, g[0].z),  y41(g[1].x, g[1].y, g[1].z),  y41(g[2].x, g[2].y, g[2].z),  y41(g[3].x, g[3].y, g[3].z),
+            y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z),
+            y42(g[0].x, g[0].y, g[0].z),  y42(g[1].x, g[1].y, g[1].z),  y42(g[2].x, g[2].y, g[2].z),  y42(g[3].x, g[3].y, g[3].z),
+            y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z),
+            y43(g[0].x, g[0].y, g[0].z),  y43(g[1].x, g[1].y, g[1].z),  y43(g[2].x, g[2].y, g[2].z),  y43(g[3].x, g[3].y, g[3].z),
+            y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z),
+            y44(g[0].x, g[0].y, g[0].z),  y44(g[1].x, g[1].y, g[1].z),  y44(g[2].x, g[2].y, g[2].z),  y44(g[3].x, g[3].y, g[3].z),
+            y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z),
+              0.000000000000000,    0.000000000000000,    0.935602579627389,    0.090028400200397,
+             -0.452946651195697,   -0.000000000000000,   -0.000000000000000,   -0.348678494661834,
+             -0.000000000000000,   -0.452946651195697,   -0.000000000000000,   -0.348678494661834,
+             -0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
+             -0.000000000000000,   -0.000000000000000,    0.000000000000000,   -0.000000000000000,
+              0.489238299435250,    0.000000000000000,   -0.000000000000000,   -0.376615818502422,
+              0.000000000000000,   -0.489238299435250,   -0.000000000000000,    0.376615818502422,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.532615198330370,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
+             -0.656382056840170,   -0.000000000000000,   -0.000000000000000,   -0.168427714314628,
+             -0.000000000000000,   -0.656382056840170,   -0.000000000000000,   -0.168427714314628,
+             -0.317846011338142,   -0.317846011338142,    1.017107236282055,    0.226023830284901,
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103,
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103,
+              0.460602629757462,   -0.460602629757462,    0.000000000000000,   -0.000000000000000,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.409424559784410,
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.136474853261470,
+             -0.000000000000000,    0.000000000000000,   -0.000000000000000,   -0.136474853261470,
+             -0.504564900728724,   -0.504564900728724,    0.000000000000000,   -0.598002845308118,
+             -0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556,
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556,
+              0.683184105191914,   -0.683184105191914,    0.000000000000000,   -0.000000000000000,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.202424920056864,
+              0.000000000000000,    0.000000000000000,    1.092548430592079,   -0.350435072502801,
+              0.451658037912587,    0.000000000000000,   -0.000000000000000,    0.046358202625865,
+              0.000000000000000,    0.451658037912587,   -0.000000000000000,    0.046358202625865,
+              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.492067081245654,
+             -0.469376801586882,   -0.000000000000000,   -0.000000000000000,    0.187354445356332,
+             -0.000000000000000,    0.469376801586882,   -0.000000000000000,   -0.187354445356332,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.355076798886913,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
+              0.518915578720260,    0.000000000000000,   -0.000000000000000,   -0.443845998608641,
+              0.000000000000000,    0.518915578720260,   -0.000000000000000,   -0.443845998608641,
+              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.452635881587108,
+             -0.707162732524596,    0.000000000000000,   -0.000000000000000,    0.120972027847095,
+             -0.000000000000000,    0.707162732524596,   -0.000000000000000,   -0.120972027847095
+         } ;
     }
 
     void TearDown()
@@ -293,11 +293,11 @@ TEST_F(YlmRealTest,YlmReal)
     ModuleBase::YlmReal::Ylm_Real(nylm,ng,g,ylm);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j) 
+        for(int j=0;j<ng;++j)
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
         }
-    } 
+    }
 }
 
 TEST_F(YlmRealTest,YlmRealTemplate)
@@ -318,7 +318,7 @@ TEST_F(YlmRealTest,gradYlmReal)
     ModuleBase::YlmReal::grad_Ylm_Real(nylm,ng,g,ylm,dylm[0],dylm[1],dylm[2]);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j) 
+        for(int j=0;j<ng;++j)
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
         }
@@ -328,7 +328,7 @@ TEST_F(YlmRealTest,gradYlmReal)
     double step = 1e-7;
     for(int id = 0 ; id < 3 ; ++id)
     {
-        for(int j=0;j<ng;++j) 
+        for(int j=0;j<ng;++j)
         {
             ModuleBase::Vector3<double> gplus = g[j];
             ModuleBase::Vector3<double> gminus = g[j];
@@ -352,16 +352,16 @@ TEST_F(YlmRealTest,YlmReal2)
     ModuleBase::YlmReal::Ylm_Real2(nylm,ng,g,ylm);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j) 
+        for(int j=0;j<ng;++j)
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold) << "Ylm[" << i << "], example " << j << " not pass";
         }
-    } 
+    }
 }
 
 
 TEST_F(YlmRealTest,YlmRealRlylm)
-{    
+{
     for(int j=0;j<ng;++j)
     {
         ModuleBase::YlmReal::rlylm(lmax,g[j].x,g[j].y,g[j].z,rly);
@@ -374,7 +374,7 @@ TEST_F(YlmRealTest,YlmRealRlylm)
 
 
 TEST_F(YlmRealTest,YlmGetYlmReal)
-{    
+{
     for(int j=0;j<ng;++j)
     {
         ModuleBase::Ylm::get_ylm_real(lmax+1,g[j],rly);
@@ -386,7 +386,7 @@ TEST_F(YlmRealTest,YlmGetYlmReal)
 }
 
 TEST_F(YlmRealTest,YlmSphHarm)
-{    
+{
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -395,13 +395,13 @@ TEST_F(YlmRealTest,YlmSphHarm)
         for(int i=0;i<nylm;++i)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
-            
+
         }
     }
 }
 
 TEST_F(YlmRealTest,YlmRlSphHarm)
-{    
+{
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -410,13 +410,13 @@ TEST_F(YlmRealTest,YlmRlSphHarm)
         for(int i=0;i<nylm;++i)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
-            
+
         }
     }
 }
 //used to be test1 in ylm.h
 TEST_F(YlmRealTest,YlmGradRlSphHarm)
-{    
+{
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -426,7 +426,7 @@ TEST_F(YlmRealTest,YlmGradRlSphHarm)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
             for(int k=0;k<3;++k) {EXPECT_NEAR(rlgyvector[i][k],rlgyref[j][i][k],1e-5);}
-            
+
         }
     }
 }
@@ -435,7 +435,7 @@ TEST_F(YlmRealTest,YlmGradRlSphHarm)
 TEST_F(YlmRealTest, equality_value_test)
 {
 
-    
+
     ModuleBase::Vector3<double> R (20.0, 0.0, 0.0);
 	const double xdr = R.x/R.norm();
 	const double ydr = R.y/R.norm();
@@ -444,17 +444,17 @@ TEST_F(YlmRealTest, equality_value_test)
 	const double rl = std::pow( R.norm(), L);
 	//std::cout << " rl=" << rl << std::endl;
 	ModuleBase::Ylm::set_coefficients();
-	
+
 	int nu = 100;
-	
+
 	// Peize Lin change rlya 2016-08-26
 	std::vector<double> rlya;
 	double rlyb[400];
 	ModuleBase::Ylm::ZEROS( rlyb, 400);
-	
+
 	ModuleBase::Ylm::rl_sph_harm(L, xdr, ydr, zdr, rlya);
 	ModuleBase::Ylm::get_ylm_real(L+1, R, rlyb);
-	
+
 	for (int i=0; i < nu; i++)
 	{
 		double diff = fabs(rlya[i]-rlyb[i]);
@@ -467,21 +467,21 @@ TEST_F(YlmRealTest, equality_value_test)
 TEST_F(YlmRealTest, equality_gradient_test)
 {
 
-    
+
     ModuleBase::Vector3<double> R (0.1,-0.2,0.5);
 	ModuleBase::Ylm::set_coefficients();
-	
+
 	//int nu = 100;
 
 	std::vector<double> rlya;
 	double rlyb[400];
-	
+
 	std::vector<std::vector<double>> grlya;
 	double grlyb[400][3];
-	
+
 	ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya);
 	ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb);
-	
+
 	for (int i = 0; i < 100; i++)
 	{
 		double diffx = fabs(grlya[i][2]-grlyb[i][2]);
diff --git a/source/module_base/test/para_json_test.cpp b/source/module_base/test/para_json_test.cpp
new file mode 100644
index 0000000000..3ce6ecce49
--- /dev/null
+++ b/source/module_base/test/para_json_test.cpp
@@ -0,0 +1,68 @@
+#include "module_base/para_json.h"
+#include "gtest/gtest.h"
+#ifdef __MPI
+#include "mpi.h"
+#endif
+
+#include <stdlib.h>
+#include "rapidjson/document.h"
+/************************************************
+ *  unit test of Input::ParaJson
+ ***********************************************/
+
+/**
+ * - Tested Functions:
+ *   - Init()
+ *     - init json tree from input::Init and check if the json string is valid
+ */
+
+class ParaJsonTest : public ::testing::Test
+{
+  protected:
+    std::string testString;
+};
+
+// check if a string is a valid JSON string
+bool isValidJSON(const std::string& jsonString)
+{
+    rapidjson::Document document;
+    document.Parse(jsonString.c_str());
+
+    return !document.HasParseError();
+}
+
+TEST_F(ParaJsonTest, Init)
+{
+    //std::string input_file = "./support/INPUT";
+    //Input input_tmp;
+    //EXPECT_NO_THROW(input_tmp.Init(input_file));
+
+    // int status = system("rm -r ./OUT.autotest/");
+    // EXPECT_EQ(status,0);
+    // Para_Json::Init_json_abacus_readinInfo();
+    Para_Json::Init_json_abacus_generalInfo();
+    Para_Json::Init_json_abacus();
+    Para_Json::Finish_json_tree();
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    Para_Json::doc.Accept(writer);
+    std::string json = buffer.GetString();
+    EXPECT_EQ(isValidJSON(json), true);
+}
+
+int main(int argc, char** argv)
+{
+#ifdef __MPI
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC);
+    MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK);
+#endif
+    testing::InitGoogleTest(&argc, argv);
+    int result;
+    result = RUN_ALL_TESTS();
+#ifdef __MPI
+    MPI_Finalize();
+#endif
+    return result;
+}
+
diff --git a/source/module_base/test/perf_sphbes_test.cpp b/source/module_base/test/perf_sphbes_test.cpp
new file mode 100644
index 0000000000..4c574baa8e
--- /dev/null
+++ b/source/module_base/test/perf_sphbes_test.cpp
@@ -0,0 +1,72 @@
+#include"../math_sphbes.h"
+#include<fstream>
+#include <benchmark/benchmark.h>
+#include <iostream>
+#include <cstring>
+#include <cmath>
+
+/************************************************
+*  performace test of class Sphbes
+***********************************************/
+
+/**
+ * Tested function: 
+ *      - sphbesj
+ *      - Spherical_Bessel
+ */
+
+class PerfSphbes : public benchmark::Fixture {
+public:
+    const double q = 1;
+    const int n = 1000;
+    double stop = 1000.0;
+    double dr = 0.0;
+    double* rc, *rinf, *jc, *jinf;
+    void SetUp(const benchmark::State& state){
+        const double rcut = state.range(0) + 0.5;
+        rc = new double[n + 10]; 
+        rinf = new double[n + 10];
+        jc = new double[n + 10];
+        jinf = new double[n + 10];
+
+        // generate data points in (0, rcut] in log scale
+        double rmin = 0.0001;
+        double log_rmin = std::log(rmin);
+        double log_rcut = std::log(rcut);
+        dr = (log_rcut - log_rmin) / (n-1);
+        memset(rc, 0, (n+10) * sizeof(double));
+        for (int i = 0; i < n; i++)
+            rc[i] = std::exp(log_rmin + i * dr);
+        
+        // generate data points in [rcut, stop] in linear scale
+        memset(rinf, 0, (n+10) * sizeof(double));
+        rinf[0] = rcut;
+        dr = (stop - rcut) / (n-1);
+        for (int i = 1; i < n; i++)
+            rinf[i] += rinf[i-1] + dr;
+    }
+    void TearDown(const benchmark::State& state){
+        delete[] rc;
+        delete[] rinf;
+        delete[] jc;
+        delete[]  jinf;
+    }
+};    
+
+BENCHMARK_DEFINE_F(PerfSphbes, BM_Spherical_Bessel)(benchmark::State& state) {
+    for (auto _ : state) {
+        ModuleBase::Sphbes::Spherical_Bessel(n, rc, q, state.range(0), jc);
+        ModuleBase::Sphbes::Spherical_Bessel(n, rinf, q, state.range(0), jinf);
+    }
+}
+
+BENCHMARK_DEFINE_F(PerfSphbes, BM_sphbesj)(benchmark::State& state) {
+    for (auto _ : state) {
+        ModuleBase::Sphbes::sphbesj(n, rc, q, state.range(0), jc);
+        ModuleBase::Sphbes::sphbesj(n, rinf, q, state.range(0), jinf);
+    }
+}
+
+BENCHMARK_REGISTER_F(PerfSphbes, BM_sphbesj)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond);
+BENCHMARK_REGISTER_F(PerfSphbes, BM_Spherical_Bessel)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond);
+BENCHMARK_MAIN(); 
\ No newline at end of file
diff --git a/source/module_base/tool_quit.h b/source/module_base/tool_quit.h
index eafaf673cc..f944696d5a 100644
--- a/source/module_base/tool_quit.h
+++ b/source/module_base/tool_quit.h
@@ -33,13 +33,13 @@ void WARNING(const std::string &file, const std::string &description);
  * @brief Close .log files and exit
  *
  */
-void QUIT(void);
+[[noreturn]] void QUIT(void);
 
 /**
  * @brief Close .log files and exit
  *
  */
-void QUIT(int ret);
+[[noreturn]] void QUIT(int ret);
 
 /**
  * @brief Combine the functions of WARNING and QUIT
@@ -47,7 +47,7 @@ void QUIT(int ret);
  * @param file The file where warning happens
  * @param description The warning information
  */
-void WARNING_QUIT(const std::string &file, const std::string &description);
+[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description);
 
 /**
  * @brief Combine the functions of WARNING and QUIT
@@ -55,7 +55,7 @@ void WARNING_QUIT(const std::string &file, const std::string &description);
  * @param file The file where warning happens
  * @param description The warning information
  */
-void WARNING_QUIT(const std::string &file, const std::string &description, int ret);
+[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description, int ret);
 
 /**
  * @brief Check, if true, WARNING_QUIT
diff --git a/source/module_cell/klist.cpp b/source/module_cell/klist.cpp
index 52bc42440d..e11ab183f5 100644
--- a/source/module_cell/klist.cpp
+++ b/source/module_cell/klist.cpp
@@ -362,6 +362,10 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 
 			//recalculate nkstot.
 			nkstot = 0;
+            /* ISSUE#3482: to distinguish different kline segments */
+            std::vector<int> kpt_segids;
+            kl_segids.clear(); kl_segids.shrink_to_fit();
+            int kpt_segid = 0;
 			for(int iks=0; iks<nks_special; iks++)
 			{
 				ifk >> ksx[iks];
@@ -371,6 +375,9 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 				//std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl;
 				assert(nkl[iks] >= 0);
 				nkstot += nkl[iks];
+                /* ISSUE#3482: to distinguish different kline segments */
+                if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++;
+                kpt_segids.push_back(kpt_segid);
 			}
 			assert( nkl[nks_special-1] == 1);
 
@@ -389,6 +396,7 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 					kvec_c[count].x = ksx[iks-1] + is*dx;
 					kvec_c[count].y = ksy[iks-1] + is*dy;
 					kvec_c[count].z = ksz[iks-1] + is*dz;
+                    kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */
 					++count;
 				}
 			}
@@ -397,15 +405,14 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 			kvec_c[count].x = ksx[nks_special-1];
 			kvec_c[count].y = ksy[nks_special-1];
 			kvec_c[count].z = ksz[nks_special-1];
+            kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */
 			++count;
 
 			//std::cout << " count = " << count << std::endl;
-			assert (count == nkstot );
-
-			for(int ik=0; ik<nkstot; ik++)
-			{
-				wk[ik] = 1.0;
-			}
+			assert(count == nkstot);
+            assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */
+			
+            std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;});
 
             this->kc_done = true;
 
@@ -439,15 +446,22 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 
 			//recalculate nkstot.
 			nkstot = 0;
+            /* ISSUE#3482: to distinguish different kline segments */
+            std::vector<int> kpt_segids;
+            kl_segids.clear(); kl_segids.shrink_to_fit();
+            int kpt_segid = 0;
 			for(int iks=0; iks<nks_special; iks++)
 			{
 				ifk >> ksx[iks];
 				ifk >> ksy[iks];
 				ifk >> ksz[iks];
-				ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] );
+				ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); /* so ifk is ifstream for kpoint, then nkl is number of kpoints on line */
 				//std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl;
 				assert(nkl[iks] >= 0);
 				nkstot += nkl[iks];
+                /* ISSUE#3482: to distinguish different kline segments */
+                if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++;
+                kpt_segids.push_back(kpt_segid);
 			}
 			assert( nkl[nks_special-1] == 1);
 
@@ -466,6 +480,7 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 					kvec_d[count].x = ksx[iks-1] + is*dx;
 					kvec_d[count].y = ksy[iks-1] + is*dy;
 					kvec_d[count].z = ksz[iks-1] + is*dz;
+                    kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */
 					++count;
 				}
 			}
@@ -474,18 +489,16 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 			kvec_d[count].x = ksx[nks_special-1];
 			kvec_d[count].y = ksy[nks_special-1];
 			kvec_d[count].z = ksz[nks_special-1];
+            kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */
 			++count;
 
 			//std::cout << " count = " << count << std::endl;
-			assert (count == nkstot );
+			assert(count == nkstot );
+            assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */
 
-			for(int ik=0; ik<nkstot; ik++)
-			{
-				wk[ik] = 1.0;
-			}
+			std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;});
 
             this->kd_done = true;
-
 		}
 
         else
@@ -1122,6 +1135,9 @@ void K_Vectors::mpi_k(void)
 
     Parallel_Common::bcast_int(nmp, 3);
 
+    kl_segids.resize(nkstot);
+    Parallel_Common::bcast_int(kl_segids.data(), nkstot);
+
     Parallel_Common::bcast_double(koffset, 3);
 
     this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
@@ -1352,6 +1368,8 @@ void K_Vectors::mpi_k_after_vc(void)
     Parallel_Common::bcast_int(nspin);
     Parallel_Common::bcast_int(nkstot);
     Parallel_Common::bcast_int(nmp, 3);
+    kl_segids.resize(nkstot);
+    Parallel_Common::bcast_int(kl_segids.data(), nkstot);
     Parallel_Common::bcast_double(koffset, 3);
 
     this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
diff --git a/source/module_cell/klist.h b/source/module_cell/klist.h
index a9e06f8614..aa92cf29fd 100644
--- a/source/module_cell/klist.h
+++ b/source/module_cell/klist.h
@@ -29,6 +29,7 @@ class K_Vectors
     int nkstot_full;    /// number of k points in full k mesh
 
     int nmp[3];						// Number of Monhorst-Pack
+    std::vector<int> kl_segids;	// index of kline segment
 
     K_Vectors();
     ~K_Vectors();
diff --git a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
index 617674256a..bb447bca4c 100644
--- a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
+++ b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
@@ -223,7 +223,7 @@ TEST_F(SltkAtomInputTest, ConstructorNoExpand)
     GlobalV::test_grid = 1;
     // this is a bug if radius is too small
     // because the expand_flag will be false!
-    radius = 1e-1000;
+    radius = 0;
     Atom_input Atom_inp(ofs, *ucell, ucell->nat, ucell->ntype, pbc, radius, test_atom_in);
     EXPECT_FALSE(Atom_inp.getExpandFlag());
     // call set_FAtom and Load_atom
diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp
index dc517bccd7..4c6bf9c0eb 100644
--- a/source/module_cell/read_atoms.cpp
+++ b/source/module_cell/read_atoms.cpp
@@ -535,100 +535,101 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn
 				ModuleBase::GlobalFunc::ZEROS(atoms[it].mag,na);
 				for (int ia = 0;ia < na; ia++)
 				{
- // modify the reading of frozen ions and velocities  -- Yuanbo Li 2021/8/20
-                                        ifpos >> v.x >> v.y >> v.z;
-                                        mv.x = true ;
-                                        mv.y = true ;
-                                        mv.z = true ;
-                                        atoms[it].vel[ia].set(0,0,0);
-										atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2
-										atoms[it].angle1[ia]=0;
-										atoms[it].angle2[ia]=0;
-										atoms[it].m_loc_[ia].set(0,0,0);
-
-                                        std::string tmpid;
-                                        tmpid = ifpos.get();
-
-										if( (int)tmpid[0] < 0 )
-										{
-											std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl;
-											exit(1); 
-										}
-
-										bool input_vec_mag=false;
-										bool input_angle_mag=false;
-                                        while ( (tmpid != "\n") && (ifpos.eof()==false) && (tmpid !="#") )
-                                        {
-                                                tmpid = ifpos.get() ;
-                                                // old method of reading frozen ions
-                                                char tmp = (char)tmpid[0];
-                                                if ( tmp >= 48 && tmp <= 57 )
-                                                {
-                                                        mv.x = std::stoi(tmpid);
-                                                        ifpos >> mv.y >> mv.z ;
-                                                }
-                                                // new method of reading frozen ions and velocities
-												if ( tmp >= 'a' && tmp <='z')
-												{
-													ifpos.putback(tmp);
-													ifpos >> tmpid;
-												}
-                                                if ( tmpid == "m" )
-                                                {
-                                                        ifpos >> mv.x >> mv.y >> mv.z ;
-                                                }
-                                                else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" )
-                                                {
-                                                        ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z;
-                                                }
-												else if ( tmpid == "mag" || tmpid == "magmom")
-												{
-													set_element_mag_zero = true;
-													double tmpamg=0;
-													ifpos >> tmpamg;
-													tmp=ifpos.get();
-													while (tmp==' ')
-													{
-														tmp=ifpos.get();
-													}
-													
-													if((tmp >= 48 && tmp <= 57) or tmp=='-')
-													{
-														ifpos.putback(tmp);
-														ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z;
-														atoms[it].m_loc_[ia].x=tmpamg;
-														atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2));
-														input_vec_mag=true;
-														
-													}
-													else
-													{
-														ifpos.putback(tmp);
-														atoms[it].mag[ia]=tmpamg;
-													}
-													
-													// atoms[it].mag[ia];
-												}
-												else if ( tmpid == "angle1")
-												{
-													 ifpos >> atoms[it].angle1[ia];
-													 atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI;
-													 input_angle_mag=true;
-													 set_element_mag_zero = true;
-												}
-												else if ( tmpid == "angle2")
-												{
-													 ifpos >> atoms[it].angle2[ia];
-													 atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI;
-													 input_angle_mag=true;
-													 set_element_mag_zero = true;
-												}
-												
-                                        }
-					while ( (tmpid != "\n") && (ifpos.eof()==false) )
-                                        {
-                                                tmpid = ifpos.get();
-                                        }
+ 				// modify the reading of frozen ions and velocities  -- Yuanbo Li 2021/8/20
+					ifpos >> v.x >> v.y >> v.z;
+					mv.x = true ;
+					mv.y = true ;
+					mv.z = true ;
+					atoms[it].vel[ia].set(0,0,0);
+					atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2
+					atoms[it].angle1[ia]=0;
+					atoms[it].angle2[ia]=0;
+					atoms[it].m_loc_[ia].set(0,0,0);
+
+					std::string tmpid;
+					tmpid = ifpos.get();
+
+					if( (int)tmpid[0] < 0 )
+					{
+						std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl;
+						exit(1); 
+					}
+
+					bool input_vec_mag=false;
+					bool input_angle_mag=false;
+					// read if catch goodbit before "\n" and "#"
+					while ( (tmpid != "\n") && (ifpos.good()) && (tmpid !="#") )
+					{
+						tmpid = ifpos.get() ;
+						// old method of reading frozen ions
+						char tmp = (char)tmpid[0];
+						if ( tmp >= 48 && tmp <= 57 )
+						{
+								mv.x = std::stoi(tmpid);
+								ifpos >> mv.y >> mv.z ;
+						}
+						// new method of reading frozen ions and velocities
+						if ( tmp >= 'a' && tmp <='z')
+						{
+							ifpos.putback(tmp);
+							ifpos >> tmpid;
+						}
+						if ( tmpid == "m" )
+						{
+								ifpos >> mv.x >> mv.y >> mv.z ;
+						}
+						else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" )
+						{
+								ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z;
+						}
+						else if ( tmpid == "mag" || tmpid == "magmom")
+						{
+							set_element_mag_zero = true;
+							double tmpamg=0;
+							ifpos >> tmpamg;
+							tmp=ifpos.get();
+							while (tmp==' ')
+							{
+								tmp=ifpos.get();
+							}
+							
+							if((tmp >= 48 && tmp <= 57) or tmp=='-')
+							{
+								ifpos.putback(tmp);
+								ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z;
+								atoms[it].m_loc_[ia].x=tmpamg;
+								atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2));
+								input_vec_mag=true;
+								
+							}
+							else
+							{
+								ifpos.putback(tmp);
+								atoms[it].mag[ia]=tmpamg;
+							}
+							
+							// atoms[it].mag[ia];
+						}
+						else if ( tmpid == "angle1")
+						{
+								ifpos >> atoms[it].angle1[ia];
+								atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI;
+								input_angle_mag=true;
+								set_element_mag_zero = true;
+						}
+						else if ( tmpid == "angle2")
+						{
+								ifpos >> atoms[it].angle2[ia];
+								atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI;
+								input_angle_mag=true;
+								set_element_mag_zero = true;
+						}	
+					}
+					// move to next line
+					while ( (tmpid != "\n") && (ifpos.good()) )
+					{
+							tmpid = ifpos.get();
+					}
 					std::string mags;
 					//cout<<"mag"<<atoms[it].mag[ia]<<"angle1"<<atoms[it].angle1[ia]<<"angle2"<<atoms[it].angle2[ia]<<'\n';
 
diff --git a/source/module_elecstate/occupy.cpp b/source/module_elecstate/occupy.cpp
index 80918dd3f1..e896aae4e8 100644
--- a/source/module_elecstate/occupy.cpp
+++ b/source/module_elecstate/occupy.cpp
@@ -79,6 +79,12 @@ void Occupy::decision(const std::string &name, const std::string &smearing_metho
         {
             gaussian_type = 2; // 2nd Methfessel-Paxton method.
         }
+        else if (smearing_method == "mp3")
+        {
+            // acually any order Methfessel-Paxton method can be supported in Occupy::w1gauss()
+            // however the parameter is string instead of int
+            ModuleBase::WARNING_QUIT("occupy", "Some refactor of smearing shoule be done before supporting any order of Methfessel-Paxton method!");
+        }
 
         else if (smearing_method == "marzari-vanderbilt" || smearing_method == "cold" || smearing_method == "mv")
         {
@@ -597,411 +603,3 @@ double Occupy::w1gauss(const double &x, const int n)
 
     return w1;
 } // end function w1gauss
-
-/*
-void Occupy::tweights(const int nks,const int nspin,const int nband,const double &nelec,
-                      const int ntetra,const ModuleBase::matrix &tetra, double **ekb, double &ef, ModuleBase::matrix
-&wg)
-{
-    //===================================================================
-    // calculates weights with the tetrahedron method (Bloechl version)
-    // integer :: nks, nspin, GlobalV::NBANDS, ntetra, tetra (4, ntetra)
-    //===================================================================
-
-    double e1, e2, e3, e4, c1, c2, c3, c4, dosef;
-    int ik, ibnd, nt, nk, ns, i, kp1, kp2, kp3, kp4;
-
-    double etetra[4];
-    int itetra[4];
-
-    // Calculate the Fermi energy ef
-    efermit(ekb, GlobalV::NBANDS, nks, nelec, nspin, ntetra, tetra, ef);
-
-    for (ik = 0;ik < nks;ik++)
-    {
-        for (ibnd = 0;ibnd < nband;ibnd++)
-        {
-            wg(ik, ibnd) = 0.0;
-        } // enddo
-    } // enddo
-
-    for (ns = 0;ns < nspin;ns++)
-    {
-        //==================================================================
-        // nk is used to select k-points with up (ns=1) or down (ns=2) spin
-        //==================================================================
-        if (ns == 1)
-        {
-            nk = 0;
-        }
-        else
-        {
-            nk = nks / 2;
-        }
-
-        for (nt = 0;nt < ntetra;nt++)
-        {
-            for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++)
-            {
-                //======================================================
-                // etetra are the energies at the vertexes of the nt-th
-                // tetrahedron
-                //======================================================
-                for (i = 0;i < 4;i++)
-                {
-                    etetra [i] = ekb[static_cast<int>( tetra(nt,i) ) + nk][ibnd];
-                }
-
-                itetra[0] = 0;
-
-                ModuleBase::hpsort(4, etetra, itetra);
-
-                //===============================================
-                // ...sort in ascending order: e1 < e2 < e3 < e4
-                //===============================================
-                e1 = etetra [0];
-                e2 = etetra [1];
-                e3 = etetra [2];
-                e4 = etetra [3];
-
-                //==============================================================
-                // kp1-kp4 are the irreducible k-points corresponding to e1-e4
-                //==============================================================
-
-                kp1 = static_cast<int>( tetra(nt,itetra[0]) )+ nk;
-                kp2 = static_cast<int>( tetra(nt,itetra[1]) )+ nk;
-                kp3 = static_cast<int>( tetra(nt,itetra[2]) )+ nk;
-                kp4 = static_cast<int>( tetra(nt,itetra[3]) )+ nk;
-
-                //======================
-                // calculate weights wg
-                //======================
-                if (ef >= e4)
-                {
-                    wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra;
-                    wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra;
-                    wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra;
-                    wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra;
-                }
-                else if (ef < e4 && ef >= e3)
-                {
-                    c4 = 0.250 / ntetra * pow(e4 - ef, 3) / (e4 - e1) / (e4 - e2)
-                         / (e4 - e3);
-                    dosef = 3.0 / ntetra * (e4 - ef) * (e4 - ef) / (e4 - e1) / (e4 - e2)
-                            / (e4 - e3);
-                    wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra - c4 *
-                                    (e4 - ef) / (e4 - e1) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
-                    wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra - c4 *
-                                    (e4 - ef) / (e4 - e2) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
-                    wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra - c4 *
-                                    (e4 - ef) / (e4 - e3) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
-                    wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra - c4 *
-                                    (4.0 - (e4 - ef) * (1.0 / (e4 - e1) + 1.0 / (e4 - e2)
-                                                        + 1.0 / (e4 - e3))) + dosef * (e1 + e2 + e3 + e4 - 4.0 *
-                                                                                       ekb[kp4][ibnd]) / 40.0;
-                }
-
-                else if (ef < e3 && ef >= e2)
-                {
-                    c1 = 0.250 / ntetra * (ef - e1) * (ef - e1) / (e4 - e1) / (e3 - e1);
-                    c2 = 0.250 / ntetra * (ef - e1) * (ef - e2) * (e3 - ef)
-                         / (e4 - e1) / (e3 - e2) / (e3 - e1);
-                    c3 = 0.250 / ntetra * (ef - e2) * (ef - e2) * (e4 - ef) / (e4 - e2)
-                         / (e3 - e2) / (e4 - e1);
-                    dosef = 1.0 / ntetra / (e3 - e1) / (e4 - e1) * (3.0 *
-                            (e2 - e1) + 6.0 * (ef - e2) - 3.0 * (e3 - e1 + e4 - e2)
-                            * (ef - e2) * (ef - e2) / (e3 - e2) / (e4 - e2));
-                    wg(kp1, ibnd) = wg(kp1, ibnd) + c1 + (c1 + c2) * (e3 - ef)
-                                    / (e3 - e1) + (c1 + c2 + c3) * (e4 - ef) / (e4 - e1) + dosef *
-                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
-                    wg(kp2, ibnd) = wg(kp2, ibnd) + c1 + c2 + c3 + (c2 + c3)
-                                    * (e3 - ef) / (e3 - e2) + c3 * (e4 - ef) / (e4 - e2) + dosef *
-                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
-                    wg(kp3, ibnd) = wg(kp3, ibnd) + (c1 + c2) * (ef - e1)
-                                    / (e3 - e1) + (c2 + c3) * (ef - e2) / (e3 - e2) + dosef *
-                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
-                    wg(kp4, ibnd) = wg(kp4, ibnd) + (c1 + c2 + c3) * (ef - e1)
-                                    / (e4 - e1) + c3 * (ef - e2) / (e4 - e2) + dosef * (e1 + e2 +
-                                            e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0;
-                }
-                else if (ef < e2 && ef >= e1)
-                {
-                    c4 = 0.250 / ntetra * (ef - e1) * (ef - e1) * (ef - e1) / (e2 - e1) /
-                         (e3 - e1) / (e4 - e1);
-                    dosef = 3.0 / ntetra * (ef - e1) * (ef - e1) / (e2 - e1) / (e3 - e1)
-                            / (e4 - e1);
-                    wg(kp1, ibnd) = wg(kp1, ibnd) + c4 * (4.0 - (ef - e1)
-                                                          * (1.0 / (e2 - e1) + 1.0 / (e3 - e1) + 1.0 / (e4 - e1)))
-                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
-                    wg(kp2, ibnd) = wg(kp2, ibnd) + c4 * (ef - e1) / (e2 - e1)
-                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
-                    wg(kp3, ibnd) = wg(kp3, ibnd) + c4 * (ef - e1) / (e3 - e1)
-                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
-                    wg(kp4, ibnd) = wg(kp4, ibnd) + c4 * (ef - e1) / (e4 - e1)
-                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0;
-                } // endif
-            } // enddo
-        } // enddo
-    } // enddo
-
-    //=====================================================================
-    // add correct spin normalization : 2 for LDA, 1 for LSDA calculations
-    //=====================================================================
-    for (ik = 0;ik < nks;ik++)
-    {
-        for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++)
-        {
-            wg(ik, ibnd) = wg(ik, ibnd) * 2.0 / nspin;
-        }
-    }
-    return;
-} // end subroutine tweights
-*/
-
-/*
-double Occupy::wsweight(const ModuleBase::Vector3<double> &r, ModuleBase::Vector3<double> *rws,const int nrws)
-{
-    //============================================================
-    // integer ir, nreq, nrws
-    // real(kind=dp) r(3), rrt, ck, eps, rws(0:3,nrws), wsweight
-    // parameter (eps=1.0e-6)
-    //============================================================
-    const double eps = 1.0e-6;
-
-    int nreq = 1;
-
-    for (int ir = 0;ir < nrws;ir++)
-    {
-        const double rrt = r * rws[ir];
-        const double ck = rrt - rws[ir].x;
-        //	rrt = r[1]*rws(1,ir) + r[2]*rws(2,ir) + r[3]*rws(3,ir);
-        //	ck = rrt-rws(0,ir);
-
-        if (ck > eps)
-        {
-            break;
-        }
-
-        if (std::abs(ck) < eps)
-        {
-            nreq++;
-        }
-    } // end do
-
-    const double wswe = 1.0 / nreq;
-
-    return wswe;
-} // end function wsweight
-*/
-
-/*
-void Occupy::efermit(double** ekb,const int nband,const int nks,const double &nelec,const int nspin,
-                     const int ntetra,const ModuleBase::matrix &tetra, double &ef)
-{
-    //=======================================================
-    // Finds the Fermi energy - tetrahedron method (Bloechl)
-    // the transformation Ry to eV
-    //=======================================================
-
-    // parameter :
-    const int maxiter = 300;
-    const double eps = 1.0e-10;
-
-    double efbetter;
-
-    //===================================
-    // nlw : the minimum energy band
-    // elw : the lower limit of the fermi ener
-    // eup : the upper limit of the fermi ener
-    // external sumkt
-    // find bounds for the Fermi energy.
-    //===================================
-    const int nlw = max(  1, static_cast<int>( (nelec / 2.0 - 5.0) )  );
-    double elw = ekb[nlw][0];
-    double eup = ekb[0][GlobalV::NBANDS-1];
-
-    for (int ik = 1;ik < nks;ik++)// do ik = 2, nks
-    {
-        elw = min(elw, ekb[ik][nlw]);
-        eup = max(eup, ekb[ik][GlobalV::NBANDS-1]);
-    }
-    for (int ik = 1;ik < nks;ik++)// do ik = 2, nks
-    {
-        elw = min(elw, ekb[ik][nlw]);
-        eup = max(eup, ekb[ik][GlobalV::NBANDS-1]);
-    }
-
-    //===============================
-    // Bisection method
-    // the number of states with eup
-    // the number of states with elw
-    //===============================
-    const double sumkup = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, eup);
-    const double sumklw = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, elw);
-
-    GlobalV::ofs_running << "\n sumkup = " << sumkup;
-    GlobalV::ofs_running << "\n sumklw = " << sumklw << std::endl;
-
-    if ((sumkup - nelec) < - eps || (sumklw - nelec) > eps)
-    {
-        ModuleBase::WARNING("efermit","unexpected error.");
-    }
-
-    double better = 1.0e+10;
-
-    bool converge = false;
-
-    double sumkmid = 0.0;
-    for (int iter = 0;iter < maxiter;iter++)
-    {
-        // the number of states with ef
-        ef = (eup + elw) / 2.0;
-        sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef);
-
-        if (std::abs(sumkmid - nelec) < better)
-        {
-            better = std::abs(sumkmid - nelec);
-            efbetter = ef;
-        }
-
-        // converged
-        if (std::abs(sumkmid - nelec) < eps)
-        {
-            converge = true;
-            break;
-        }
-        else if ((sumkmid - nelec) < - eps)
-        {
-            elw = ef;
-        }
-        else
-        {
-            eup = ef;
-        }
-    }
-    if (!converge)
-    {
-        // unconverged exit:
-        // the best available ef is used . Needed in some difficult cases
-        ef = efbetter;
-        sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef);
-    }
-
-    //==============================================================
-    // Check if Fermi level is above any of the highest eigenvalues
-    //==============================================================
-    for (int ik = 0;ik < nks;ik++)
-    {
-        if (ef > ekb[ik][GlobalV::NBANDS-1] + 1.e-4)
-        {
-            std::cout << "\n ef = " << ef;
-        }
-    }
-    return;
-} // end subroutine efermit
-*/
-
-/*
-double Occupy::sumkt(double** ekb,const int nband,const int nks,const int nspin,const int ntetra,
-                     const ModuleBase::matrix &tetra,const double &e)
-{
-    double etetra[4];
-    double sum = 0.0;
-
-    int nk = 0 ;
-    for (int ns = 0; ns < nspin;ns++)
-    {
-        //==================================================================
-        // nk is used to select k-points with up (ns=1) or down (ns=2) spin
-        //==================================================================
-        if (ns == 1)
-        {
-            nk = 0;
-        }
-        else
-        {
-            nk = nks / 2;
-        }
-
-        for (int nt = 0; nt < ntetra; nt++)
-        {
-            for (int ibnd = 0; ibnd < GlobalV::NBANDS; ibnd++)
-            {
-                //======================================================
-                // etetra are the energies at the vertexes of the nt-th
-                // tetrahedron
-                //======================================================
-                for (int i = 0; i < 4; i++)
-                {
-                    etetra [i] = ekb[  static_cast<int>( (tetra(i, nt) + nk) )][ ibnd  ];
-                }
-
-                piksort(4, etetra);
-                //===========================================
-                //sort in ascending order: e1 < e2 < e3 < e4
-                //===========================================
-                const double e1 = etetra [0];
-                const double e2 = etetra [1];
-                const double e3 = etetra [2];
-                const double e4 = etetra [3];
-
-                //===============================================
-                // calculate sum over k of the integrated charge
-                //===============================================
-                if (e >= e4)
-                {
-                    sum += 1.0 / ntetra;
-                }
-                else if (e < e4 && e >= e3)
-                {
-                    sum += 1.0 / ntetra * (1.0 - pow((e4 - e), 3) / (e4 - e1)
-                                           / (e4 - e2) / (e4 - e3));
-                }
-                else if (e < e3 && e >= e2)
-                {
-                    sum += 1.0 / ntetra / (e3 - e1) / (e4 - e1) *
-                           ((e2 - e1) * (e2 - e1) + 3.0 * (e2 - e1) * (e - e2) +
-                            3.0 * (e - e2) * (e - e2) - (e3 - e1 + e4 - e2) /
-                            (e3 - e2) / (e4 - e2) * pow((e - e2), 3));
-                }
-                else if (e < e2 && e >= e1)
-                {
-                    sum += 1.0 / ntetra * pow((e - e1), 3) /
-                           (e2 - e1) / (e3 - e1) / (e4 - e1);
-                }
-            }//ibnd
-        }//nt
-    }//ns
-
-// add correct spin normalization : 2 for LDA, 1 for LSDA calculations
-    sum *= 2.0 / nspin;
-    return sum;
-} // end function sumkt
-*/
-
-/*
-void Occupy::piksort(const int n, double *a)
-{
-    int i;
-    bool b = true;
-    for (int j = 1;j < n;j++) // do j = 2, n
-    {
-        const double temp = a [j];
-        for (i = j - 1;i >= 0;i--)  // do i = j - 1, 1, - 1
-        {
-            if (a [i] <= temp)
-            {
-                b = false;
-                break;
-            }
-            a [i + 1] = a [i];
-        }
-        if (b)
-        {
-            i = 0;
-        }
-        a [i + 1] = temp;
-    }
-    return;
-} //end subroutine piksort
-*/
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 520dcfd176..0b2608e5ea 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -409,7 +409,7 @@ namespace ModuleESolver
                         }
                     }
 
-                    this->conv_elec = (drho < this->scf_thr);
+                    this->conv_elec = (drho < this->scf_thr && iter!=GlobalV::MIXING_RESTART);
 
                     // If drho < hsolver_error in the first iter or drho < scf_thr, we do not change rho.
                     if (drho < hsolver_error || this->conv_elec)
@@ -435,8 +435,16 @@ namespace ModuleESolver
                         //     }
                         //     p_chgmix->auto_set(bandgap_for_autoset, GlobalC::ucell);
                         // }
-                        
-                        p_chgmix->mix_rho(pelec->charge);
+                        // mixing will restart after GlobalV::MIXING_RESTART steps
+                        // So, GlobalV::MIXING_RESTART=1 means mix from scratch
+                        if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1)
+                        {
+                            // do not mix charge density
+                        }
+                        else
+                        {
+                            p_chgmix->mix_rho(pelec->charge); // update chr->rho by mixing
+                        }
                         if (GlobalV::SCF_THR_TYPE == 2) pelec->charge->renormalize_rho(); // renormalize rho in R-space would induce a error in K-space
                         //----------charge mixing done-----------
                     }
@@ -467,6 +475,11 @@ namespace ModuleESolver
                     bool stop = this->do_after_converge(iter);
                     if(stop) {std::cout << "break\n"; break;}
                 }
+                // notice for restart
+                if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1)
+                {
+                    std::cout<<"SCF restart after this step!"<<std::endl;
+                }
             }
             afterscf(istep);
             ModuleBase::timer::tick(this->classname, "Run");
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 8fdc9217fd..4c4d6c342f 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -309,7 +309,7 @@ namespace ModuleESolver
     GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl;
     GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl;
 
-    if (INPUT.out_dos != 0 || INPUT.out_band != 0 || INPUT.out_proj_band != 0)
+    if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0 || INPUT.out_proj_band != 0)
     {
         GlobalV::ofs_running << "\n\n\n\n";
         GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl;
@@ -331,7 +331,7 @@ namespace ModuleESolver
 
     int nspin0 = (GlobalV::NSPIN == 2) ? 2 : 1;
 
-    if (INPUT.out_band) // pengfei 2014-10-13
+    if (INPUT.out_band[0]) // pengfei 2014-10-13
     {
         int nks = 0;
         if (nspin0 == 1)
@@ -348,7 +348,15 @@ namespace ModuleESolver
             std::stringstream ss2;
             ss2 << GlobalV::global_out_dir << "BANDS_" << is + 1 << ".dat";
             GlobalV::ofs_running << "\n Output bands in file: " << ss2.str() << std::endl;
-            ModuleIO::nscf_band(is, ss2.str(), nks, GlobalV::NBANDS, 0.0, this->pelec->ekb, this->kv, &(GlobalC::Pkpoints));
+            ModuleIO::nscf_band(is, 
+                                ss2.str(), 
+                                nks, 
+                                GlobalV::NBANDS, 
+                                0.0, 
+                                INPUT.out_band[1],
+                                this->pelec->ekb, 
+                                this->kv, 
+                                &(GlobalC::Pkpoints));
         }
     } // out_band
 
@@ -483,8 +491,19 @@ namespace ModuleESolver
     template <typename TK, typename TR>
     void ESolver_KS_LCAO<TK, TR>::eachiterinit(const int istep, const int iter)
 {
-    if (iter == 1)
+    if (iter == 1 || iter == GlobalV::MIXING_RESTART)
+    {
+        if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart 
+        {
+            this->p_chgmix->set_mixing(GlobalV::MIXING_MODE,
+                                GlobalV::MIXING_BETA,
+                                GlobalV::MIXING_NDIM,
+                                GlobalV::MIXING_GG0,
+                                GlobalV::MIXING_TAU,
+                                GlobalV::MIXING_BETA_MAG);
+        }
         this->p_chgmix->mix_reset();
+    }
 
     // mohan update 2012-06-05
     this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband();
diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp
index 294f1636f7..07779f62ec 100644
--- a/source/module_esolver/esolver_ks_pw.cpp
+++ b/source/module_esolver/esolver_ks_pw.cpp
@@ -492,9 +492,19 @@ void ESolver_KS_PW<T, Device>::othercalculation(const int istep)
 template <typename T, typename Device>
 void ESolver_KS_PW<T, Device>::eachiterinit(const int istep, const int iter)
 {
-    if (iter == 1)
+    if (iter == 1 || iter == GlobalV::MIXING_RESTART)
+    {
+        if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart 
+        {
+            this->p_chgmix->set_mixing(GlobalV::MIXING_MODE,
+                                GlobalV::MIXING_BETA,
+                                GlobalV::MIXING_NDIM,
+                                GlobalV::MIXING_GG0,
+                                GlobalV::MIXING_TAU,
+                                GlobalV::MIXING_BETA_MAG);
+        }
         this->p_chgmix->mix_reset();
-
+    }
     // mohan move harris functional to here, 2012-06-05
     // use 'rho(in)' and 'v_h and v_xc'(in)
     this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband();
@@ -894,6 +904,58 @@ void ESolver_KS_PW<T, Device>::afterscf(const int istep)
                             this->kspw_psi[0].get_pointer() - this->kspw_psi[0].get_psi_bias(),
                             this->psi[0].size());
     }
+
+    if(INPUT.band_print_num > 0)
+    {
+        std::complex<double> * wfcr = new std::complex<double>[this->pw_rho->nxyz];
+        double * rho_band = new double [this->pw_rho->nxyz];
+        for(int i = 0; i < this->pw_rho->nxyz; i++)
+        {
+            rho_band[i] = 0.0;
+        }
+
+        for(int i = 0; i < INPUT.band_print_num; i++)
+        {
+            int ib = INPUT.bands_to_print[i];
+            for(int ik = 0; ik < this->kv.nks; ik++)
+            {
+                this->psi->fix_k(ik);
+                this->pw_wfc->recip_to_real(this->ctx,&psi[0](ib,0),wfcr,ik);
+
+                double w1 = static_cast<double>(this->kv.wk[ik] / GlobalC::ucell.omega);
+
+                for(int i = 0; i < this->pw_rho->nxyz; i++)
+                {
+                    rho_band[i] += std::norm(wfcr[i]) * w1;
+                }
+            }
+
+            std::stringstream ssc;
+            ssc << GlobalV::global_out_dir << "band" << ib << ".cube";     
+
+            ModuleIO::write_rho
+            (
+#ifdef __MPI
+                this->pw_big->bz,
+                this->pw_big->nbz,
+                this->pw_big->nplane,
+                this->pw_big->startz_current,
+#endif
+                rho_band,
+                0,
+                GlobalV::NSPIN,
+                0,
+                ssc.str(),
+                this->pw_rho->nx,
+                this->pw_rho->ny,
+                this->pw_rho->nz,
+                0.0,
+                &(GlobalC::ucell),
+                11);
+        }
+        delete[] wfcr;
+        delete[] rho_band;
+    }
 }
 
 template <typename T, typename Device>
@@ -959,7 +1021,7 @@ void ESolver_KS_PW<T, Device>::postprocess()
     GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl;
     GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl;
 
-    if (INPUT.out_dos != 0 || INPUT.out_band != 0)
+    if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0)
     {
         GlobalV::ofs_running << "\n\n\n\n";
         GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl;
@@ -1001,7 +1063,7 @@ void ESolver_KS_PW<T, Device>::postprocess()
         }
     }
 
-    if (INPUT.out_band) // pengfei 2014-10-13
+    if (INPUT.out_band[0]) // pengfei 2014-10-13
     {
         int nks = 0;
         if (nspin0 == 1)
@@ -1022,6 +1084,7 @@ void ESolver_KS_PW<T, Device>::postprocess()
                                 nks,
                                 GlobalV::NBANDS,
                                 0.0,
+                                INPUT.out_band[1],
                                 this->pelec->ekb,
                                 this->kv,
                                 &(GlobalC::Pkpoints));
diff --git a/source/module_hamilt_general/module_xc/test/test_xc.cpp b/source/module_hamilt_general/module_xc/test/test_xc.cpp
index 558556b66b..a770a88458 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 namespace GlobalV
@@ -95,7 +95,7 @@ class XCTest_PBEsol : public testing::Test
                 e_gga.push_back(e);
                 v1_gga.push_back(v1);
                 v2_gga.push_back(v2);
-            }                                           
+            }
         }
 };
 
diff --git a/source/module_hamilt_general/module_xc/test/test_xc1.cpp b/source/module_hamilt_general/module_xc/test/test_xc1.cpp
index 8e7a451e71..bc5c439630 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc1.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc1.cpp
@@ -12,7 +12,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 namespace GlobalV
diff --git a/source/module_hamilt_general/module_xc/test/test_xc2.cpp b/source/module_hamilt_general/module_xc/test/test_xc2.cpp
index 4b1b7e888e..5bf75a3c68 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc2.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc2.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 namespace GlobalV
@@ -202,7 +202,7 @@ class XCTest_PZ_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }          
+            }
         }
 };
 
@@ -238,7 +238,7 @@ class XCTest_SLATER1_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }          
+            }
         }
 };
 
@@ -273,7 +273,7 @@ class XCTest_SLATER_RXC_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }           
+            }
         }
 };
 
@@ -310,7 +310,7 @@ class XCTest_P86_SPN : public testing::Test
                 v1_gga.push_back(v1);
                 v2_gga.push_back(v2);
                 v3_gga.push_back(v3);
-            }         
+            }
         }
 };
 
diff --git a/source/module_hamilt_general/module_xc/test/test_xc4.cpp b/source/module_hamilt_general/module_xc/test/test_xc4.cpp
index 114c817b0f..b4c8b70093 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc4.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc4.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
 }
 
 namespace GlobalV
@@ -49,7 +49,7 @@ class XCTest_SCAN : public testing::Test
                 v2_.push_back(v2);
                 v3_.push_back(v3);
             }
-        }                       
+        }
 };
 
 TEST_F(XCTest_SCAN, set_xc_type)
diff --git a/source/module_hamilt_general/module_xc/test/test_xc5.cpp b/source/module_hamilt_general/module_xc/test/test_xc5.cpp
index bd2f87da8a..d9dfed1b20 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc5.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc5.cpp
@@ -25,9 +25,6 @@ class XCTest_VXC : public testing::Test
         double et2 = 0, vt2 = 0;
         ModuleBase::matrix v2;
 
-        double et4 = 0, vt4 = 0;
-        ModuleBase::matrix v4;
-
         void SetUp()
         {
             ModulePW::PW_Basis rhopw;
@@ -87,13 +84,6 @@ class XCTest_VXC : public testing::Test
             vt2 = std::get<1>(etxc_vtxc_v);
             v2  = std::get<2>(etxc_vtxc_v);
 
-            GlobalV::NSPIN = 4;
-            GlobalV::DOMAG = true;
-            etxc_vtxc_v
-                = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell);
-            et4 = std::get<0>(etxc_vtxc_v);
-            vt4 = std::get<1>(etxc_vtxc_v);
-            v4  = std::get<2>(etxc_vtxc_v);
         }
 };
 
@@ -121,29 +111,6 @@ TEST_F(XCTest_VXC, set_xc_type)
     EXPECT_NEAR(v2(1,3),-1.97506482,1.0e-8);
     EXPECT_NEAR(v2(1,4),-2.160374198,1.0e-8);
 
-    EXPECT_NEAR(et4,-27.40098253,1.0e-8);
-    EXPECT_NEAR(vt4,-35.81948838,1.0e-8);
-    EXPECT_NEAR(v4(0,0),0,1.0e-8);
-    EXPECT_NEAR(v4(0,1),-1.559604078,1.0e-8);
-    EXPECT_NEAR(v4(0,2),-1.920028447,1.0e-8);
-    EXPECT_NEAR(v4(0,3),-2.168396069,1.0e-8);
-    EXPECT_NEAR(v4(0,4),-2.36419592,1.0e-8);
-    EXPECT_NEAR(v4(1,0),0,1.0e-8);
-    EXPECT_NEAR(v4(1,1),-0.09308179605,1.0e-8);
-    EXPECT_NEAR(v4(1,2),-0.123132664,1.0e-8);
-    EXPECT_NEAR(v4(1,3),-0.144332804,1.0e-8);
-    EXPECT_NEAR(v4(1,4),-0.16127282,1.0e-8);
-    EXPECT_NEAR(v4(2,0),0,1.0e-8);
-    EXPECT_NEAR(v4(2,1),-0.9308179605,1.0e-8);
-    EXPECT_NEAR(v4(2,2),-1.23132664,1.0e-8);
-    EXPECT_NEAR(v4(2,3),-1.44332804,1.0e-8);
-    EXPECT_NEAR(v4(2,4),-1.6127282,1.0e-8);
-    EXPECT_NEAR(v4(3,0),0,1.0e-8);
-    EXPECT_NEAR(v4(3,1),-0.09308179605,1.0e-8);
-    EXPECT_NEAR(v4(3,2),-0.123132664,1.0e-8);
-    EXPECT_NEAR(v4(3,3),-0.144332804,1.0e-8);
-    EXPECT_NEAR(v4(3,4),-0.16127282,1.0e-8);
-
 }
 
 class XCTest_VXC_Libxc : public testing::Test
@@ -156,9 +123,6 @@ class XCTest_VXC_Libxc : public testing::Test
         double et2 = 0, vt2 = 0;
         ModuleBase::matrix v2;
 
-        double et4 = 0, vt4 = 0;
-        ModuleBase::matrix v4;
-
         void SetUp()
         {
             ModulePW::PW_Basis rhopw;
@@ -218,13 +182,6 @@ class XCTest_VXC_Libxc : public testing::Test
             vt2 = std::get<1>(etxc_vtxc_v);
             v2  = std::get<2>(etxc_vtxc_v);
 
-            GlobalV::NSPIN = 4;
-            GlobalV::DOMAG = true;
-            etxc_vtxc_v
-                = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell);
-            et4 = std::get<0>(etxc_vtxc_v);
-            vt4 = std::get<1>(etxc_vtxc_v);
-            v4  = std::get<2>(etxc_vtxc_v);
         }
 };
 
@@ -252,28 +209,6 @@ TEST_F(XCTest_VXC_Libxc, set_xc_type)
     EXPECT_NEAR(v2(1,3),-1.975058937,1.0e-8);
     EXPECT_NEAR(v2(1,4),-2.160368003,1.0e-8);
 
-    EXPECT_NEAR(et4,-27.28201062,1.0e-8);
-    EXPECT_NEAR(vt4,-35.98253991,1.0e-8);
-    EXPECT_NEAR(v4(0,0),0,1.0e-8);
-    EXPECT_NEAR(v4(0,1),-1.268278149,1.0e-8);
-    EXPECT_NEAR(v4(0,2),-1.598108222,1.0e-8);
-    EXPECT_NEAR(v4(0,3),-1.828079634,1.0e-8);
-    EXPECT_NEAR(v4(0,4),-2.010634115,1.0e-8);
-    EXPECT_NEAR(v4(1,0),0,1.0e-8);
-    EXPECT_NEAR(v4(1,1),-0.1255782493,1.0e-8);
-    EXPECT_NEAR(v4(1,2),-0.1582362929,1.0e-8);
-    EXPECT_NEAR(v4(1,3),-0.1810068558,1.0e-8);
-    EXPECT_NEAR(v4(1,4),-0.1990824429,1.0e-8);
-    EXPECT_NEAR(v4(2,0),0,1.0e-8);
-    EXPECT_NEAR(v4(2,1),-1.255782493,1.0e-8);
-    EXPECT_NEAR(v4(2,2),-1.582362929,1.0e-8);
-    EXPECT_NEAR(v4(2,3),-1.810068558,1.0e-8);
-    EXPECT_NEAR(v4(2,4),-1.990824429,1.0e-8);
-    EXPECT_NEAR(v4(3,0),0,1.0e-8);
-    EXPECT_NEAR(v4(3,1),-0.1255782493,1.0e-8);
-    EXPECT_NEAR(v4(3,2),-0.1582362929,1.0e-8);
-    EXPECT_NEAR(v4(3,3),-0.1810068558,1.0e-8);
-    EXPECT_NEAR(v4(3,4),-0.1990824429,1.0e-8);
 }
 
 class XCTest_VXC_meta : public testing::Test
diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h
index 628937adfe..da7f1e6f08 100644
--- a/source/module_hamilt_general/module_xc/test/xc3_mock.h
+++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h
@@ -75,7 +75,7 @@ namespace ModulePW
         return x;
     }
 
-    
+
     template <typename FPTYPE, typename Device>
     void PW_Basis_K::real_to_recip(const Device* ctx,
                        const std::complex<FPTYPE>* in,
@@ -115,7 +115,7 @@ namespace ModulePW
                                                                      const int ik,
                                                                      const bool add,
                                                                      const double factor) const;
-#if __CUDA || __ROCM                                                                     
+#if __CUDA || __ROCM
     template void PW_Basis_K::real_to_recip<double, psi::DEVICE_GPU>(const psi::DEVICE_GPU* ctx,
                                                                      const std::complex<double>* in,
                                                                      std::complex<double>* out,
@@ -129,7 +129,7 @@ namespace ModulePW
                                                                      const int ik,
                                                                      const bool add,
                                                                      const double factor) const;
-#endif 
+#endif
 
     FFT::FFT(){};
     FFT::~FFT(){};
@@ -144,9 +144,13 @@ namespace ModulePW
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
+    void WARNING_QUIT(const std::string &file,const std::string &description)
+    {
+        std::cout << " " << file <<"  warning : "<< description<<std::endl;
+        exit(1);
+    }
     void WARNING(const std::string &file,const std::string &description) {};
-    
+
     void Matrix3::Identity(){};
 
     IntArray::IntArray(int,int){};
@@ -228,4 +232,4 @@ namespace Parallel_Reduce
     template void reduce_pool<float>(float& object);
     template void reduce_pool<float>(float* object, const int n);
     template void reduce_pool<double>(double* object, const int n);
-}
\ No newline at end of file
+}
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
index 3d85150b22..7ee0394d4d 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
@@ -33,7 +33,7 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::cal_MW_
         const char N_char = 'N';
         const int one_int = 1;
         const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
-        pzgemm_(&T_char,
+        pzgemm_(&N_char,
                 &T_char,
                 &nw,
                 &nw,
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
index 0c0595a9ce..bd0ad4ce3a 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
@@ -22,7 +22,7 @@ std::vector<std::vector<std::vector<double>>> SpinConstrain<std::complex<double>
                 AorbMulP[is][iat].resize(nw_it, 0.0);
                 for (int iw = 0; iw < nw_it; iw++)
                 {
-                    AorbMulP[is][iat][iw] = orbMulP(is, num);
+                    AorbMulP[is][iat][iw] = std::abs(orbMulP(is, num))< 1e-10 ? 0.0 : orbMulP(is, num);
                     num++;
                 }
             }
@@ -92,16 +92,10 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::calculate_MW(
             }
             else if (this->nspin_ == 4)
             {
-                this->Mi_[iat].x = total_charge_soc[1];
-                this->Mi_[iat].y = total_charge_soc[2];
-                this->Mi_[iat].z = total_charge_soc[3];
+                this->Mi_[iat].x = (std::abs(total_charge_soc[1]) < this->sc_thr_)? 0.0 : total_charge_soc[1];
+                this->Mi_[iat].y = (std::abs(total_charge_soc[2]) < this->sc_thr_)? 0.0 : total_charge_soc[2];
+                this->Mi_[iat].z = (std::abs(total_charge_soc[3]) < this->sc_thr_)? 0.0 : total_charge_soc[3];
             }
-            if (std::abs(this->Mi_[iat].x) < 1e-12)
-                this->Mi_[iat].x = 0.0;
-            if (std::abs(this->Mi_[iat].y) < 1e-12)
-                this->Mi_[iat].y = 0.0;
-            if (std::abs(this->Mi_[iat].z) < 1e-12)
-                this->Mi_[iat].z = 0.0;
         }
     }
 }
diff --git a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
index db64d5490d..845db88062 100644
--- a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
@@ -45,6 +45,7 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         }
         else
         {
+            where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda);
             add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_);
             this->cal_mw_from_lambda(i_step);
             new_spin = this->Mi_;
@@ -87,6 +88,7 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         add_scalar_multiply_2d(dnu, search, alpha_trial, dnu);
         delta_lambda = dnu;
 
+        where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda);
         add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_);
         this->cal_mw_from_lambda(i_step);
 
@@ -115,4 +117,4 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         }
         alpha_trial = alpha_trial * pow(g, 0.7);
     }
-}
\ No newline at end of file
+}
diff --git a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
index fedb46a976..a55ad59681 100644
--- a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
@@ -28,7 +28,8 @@ void MPIInit()
     npcol = 1;
     Cblacs_pinfo(&myrank, &mysize);
     Cblacs_get(-1, 0, &ictxt);
-    Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
+    char order[] = "Row";
+    Cblacs_gridinit(&ictxt, order, nprow, npcol);
     Cblacs_gridinfo(ictxt, &nprow, &npcol, &myprow, &mypcol);
 }
 
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index fbaf7b1806..8e4ee5b15b 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,14 +1,13 @@
 #include <complex>
 #ifdef __PEXSI
-#include "diago_pexsi.h"
-
 #include "c_pexsi_interface.h"
+#include "diago_pexsi.h"
 #include "module_base/global_variable.h"
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "pexsi/pexsi_solver.h"
+#include "module_pexsi/pexsi_solver.h"
 
 typedef hamilt::MatrixBlock<double> matd;
 typedef hamilt::MatrixBlock<std::complex<double>> matcd;
@@ -16,7 +15,7 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 namespace hsolver
 {
 
-template<>
+template <>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
@@ -25,30 +24,31 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
     this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
-                                this->ParaV->nb,
-                                this->ParaV->nrow,
-                                this->ParaV->ncol,
-                                h_mat.p,
-                                s_mat.p,
-                                this->DM,
-                                this->EDM,
-                                this->totalEnergyH,
-                                this->totalEnergyS,
-                                this->totalFreeEnergy);
+                                       this->ParaV->nb,
+                                       this->ParaV->nrow,
+                                       this->ParaV->ncol,
+                                       h_mat.p,
+                                       s_mat.p,
+                                       this->DM,
+                                       this->EDM,
+                                       this->totalEnergyH,
+                                       this->totalEnergyS,
+                                       this->totalFreeEnergy);
     this->ps->solve();
-    this->EDM = this->ps->EDM;
-    this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]?
-    this->totalFreeEnergy = this->ps->totalFreeEnergy;
-    this->totalEnergyH = this->ps->totalEnergyH;
-    this->totalEnergyS = this->ps->totalEnergyS;
+    this->EDM = this->ps->get_EDM();
+    this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
+    this->totalEnergyH = this->ps->get_totalEnergyH();
+    this->totalEnergyS = this->ps->get_totalEnergyS();
 }
 
-template<>
-void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
+template <>
+void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in,
+                                            psi::Psi<std::complex<double>>& psi,
+                                            double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
     ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
-    
 }
 
 } // namespace hsolver
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 018397a33d..c212d7795a 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -3,7 +3,7 @@
 
 #include "diagh.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "pexsi/pexsi_solver.h"
+#include "module_pexsi/pexsi_solver.h"
 
 namespace hsolver
 {
diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp
index 16fa5f335b..ae784d2009 100644
--- a/source/module_hsolver/hsolver_pw.cpp
+++ b/source/module_hsolver/hsolver_pw.cpp
@@ -624,17 +624,31 @@ void HSolverPW<T, Device>::hamiltSolvePsiK(hamilt::Hamilt<T, Device>* hm, psi::P
         hm->ops->hPsi(info);
         ModuleBase::timer::tick("DiagoCG_New", "hpsi_func");
     };
-    auto spsi_func = [hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
+    auto spsi_func = [this, hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
         ModuleBase::timer::tick("DiagoCG_New", "spsi_func");
         // psi_in should be a 2D tensor: 
         // psi_in.shape() = [nbands, nbasis]
         const auto ndim = psi_in.shape().ndim();
         REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2");
-        // Convert a Tensor object to a psi::Psi object
-        hm->sPsi(psi_in.data<T>(), spsi_out.data<T>(), 
+
+        if (GlobalV::use_uspp)
+        {
+            // Convert a Tensor object to a psi::Psi object
+            hm->sPsi(psi_in.data<T>(), spsi_out.data<T>(), 
             ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), 
             ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), 
             ndim == 1 ? 1 : psi_in.shape().dim_size(0));
+        } else
+        {
+            psi::memory::synchronize_memory_op<T, Device, Device>()(
+                this->ctx,
+                this->ctx,
+                spsi_out.data<T>(),
+                psi_in.data<T>(),
+                static_cast<size_t>((ndim == 1 ? 1 : psi_in.shape().dim_size(0))
+                                    * (ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1))));
+        }
+        
         ModuleBase::timer::tick("DiagoCG_New", "spsi_func");
     };
     auto psi_tensor = ct::TensorMap(
@@ -776,4 +790,4 @@ template class HSolverPW<std::complex<float>, psi::DEVICE_GPU>;
 template class HSolverPW<std::complex<double>, psi::DEVICE_GPU>;
 #endif
 
-} // namespace hsolver
\ No newline at end of file
+} // namespace hsolver
diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt
index 8faab8b4b4..87d16ff557 100644
--- a/source/module_hsolver/module_pexsi/CMakeLists.txt
+++ b/source/module_hsolver/module_pexsi/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp)
+add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp)
 
 if(ENABLE_COVERAGE)
   add_coverage(pexsi)
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
index cf815bd4ae..e498b83a2e 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "dist_bcd_matrix.h"
 
 #include <mpi.h>
@@ -110,4 +111,5 @@ int DistBCDMatrix::pnum(const int prow, const int pcol)
 {
     return this->prowpcol2pnum[prow * this->npcols + pcol];
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
index 7dbddbad7c..98b8512893 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -2,6 +2,8 @@
 #define DISTBCDMATRIX_H
 
 #include <mpi.h>
+
+#include "module_hsolver/module_pexsi/dist_matrix_transformer.h"
 // a Block Cyclic Data Distribution matrix
 // http://www.netlib.org/utk/papers/factor/node3.html
 // local matrix elements is stored in column major
@@ -27,6 +29,27 @@ class DistBCDMatrix
     int pnum(const int prow, const int pcol);
     //~DistBCDMatrix();
 
+    const MPI_Comm get_comm() const
+    {
+        return comm;
+    };
+    const MPI_Group get_group() const
+    {
+        return group;
+    };
+    const int get_nrow() const
+    {
+        return nrow;
+    };
+    const int get_ncol() const
+    {
+        return ncol;
+    };
+    const char get_LAYOUT() const
+    {
+        return LAYOUT;
+    };
+
   private:
     // MPI communicator
     MPI_Comm comm;
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
index 365622d249..ddd02aaa9a 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "dist_ccs_matrix.h"
 
 #include <mpi.h>
@@ -114,4 +115,5 @@ DistCCSMatrix::~DistCCSMatrix()
     delete[] colptrLocal;
     delete[] rowindLocal;
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
index aa5e67b6ab..a63a0dc16c 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
@@ -19,6 +19,44 @@ class DistCCSMatrix
     int globalCol(int localCol);
     int localCol(int globalCol, int& mypcol);
     void setnnz(int nnzLocal);
+
+    const MPI_Comm get_comm() const
+    {
+        return comm;
+    };
+    const MPI_Group get_group() const
+    {
+        return group;
+    };
+    const MPI_Group get_group_data() const
+    {
+        return group_data;
+    };
+    const int get_size() const
+    {
+        return size;
+    };
+    const int get_nnz() const
+    {
+        return nnz;
+    };
+    const int get_nnzlocal() const
+    {
+        return nnzLocal;
+    };
+    const int get_numcol_local() const
+    {
+        return numColLocal;
+    };
+    int* get_colptr_local() const
+    {
+        return colptrLocal;
+    };
+    int* get_rowind_local() const
+    {
+        return rowindLocal;
+    };
+
     ~DistCCSMatrix();
 
   private:
@@ -50,6 +88,8 @@ class DistCCSMatrix
     // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
     int* colptrLocal;
     int* rowindLocal;
+
+    // friend class DistMatrixTransformer;
 };
 } // namespace pexsi
 #endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index 01b96f42cc..ef6c6fec72 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -1,3 +1,6 @@
+#ifdef __PEXSI
+#include "dist_matrix_transformer.h"
+
 #include <mpi.h>
 
 #include <climits>
@@ -28,11 +31,11 @@ namespace pexsi
 // wether this function is called for the first time for a index array; nprocs: total number of processes size_process:
 // the number of indices in each process displacement_process: the start position in each process index: the array
 // contains the indices
-inline int MinimumIndexPosition(const bool isFirst,
-                                const int nprocs,
-                                int* size_process,
-                                int* displacement_process,
-                                const int* index)
+inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst,
+                                                       const int nprocs,
+                                                       int* size_process,
+                                                       int* displacement_process,
+                                                       const int* index)
 {
     // usually the minimum index is continuous, so it will be a good idea to
     // check the one next to the previous index first.
@@ -104,16 +107,16 @@ inline int MinimumIndexPosition(const bool isFirst,
     }
 }
 
-inline void buildCCSParameter(const int size,
-                              const int nprocs,
-                              std::vector<int> size_process,
-                              std::vector<int> displacement_process,
-                              const int* position_index,
-                              DistCCSMatrix& DST_Matrix,
-                              int* buffer2ccsIndex)
+inline void DistMatrixTransformer::buildCCSParameter(const int size,
+                                                     const int nprocs,
+                                                     std::vector<int> size_process,
+                                                     std::vector<int> displacement_process,
+                                                     const int* position_index,
+                                                     DistCCSMatrix& DST_Matrix,
+                                                     int* buffer2ccsIndex)
 {
     // find the minimum one from left buffer index
-    if (DST_Matrix.nnzLocal <= 0)
+    if (DST_Matrix.get_nnzlocal() <= 0)
         return;
 
     int pre_col = -1;
@@ -123,31 +126,34 @@ inline void buildCCSParameter(const int size,
     while (p_mini >= 0)
     {
         int index_mini = position_index[p_mini];
-        int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol;
-        int row_mini = index_mini % DST_Matrix.size;
+        int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol;
+        int row_mini = index_mini % DST_Matrix.get_size();
         if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array
         {
             pre_col = col_mini;
-            DST_Matrix.colptrLocal[col_mini] = nnz_now + 1;
+            DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1;
         }
-        DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
+        DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
         // copy data from buffer to M, be careful M is a 0-based array
         buffer2ccsIndex[nnz_now] = p_mini;
         ++nnz_now;
         p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
     }
     // The last element of colptrLocal is nnzLocal+1
-    DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1;
+    DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1;
 }
 
-inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal)
+inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal,
+                                                   int* buffer2ccsIndex,
+                                                   double* buffer,
+                                                   double* nzvalLocal)
 {
     for (int i = 0; i < nnzLocal; ++i)
     {
         nzvalLocal[i] = buffer[buffer2ccsIndex[i]];
     }
 }
-inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
+inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 {
     for (int i = 0; i < N; ++i)
     {
@@ -161,15 +167,15 @@ inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 }
 
 // find out the index of non-zero elements
-inline int getNonZeroIndex(char LAYOUT,
-                           const int nrow,
-                           const int ncol,
-                           double* H_2d,
-                           double* S_2d,
-                           const double ZERO_Limit,
-                           int& nnz,
-                           std::vector<int>& rowidx,
-                           std::vector<int>& colidx)
+inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
+                                                  const int nrow,
+                                                  const int ncol,
+                                                  double* H_2d,
+                                                  double* S_2d,
+                                                  const double ZERO_Limit,
+                                                  int& nnz,
+                                                  std::vector<int>& rowidx,
+                                                  std::vector<int>& colidx)
 {
 #ifdef _DEBUG
     char f_log[80];
@@ -275,21 +281,21 @@ inline int getNonZeroIndex(char LAYOUT,
     return 0;
 }
 
-int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
-                            DistCCSMatrix& DST_Matrix,
-                            const int NPROC_TRANS,
-                            MPI_Group& GROUP_TRANS,
-                            MPI_Comm& COMM_TRANS,
-                            const int nnz,
-                            std::vector<int>& rowidx,
-                            std::vector<int>& colidx,
-                            int& sender_size,
-                            std::vector<int>& sender_size_process,
-                            std::vector<int>& sender_displacement_process,
-                            int& receiver_size,
-                            std::vector<int>& receiver_size_process,
-                            std::vector<int>& receiver_displacement_process,
-                            std::vector<int>& buffer2ccsIndex)
+int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                                                   DistCCSMatrix& DST_Matrix,
+                                                   const int NPROC_TRANS,
+                                                   MPI_Group& GROUP_TRANS,
+                                                   MPI_Comm& COMM_TRANS,
+                                                   const int nnz,
+                                                   std::vector<int>& rowidx,
+                                                   std::vector<int>& colidx,
+                                                   int& sender_size,
+                                                   std::vector<int>& sender_size_process,
+                                                   std::vector<int>& sender_displacement_process,
+                                                   int& receiver_size,
+                                                   std::vector<int>& receiver_size_process,
+                                                   std::vector<int>& receiver_displacement_process,
+                                                   std::vector<int>& buffer2ccsIndex)
 {
     // debug
     int myproc;
@@ -322,12 +328,12 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     std::vector<int> proc_map_data_trans;
     if (myproc == 0)
     {
-        MPI_Group_size(DST_Matrix.group_data, &nproc_data);
+        MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data);
         MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
         proc_map_data_trans.resize(nproc_data, 0);
         for (int i = 0; i < nproc_data; ++i)
         {
-            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
+            MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
         }
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
@@ -429,7 +435,7 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         int dst_col = DST_Matrix.localCol(g_col, dst_process);
         int l_row = rowidx[i];
         int dst_row = SRC_Matrix.globalRow(l_row);
-        sender_index[i] = dst_col * DST_Matrix.size + dst_row;
+        sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row;
     }
 // debug
 #ifdef _DEBUG
@@ -478,10 +484,10 @@ int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
-                      DistCCSMatrix& DST_Matrix,
-                      MPI_Group& GROUP_TRANS,
-                      MPI_Comm& COMM_TRANS)
+int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                                             DistCCSMatrix& DST_Matrix,
+                                             MPI_Group& GROUP_TRANS,
+                                             MPI_Comm& COMM_TRANS)
 {
 // debug
 #ifdef _DEBUG
@@ -499,7 +505,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
 #endif
     // build transfortram communicator which contains both processes of BCD processors and
     // CCS processors with nonzero elements
-    MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS);
+    MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS);
     MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
 // debug
 #ifdef _DEBUG
@@ -557,7 +563,7 @@ int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
+int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 {
     MPI_Group_free(&GROUP_TRANS);
     if (COMM_TRANS != MPI_COMM_NULL)
@@ -571,13 +577,13 @@ int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 // two destination matrices share the same non-zero elements positions
 // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero,
 // even if one of them is acturely zero All matrices must have same MPI communicator
-int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
-                      double* H_2d,
-                      double* S_2d,
-                      const double ZERO_Limit,
-                      DistCCSMatrix& DST_Matrix,
-                      double*& H_ccs,
-                      double*& S_ccs)
+int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
+                                             double* H_2d,
+                                             double* S_2d,
+                                             const double ZERO_Limit,
+                                             DistCCSMatrix& DST_Matrix,
+                                             double*& H_ccs,
+                                             double*& S_ccs)
 {
 // debug
 #ifdef _DEBUG
@@ -614,9 +620,9 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         {
             log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow
                 << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
-            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol
+            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol()
                 << std::endl;
-            log << "layout:" << SRC_Matrix.LAYOUT << std::endl;
+            log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl;
             log << "ZERO = " << ZERO_Limit << std::endl;
             log << "DST_Matrix parameters:" << std::endl;
             log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
@@ -633,11 +639,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "start counting nnz..." << std::endl;
 #endif
-        if (SRC_Matrix.comm != MPI_COMM_NULL)
+        if (SRC_Matrix.get_comm() != MPI_COMM_NULL)
         {
-            getNonZeroIndex(SRC_Matrix.LAYOUT,
-                            SRC_Matrix.nrow,
-                            SRC_Matrix.ncol,
+            getNonZeroIndex(SRC_Matrix.get_LAYOUT(),
+                            SRC_Matrix.get_nrow(),
+                            SRC_Matrix.get_ncol(),
                             H_2d,
                             S_2d,
                             ZERO_Limit,
@@ -654,11 +660,11 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
             if(SRC_Matrix.comm != MPI_COMM_NULL)
             {
                 log<<"NonZeroIndex :"<<std::endl;
-                if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+                if(SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=rowidx[i]*SRC_Matrix.ncol+colidx[i];
+                        int HS_idx=rowidx[i]*SRC_Matrix.get_ncol()+colidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -667,7 +673,7 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=colidx[i]*SRC_Matrix.nrow+rowidx[i];
+                        int HS_idx=colidx[i]*SRC_Matrix.get_nrow()+rowidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -707,18 +713,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
-        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
+                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
+                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -749,18 +755,18 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 #endif
 
         // put S to sender buffer
-        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
+        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
+                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
+                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -804,12 +810,12 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 
 // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution
 // two source matrices share the same non-zero elements positions
-int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
-                      double* DMnzvalLocal,
-                      double* EDMnzvalLocal,
-                      DistBCDMatrix& DST_Matrix,
-                      double* DM,
-                      double* EDM)
+int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
+                                             double* DMnzvalLocal,
+                                             double* EDMnzvalLocal,
+                                             DistBCDMatrix& DST_Matrix,
+                                             double* DM,
+                                             double* EDM)
 {
 // debug
 #ifdef _DEBUG
@@ -840,7 +846,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
     if (COMM_TRANS != MPI_COMM_NULL)
     {
         // init DM and EDM with 0
-        for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i)
+        for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i)
         {
             DM[i] = 0;
             EDM[i] = 0;
@@ -877,12 +883,12 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         MPI_Comm_rank(COMM_TRANS, &myproc_trans);
         if (myproc_trans == 0)
         {
-            MPI_Group_size(DST_Matrix.group, &nproc_bcd);
+            MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd);
             MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
             proc_map_bcd_trans.resize(nproc_bcd, 0);
             for (int i = 0; i < nproc_bcd; ++i)
             {
-                MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
+                MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
             }
             MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
         }
@@ -933,7 +939,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             log << "display all columns and rows of nonzeros values:\n";
         int log_nnz = 0;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
+        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
@@ -942,9 +948,9 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             // log<<g_col<<"\n ";
             // #endif
             // OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
+                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
                 int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
@@ -1020,7 +1026,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
 #endif
 
         // setup up sender index and receiver index
-        int sender_size = SRC_Matrix.nnzLocal;
+        int sender_size = SRC_Matrix.get_nnzlocal();
         int* sender_index;
         double* sender_buffer;
         int* dst_index;
@@ -1119,14 +1125,14 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "idx start at " << idx << std::endl;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
+        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
             int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
+                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
 #ifdef _DEBUG
@@ -1315,9 +1321,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2]>DST_Matrix.nrow)
+    else if(receiver_index[i*2]>DST_Matrix.get_nrow())
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.get_nrow()<<std::endl;
         log.flush();
     }
     if(receiver_index[i*2+1]<0)
@@ -1325,9 +1331,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2+1]>DST_Matrix.ncol)
+    else if(receiver_index[i*2+1]>DST_Matrix.get_ncol())
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.get_ncol()<<std::endl;
         log.flush();
     }
 }
@@ -1376,10 +1382,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2] > DST_Matrix.nrow)
+                else if (receiver_index[i * 2] > DST_Matrix.get_nrow())
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > "
-                        << DST_Matrix.nrow << std::endl;
+                        << DST_Matrix.get_nrow() << std::endl;
                     log.flush();
                 }
                 if (receiver_index[i * 2 + 1] < 0)
@@ -1388,10 +1394,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol)
+                else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol())
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > "
-                        << DST_Matrix.ncol << std::endl;
+                        << DST_Matrix.get_ncol() << std::endl;
                     log.flush();
                 }
             }
@@ -1428,14 +1434,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
 #endif
         // transform receiver_buffer to DM
-        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.ncol + iy;
+                int idx = ix * DST_Matrix.get_ncol() + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1444,7 +1450,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.ncol << std::endl;
+                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
                         log.flush();
                     }
                 }
@@ -1454,12 +1460,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.nrow + ix;
+                int idx = iy * DST_Matrix.get_nrow() + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1468,7 +1474,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.nrow << std::endl;
+                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
                         log.flush();
                     }
                 }
@@ -1512,14 +1518,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
 #endif
         // transform receiver_buffer to EDM
-        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
+        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.ncol + iy;
+                int idx = ix * DST_Matrix.get_ncol() + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1528,7 +1534,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.ncol << std::endl;
+                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
                         log.flush();
                     }
                 }
@@ -1538,12 +1544,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
+            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.nrow + ix;
+                int idx = iy * DST_Matrix.get_nrow() + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1552,7 +1558,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.nrow << std::endl;
+                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
                         log.flush();
                     }
                 }
@@ -1596,3 +1602,4 @@ MPI_Barrier(COMM_TRANS);
 }
 
 } // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index 1d28866c96..e3e27e995a 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -1,8 +1,9 @@
 #ifndef DISTMATRIXTRANSFORMER_H
 #define DISTMATRIXTRANSFORMER_H
 
-#include "dist_bcd_matrix.h"
-#include "dist_ccs_matrix.h"
+#include <mpi.h>
+
+#include <vector>
 // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // they should have same MPI communicator
 // The local matrix of BCD is column-major order
@@ -15,6 +16,62 @@
 // even if one of them is acturely zero All matrices must have same MPI communicator
 namespace pexsi
 {
+class DistBCDMatrix;
+class DistCCSMatrix;
+
+namespace DistMatrixTransformer
+{
+int MinimumIndexPosition(const bool isFirst,
+                         const int nprocs,
+                         int* size_process,
+                         int* displacement_process,
+                         const int* index);
+
+void buildCCSParameter(const int size,
+                       const int nprocs,
+                       std::vector<int> size_process,
+                       std::vector<int> displacement_process,
+                       const int* position_index,
+                       DistCCSMatrix& DST_Matrix,
+                       int* buffer2ccsIndex);
+
+void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal);
+
+void countMatrixDistribution(int N, double* A, std::map<int, int>& P);
+
+int getNonZeroIndex(char LAYOUT,
+                    const int nrow,
+                    const int ncol,
+                    double* H_2d,
+                    double* S_2d,
+                    const double ZERO_Limit,
+                    int& nnz,
+                    std::vector<int>& rowidx,
+                    std::vector<int>& colidx);
+
+int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                            DistCCSMatrix& DST_Matrix,
+                            const int NPROC_TRANS,
+                            MPI_Group& GROUP_TRANS,
+                            MPI_Comm& COMM_TRANS,
+                            const int nnz,
+                            std::vector<int>& rowidx,
+                            std::vector<int>& colidx,
+                            int& sender_size,
+                            std::vector<int>& sender_size_process,
+                            std::vector<int>& sender_displacement_process,
+                            int& receiver_size,
+                            std::vector<int>& receiver_size_process,
+                            std::vector<int>& receiver_displacement_process,
+                            std::vector<int>& buffer2ccsIndex);
+
+int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                      DistCCSMatrix& DST_Matrix,
+                      MPI_Group& GROUP_TRANS,
+                      MPI_Comm& COMM_TRANS);
+
+int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS);
+
 int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       double* H_2d,
                       double* S_2d,
@@ -31,6 +88,7 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                       double* ENDnzvalLocal,
                       DistBCDMatrix& DST_Matrix,
                       double* DM_2d,
-                      double* END_2d);
+                      double* ED_2d);
+}; // namespace DistMatrixTransformer
 } // namespace pexsi
 #endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 90d16ae993..1be66abf59 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -1,3 +1,4 @@
+#ifdef __PEXSI
 #include "pexsi_solver.h"
 
 #include <mpi.h>
@@ -5,6 +6,11 @@
 #include <cstring>
 
 #include "module_base/global_variable.h"
+#include "simple_pexsi.h"
+
+extern MPI_Comm DIAG_WORLD;
+extern MPI_Comm GRID_WORLD;
+extern MPI_Group GRID_GROUP;
 
 namespace pexsi
 {
@@ -37,9 +43,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
 
 int PEXSI_Solver::solve()
 {
-    extern MPI_Comm DIAG_WORLD;
-    extern MPI_Comm GRID_WORLD;
-    extern MPI_Group GRID_GROUP;
+
     simplePEXSI(DIAG_WORLD,
                 GRID_WORLD,
                 GRID_GROUP,
@@ -61,12 +65,12 @@ int PEXSI_Solver::solve()
     return 0;
 }
 
-const double* PEXSI_Solver::get_DM() const
+double* PEXSI_Solver::get_DM() const
 {
     return DM;
 }
 
-const double* PEXSI_Solver::get_EDM() const
+double* PEXSI_Solver::get_EDM() const
 {
     return EDM;
 }
@@ -75,4 +79,16 @@ const double PEXSI_Solver::get_totalFreeEnergy() const
 {
     return totalFreeEnergy;
 }
-} // namespace pexsi
\ No newline at end of file
+
+const double PEXSI_Solver::get_totalEnergyH() const
+{
+    return totalEnergyH;
+}
+
+const double PEXSI_Solver::get_totalEnergyS() const
+{
+    return totalEnergyS;
+}
+
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 0c3164e5f0..b3d7aed152 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -18,9 +18,11 @@ class PEXSI_Solver
                  double& totalEnergyS,
                  double& totalFreeEnergy);
     int solve();
-    const double* get_DM() const;
-    const double* get_EDM() const;
+    double* get_DM() const;
+    double* get_EDM() const;
     const double get_totalFreeEnergy() const;
+    const double get_totalEnergyH() const;
+    const double get_totalEnergyS() const;
 
   private:
     int blacs_text;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 845beef18c..df72a061c5 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -2,6 +2,7 @@
 // the H and S matrices are given by 2D block cyclic distribution
 // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution
 // #include "mpi.h"
+#ifdef __PEXSI
 #include <mpi.h>
 
 #include <cfloat>
@@ -18,6 +19,7 @@
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
+#include "module_base/global_variable.h"
 
 namespace pexsi
 {
@@ -102,220 +104,252 @@ int loadPEXSIOption(MPI_Comm comm,
     // 10: numElectronPEXSITolerance
     // 11: ZERO_Limit
     double double_para[12];
-    int myid;
-    MPI_Comm_rank(comm, &myid);
-    if (myid == 0)
-    {
-        std::ifstream ifs(PexsiOptionFile.c_str());
-        if (!ifs)
-        {
-            return 1;
-        }
-        setDefaultOption(int_para, double_para);
 
-        ifs.clear();
-        ifs.seekg(0);
+    // read in PEXSI options from GlobalV
+    int_para[0] = GlobalV::pexsi_npole;
+    int_para[1] = GlobalV::pexsi_inertia;
+    int_para[2] = GlobalV::pexsi_nmax;
+    int_para[3] = 0;
+    int_para[4] = 1; // GlobalV::pexsi_symbolic;
+    int_para[5] = GlobalV::pexsi_comm;
+    int_para[6] = 0;
+    int_para[7] = GlobalV::pexsi_storage;
+    int_para[8] = GlobalV::pexsi_ordering;
+    int_para[9] = GlobalV::pexsi_row_ordering;
+    int_para[10] = GlobalV::pexsi_nproc;
+    int_para[11] = GlobalV::pexsi_symm;
+    int_para[12] = GlobalV::pexsi_trans;
+    int_para[13] = GlobalV::pexsi_method;
+    int_para[14] = 2;
+    int_para[15] = 0;
+    int_para[16] = GlobalV::pexsi_nproc_pole;
 
-        char key[128];
-        char lowercase_key[128];
-        const int LINE_LINGTH = 1024;
-        char unused_string[LINE_LINGTH];
+    double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin;
+    double_para[1] = GlobalV::pexsi_temp;
+    double_para[2] = GlobalV::pexsi_gap;
+    double_para[3] = GlobalV::pexsi_delta_e;
+    double_para[4] = GlobalV::pexsi_mu_lower;
+    double_para[5] = GlobalV::pexsi_mu_upper;
+    double_para[6] = GlobalV::pexsi_mu;
+    double_para[7] = GlobalV::pexsi_mu_thr;
+    double_para[8] = GlobalV::pexsi_mu_expand;
+    double_para[9] = GlobalV::pexsi_mu_guard;
+    double_para[10] = GlobalV::pexsi_elec_thr;
+    double_para[11] = GlobalV::pexsi_zero_thr;
+    // int myid;
+    // MPI_Comm_rank(comm, &myid);
+    // if (myid == 0)
+    // {
+    //     std::ifstream ifs(PexsiOptionFile.c_str());
+    //     if (!ifs)
+    //     {
+    //         return 1;
+    //     }
+    //     setDefaultOption(int_para, double_para);
 
-        while (ifs.good())
-        {
-            ifs >> key;
-            //~ cout<<"readin word is: "<<key<<endl;
-            strtolower(key, lowercase_key);
-            if (strcmp("spin", lowercase_key) == 0)
-            {
-                //~ ifs>>options.spin;
-                ifs >> double_para[0];
-                //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
-            }
-            else if (strcmp("temperature", lowercase_key) == 0)
-            {
-                //~ ifs>>options.temperature;
-                ifs >> double_para[1];
-                //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
-            }
-            else if (strcmp("gap", lowercase_key) == 0)
-            {
-                //~ ifs>>options.gap;
-                ifs >> double_para[2];
-                //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
-            }
-            else if (strcmp("deltae", lowercase_key) == 0)
-            {
-                //~ ifs>>options.deltaE;
-                ifs >> double_para[3];
-                //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
-            }
-            else if (strcmp("numpole", lowercase_key) == 0)
-            {
-                //~ ifs>>options.numPole;
-                ifs >> int_para[0];
-                //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
-            }
-            else if (strcmp("isinertiacount", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isInertiaCount;
-                ifs >> int_para[1];
-                //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
-            }
-            else if (strcmp("maxpexsiiter", lowercase_key) == 0)
-            {
-                //~ ifs>>options.maxPEXSIIter;
-                ifs >> int_para[2];
-                //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
-            }
-            else if (strcmp("mumin0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muMin0;
-                ifs >> double_para[4];
-                //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
-            }
-            else if (strcmp("mumax0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muMax0;
-                ifs >> double_para[5];
-                //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
-            }
-            else if (strcmp("mu0", lowercase_key) == 0)
-            {
-                //~ ifs>>options.mu0;
-                ifs >> double_para[6];
-                //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
-            }
-            else if (strcmp("muinertiatolerance", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muInertiaTolerance;
-                ifs >> double_para[7];
-                //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
-            }
-            else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muInertiaExpansion;
-                ifs >> double_para[8];
-                //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
-            }
-            else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
-            {
-                //~ ifs>>options.muPEXSISafeGuard;
-                ifs >> double_para[9];
-                //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
-            }
-            else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
-            {
-                //~ ifs>>options.numElectronPEXSITolerance;
-                ifs >> double_para[10];
-                //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
-            }
-            else if (strcmp("zero_limit", lowercase_key) == 0)
-            {
-                ifs >> double_para[11];
-            }
-            else if (strcmp("matrixtype", lowercase_key) == 0)
-            {
-                //~ ifs>>options.matrixType;
-                ifs >> int_para[3];
-                //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
-            }
-            else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isSymbolicFactorize;
-                ifs >> int_para[4];
-                //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
-            }
-            else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
-            {
-                //~ ifs>>options.isConstructCommPattern;
-                ifs >> int_para[5];
-                //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
-            }
-            else if (strcmp("solver", lowercase_key) == 0)
-            {
-                //~ ifs>>options.solver;
-                ifs >> int_para[6];
-                //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
-            }
-            else if (strcmp("symmetricstorage", lowercase_key) == 0)
-            {
-                //~ ifs>>options.symmetricStorage;
-                ifs >> int_para[7];
-                //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
-            }
-            else if (strcmp("ordering", lowercase_key) == 0)
-            {
-                //~ ifs>>options.ordering;
-                ifs >> int_para[8];
-                //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
-            }
-            else if (strcmp("rowordering", lowercase_key) == 0)
-            {
-                //~ ifs>>options.rowOrdering;
-                ifs >> int_para[9];
-                //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
-            }
-            else if (strcmp("npsymbfact", lowercase_key) == 0)
-            {
-                //~ ifs>>options.npSymbFact;
-                ifs >> int_para[10];
-                //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
-            }
-            else if (strcmp("symmetric", lowercase_key) == 0)
-            {
-                //~ ifs>>options.symmetric;
-                ifs >> int_para[11];
-                //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
-            }
-            else if (strcmp("transpose", lowercase_key) == 0)
-            {
-                //~ ifs>>options.transpose;
-                ifs >> int_para[12];
-                //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
-            }
-            else if (strcmp("method", lowercase_key) == 0)
-            {
-                //~ ifs>>options.method;
-                ifs >> int_para[13];
-                //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
-            }
-            else if (strcmp("npoints", lowercase_key) == 0)
-            {
-                //~ ifs>>options.nPoints;
-                ifs >> int_para[14];
-                //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
-            }
-            else if (strcmp("verbosity", lowercase_key) == 0)
-            {
-                //~ ifs>>options.verbosity;
-                ifs >> int_para[15];
-                //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
-            }
-            else if (strcmp("numprocessperpole", lowercase_key) == 0)
-            {
-                //~ ifs>>options.verbosity;
-                ifs >> int_para[16];
-                //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
-            }
-            else
-            {
-                if (key[0] == '#' || key[0] == '/')
-                {
-                    ifs.getline(unused_string, LINE_LINGTH);
-                }
-                else
-                {
-                    std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
-                    return 1;
-                }
-            }
-        }
-    }
+    //     ifs.clear();
+    //     ifs.seekg(0);
+
+    //     char key[128];
+    //     char lowercase_key[128];
+    //     const int LINE_LINGTH = 1024;
+    //     char unused_string[LINE_LINGTH];
 
-    // broadcast all options
-    MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
-    MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
+    //     while (ifs.good())
+    //     {
+    //         ifs >> key;
+    //         //~ cout<<"readin word is: "<<key<<endl;
+    //         strtolower(key, lowercase_key);
+    //         if (strcmp("spin", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.spin;
+    //             ifs >> double_para[0];
+    //             //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
+    //         }
+    //         else if (strcmp("temperature", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.temperature;
+    //             ifs >> double_para[1];
+    //             //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
+    //         }
+    //         else if (strcmp("gap", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.gap;
+    //             ifs >> double_para[2];
+    //             //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
+    //         }
+    //         else if (strcmp("deltae", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.deltaE;
+    //             ifs >> double_para[3];
+    //             //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
+    //         }
+    //         else if (strcmp("numpole", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.numPole;
+    //             ifs >> int_para[0];
+    //             //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
+    //         }
+    //         else if (strcmp("isinertiacount", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isInertiaCount;
+    //             ifs >> int_para[1];
+    //             //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
+    //         }
+    //         else if (strcmp("maxpexsiiter", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.maxPEXSIIter;
+    //             ifs >> int_para[2];
+    //             //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
+    //         }
+    //         else if (strcmp("mumin0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muMin0;
+    //             ifs >> double_para[4];
+    //             //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
+    //         }
+    //         else if (strcmp("mumax0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muMax0;
+    //             ifs >> double_para[5];
+    //             //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
+    //         }
+    //         else if (strcmp("mu0", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.mu0;
+    //             ifs >> double_para[6];
+    //             //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
+    //         }
+    //         else if (strcmp("muinertiatolerance", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muInertiaTolerance;
+    //             ifs >> double_para[7];
+    //             //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
+    //         }
+    //         else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muInertiaExpansion;
+    //             ifs >> double_para[8];
+    //             //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
+    //         }
+    //         else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.muPEXSISafeGuard;
+    //             ifs >> double_para[9];
+    //             //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
+    //         }
+    //         else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.numElectronPEXSITolerance;
+    //             ifs >> double_para[10];
+    //             //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
+    //         }
+    //         else if (strcmp("zero_limit", lowercase_key) == 0)
+    //         {
+    //             ifs >> double_para[11];
+    //         }
+    //         else if (strcmp("matrixtype", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.matrixType;
+    //             ifs >> int_para[3];
+    //             //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
+    //         }
+    //         else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isSymbolicFactorize;
+    //             ifs >> int_para[4];
+    //             //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
+    //         }
+    //         else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.isConstructCommPattern;
+    //             ifs >> int_para[5];
+    //             //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
+    //         }
+    //         else if (strcmp("solver", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.solver;
+    //             ifs >> int_para[6];
+    //             //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
+    //         }
+    //         else if (strcmp("symmetricstorage", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.symmetricStorage;
+    //             ifs >> int_para[7];
+    //             //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
+    //         }
+    //         else if (strcmp("ordering", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.ordering;
+    //             ifs >> int_para[8];
+    //             //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
+    //         }
+    //         else if (strcmp("rowordering", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.rowOrdering;
+    //             ifs >> int_para[9];
+    //             //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
+    //         }
+    //         else if (strcmp("npsymbfact", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.npSymbFact;
+    //             ifs >> int_para[10];
+    //             //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
+    //         }
+    //         else if (strcmp("symmetric", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.symmetric;
+    //             ifs >> int_para[11];
+    //             //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
+    //         }
+    //         else if (strcmp("transpose", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.transpose;
+    //             ifs >> int_para[12];
+    //             //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
+    //         }
+    //         else if (strcmp("method", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.method;
+    //             ifs >> int_para[13];
+    //             //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
+    //         }
+    //         else if (strcmp("npoints", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.nPoints;
+    //             ifs >> int_para[14];
+    //             //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
+    //         }
+    //         else if (strcmp("verbosity", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.verbosity;
+    //             ifs >> int_para[15];
+    //             //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
+    //         }
+    //         else if (strcmp("numprocessperpole", lowercase_key) == 0)
+    //         {
+    //             //~ ifs>>options.verbosity;
+    //             ifs >> int_para[16];
+    //             //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
+    //         }
+    //         else
+    //         {
+    //             if (key[0] == '#' || key[0] == '/')
+    //             {
+    //                 ifs.getline(unused_string, LINE_LINGTH);
+    //             }
+    //             else
+    //             {
+    //                 std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
+    //                 return 1;
+    //             }
+    //         }
+    //     }
+    // }
+
+    // // broadcast all options
+    // MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
+    // MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
 
     // setup PEXSI options from int_para and double_para
     options.numPole = int_para[0];
@@ -458,14 +492,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
 //}
 #endif
-    if (myid % (pexsi_prow * pexsi_pcol) == 0)
-    {
-        outputFileIndex = myid / (pexsi_prow * pexsi_pcol);
-    }
-    else
-    {
-        outputFileIndex = -1;
-    }
+    outputFileIndex = -1;
     // OUT(ofs_running, "checkpoint04");
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
     if (comm_PEXSI != MPI_COMM_NULL)
@@ -523,7 +550,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint12");
-    transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
+    DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
     // MPI_Barrier(MPI_COMM_WORLD);
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint13");
@@ -542,11 +569,11 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         PPEXSILoadRealHSMatrix(plan,
                                options,
                                size,
-                               DST_Matrix.nnz,
-                               DST_Matrix.nnzLocal,
-                               DST_Matrix.numColLocal,
-                               DST_Matrix.colptrLocal,
-                               DST_Matrix.rowindLocal,
+                               DST_Matrix.get_nnz(),
+                               DST_Matrix.get_nnzlocal(),
+                               DST_Matrix.get_numcol_local(),
+                               DST_Matrix.get_colptr_local(),
+                               DST_Matrix.get_rowind_local(),
                                HnzvalLocal,
                                isSIdentity,
                                SnzvalLocal,
@@ -600,9 +627,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
             delete[] EDMnzvalLocal;
         if (FDMnzvalLocal != nullptr)
             delete[] FDMnzvalLocal;
-        DMnzvalLocal = new double[DST_Matrix.nnzLocal];
-        EDMnzvalLocal = new double[DST_Matrix.nnzLocal];
-        FDMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        DMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
+        EDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
+        FDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
         if (myid < numProcessPerPole)
         {
             PPEXSIRetrieveRealDFTMatrix(plan,
@@ -633,8 +660,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     {
         delete[] DM;
         delete[] EDM;
-        DM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
-        EDM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
+        DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
+        EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
     }
 #ifdef _DEBUG
     // OUT(ofs_running, "checkpoint19");
@@ -644,7 +671,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
 #endif
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
-    transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
+    DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
 
@@ -702,4 +729,5 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }
-} // namespace pexsi
\ No newline at end of file
+} // namespace pexsi
+#endif
\ No newline at end of file
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index cc079cf208..b74ac6b104 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -22,6 +22,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/parallel_common.h"
 #include "module_base/timer.h"
+#include "module_base/tool_quit.h"
 #include "version.h"
 Input INPUT;
 
@@ -303,6 +304,7 @@ void Input::Default(void)
     mixing_mode = "broyden";
     mixing_beta = -10;
     mixing_ndim = 8;
+    mixing_restart = 0;
     mixing_gg0 = 1.00;       // use Kerker defaultly
     mixing_beta_mag = -10.0; // only set when nspin == 2 || nspin == 4
     mixing_gg0_mag = 0.0;    // defaultly exclude Kerker from mixing magnetic density
@@ -327,6 +329,8 @@ void Input::Default(void)
 
     out_bandgap = 0; // QO added for bandgap printing
 
+    band_print_num = 0;
+
     deepks_out_labels = 0; // caoyu added 2020-11-24, mohan added 2021-01-03
     deepks_scf = 0;
     deepks_bandgap = 0;
@@ -336,7 +340,7 @@ void Input::Default(void)
     out_wfc_pw = 0;
     out_wfc_r = 0;
     out_dos = 0;
-    out_band = 0;
+    out_band = {0, 8};
     out_proj_band = 0;
     out_mat_hs = {0, 8};
     out_mat_xc = 0;
@@ -635,6 +639,34 @@ void Input::Default(void)
     qo_thr = 1e-6;
     qo_screening_coeff = {};
 
+    //==========================================================
+    // variables for PEXSI
+    //==========================================================
+    pexsi_npole = 54;
+    pexsi_inertia = 1;
+    pexsi_nmax = 80;
+    // pexsi_symbolic = 1;
+    pexsi_comm = 1;
+    pexsi_storage = 1;
+    pexsi_ordering = 0;
+    pexsi_row_ordering = 1;
+    pexsi_nproc = 1;
+    pexsi_symm = 1;
+    pexsi_trans = 0;
+    pexsi_method = 1;
+    pexsi_nproc_pole = 1;
+    // pexsi_spin = 2;
+    pexsi_temp = 0.0001;
+    pexsi_gap = 0;
+    pexsi_delta_e = 20.0;
+    pexsi_mu_lower = -10;
+    pexsi_mu_upper = 10;
+    pexsi_mu = 0.0;
+    pexsi_mu_thr = 0.05;
+    pexsi_mu_expand = 0.3;
+    pexsi_mu_guard = 0.2;
+    pexsi_elec_thr = 0.001;
+    pexsi_zero_thr = 1e-10;
     return;
 }
 
@@ -1256,6 +1288,10 @@ bool Input::Read(const std::string& fn)
         {
             read_value(ifs, mixing_ndim);
         }
+        else if (strcmp("mixing_restart", word) == 0)
+        {
+            read_value(ifs, mixing_restart);
+        }
         else if (strcmp("mixing_gg0", word) == 0) // mohan add 2014-09-27
         {
             read_value(ifs, mixing_gg0);
@@ -1327,6 +1363,14 @@ bool Input::Read(const std::string& fn)
         {
             read_bool(ifs, out_chg);
         }
+        else if (strcmp("band_print_num", word) == 0)
+        {
+            read_value(ifs, band_print_num);
+        }
+        else if (strcmp("bands_to_print", word) == 0)
+        {
+            ifs.ignore(150, '\n');
+        }
         else if (strcmp("out_dm", word) == 0)
         {
             read_bool(ifs, out_dm);
@@ -1378,13 +1422,13 @@ bool Input::Read(const std::string& fn)
         }
         else if (strcmp("out_band", word) == 0)
         {
-            read_bool(ifs, out_band);
+            read_value2stdvector(ifs, out_band);
+            if(out_band.size() == 1) out_band.push_back(8);
         }
         else if (strcmp("out_proj_band", word) == 0)
         {
             read_bool(ifs, out_proj_band);
         }
-
         else if (strcmp("out_mat_hs", word) == 0)
         {
             read_value2stdvector(ifs, out_mat_hs);
@@ -2290,6 +2334,9 @@ bool Input::Read(const std::string& fn)
         {
             read_value(ifs, sc_file);
         }
+        //----------------------------------------------------------------------------------
+        //    Quasiatomic orbital
+        //----------------------------------------------------------------------------------
         else if (strcmp("qo_switch", word) == 0){
             read_bool(ifs, qo_switch);
         }
@@ -2305,6 +2352,106 @@ bool Input::Read(const std::string& fn)
         else if (strcmp("qo_screening_coeff", word) == 0){
             read_value2stdvector(ifs, qo_screening_coeff);
         }
+        //----------------------------------------------------------------------------------
+        //    PEXSI
+        //----------------------------------------------------------------------------------
+        else if (strcmp("pexsi_npole", word) == 0){
+            read_value(ifs, pexsi_npole);
+        }
+        else if (strcmp("pexsi_inertia", word) == 0){
+            read_value(ifs, pexsi_inertia);
+        }
+        else if (strcmp("pexsi_nmax", word) == 0) {
+            read_value(ifs, pexsi_nmax);
+        }
+        // else if (strcmp("pexsi_symbolic", word) == 0)
+        // {
+        //     read_value(ifs, pexsi_symbolic);
+        // }
+        else if (strcmp("pexsi_comm", word) == 0)
+        {
+            read_value(ifs, pexsi_comm);
+        }
+        else if (strcmp("pexsi_storage", word) == 0)
+        {
+            read_value(ifs, pexsi_storage);
+        }
+        else if (strcmp("pexsi_ordering", word) == 0)
+        {
+            read_value(ifs, pexsi_ordering);
+        }
+        else if (strcmp("pexsi_row_ordering", word) == 0)
+        {
+            read_value(ifs, pexsi_row_ordering);
+        }
+        else if (strcmp("pexsi_nproc", word) == 0)
+        {
+            read_value(ifs, pexsi_nproc);
+        }
+        else if (strcmp("pexsi_symm", word) == 0)
+        {
+            read_value(ifs, pexsi_symm);
+        }
+        else if (strcmp("pexsi_trans", word) == 0)
+        {
+            read_value(ifs, pexsi_trans);
+        }
+        else if (strcmp("pexsi_method", word) == 0)
+        {
+            read_value(ifs, pexsi_method);
+        }
+        else if (strcmp("pexsi_nproc_pole", word) == 0)
+        {
+            read_value(ifs, pexsi_nproc_pole);
+        }
+        // else if (strcmp("pexsi_spin", word) == 0)
+        // {
+        //     read_value(ifs, pexsi_spin);
+        // }
+        else if (strcmp("pexsi_temp", word) == 0)
+        {
+            read_value(ifs, pexsi_temp);
+        }
+        else if (strcmp("pexsi_gap", word) == 0)
+        {
+            read_value(ifs, pexsi_gap);
+        }
+        else if (strcmp("pexsi_delta_e", word) == 0)
+        {
+            read_value(ifs, pexsi_delta_e);
+        }
+        else if (strcmp("pexsi_mu_lower", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_lower);
+        }
+        else if (strcmp("pexsi_mu_upper", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_upper);
+        }
+        else if (strcmp("pexsi_mu", word) == 0)
+        {
+            read_value(ifs, pexsi_mu);
+        }
+        else if (strcmp("pexsi_mu_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_thr);
+        }
+        else if (strcmp("pexsi_mu_expand", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_expand);
+        }
+        else if (strcmp("pexsi_mu_guard", word) == 0)
+        {
+            read_value(ifs, pexsi_mu_guard);
+        }
+        else if (strcmp("pexsi_elec_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_elec_thr);
+        }
+        else if (strcmp("pexsi_zero_thr", word) == 0)
+        {
+            read_value(ifs, pexsi_zero_thr);
+        }
         else
         {
             // xiaohui add 2015-09-15
@@ -2369,6 +2516,29 @@ bool Input::Read(const std::string& fn)
         ModuleBase::WARNING_QUIT("Input", "The ntype in INPUT is not equal to the ntype counted in STRU, check it.");
     }
 
+    if(band_print_num > 0)
+    {
+        bands_to_print.resize(band_print_num);
+        ifs.clear();
+        ifs.seekg(0); // move to the beginning of the file
+        ifs.rdstate();
+        while (ifs.good())
+        {
+            ifs >> word1;
+            if (ifs.eof() != 0)
+                break;
+            strtolower(word1, word); // convert uppercase std::string to lower case; word1 --> word
+
+            if (strcmp("bands_to_print", word) == 0)
+            {
+                for(int i = 0; i < band_print_num; i ++)
+                {
+                    ifs >> bands_to_print[i];
+                }
+            }
+        }
+    }
+
     //----------------------------------------------------------
     //       DFT+U    Xin Qu  added on 2020-10-29
     //----------------------------------------------------------
@@ -2826,7 +2996,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04
         this->relax_nmax = 1;
         out_stru = 0;
         out_dos = 0;
-        out_band = 0;
+        out_band[0] = 0;
         out_proj_band = 0;
         cal_force = 0;
         init_wfc = "file";
@@ -2843,7 +3013,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04
         this->relax_nmax = 1;
         out_stru = 0;
         out_dos = 0;
-        out_band = 0;
+        out_band[0] = 0;
         out_proj_band = 0;
         cal_force = 0;
         init_wfc = "file";
@@ -3292,6 +3462,7 @@ void Input::Bcast()
     Parallel_Common::bcast_string(mixing_mode);
     Parallel_Common::bcast_double(mixing_beta);
     Parallel_Common::bcast_int(mixing_ndim);
+    Parallel_Common::bcast_int(mixing_restart);
     Parallel_Common::bcast_double(mixing_gg0); // mohan add 2014-09-27
     Parallel_Common::bcast_double(mixing_beta_mag);
     Parallel_Common::bcast_double(mixing_gg0_mag);
@@ -3325,7 +3496,8 @@ void Input::Bcast()
     Parallel_Common::bcast_int(out_wfc_pw);
     Parallel_Common::bcast_bool(out_wfc_r);
     Parallel_Common::bcast_int(out_dos);
-    Parallel_Common::bcast_bool(out_band);
+    if(GlobalV::MY_RANK != 0) out_band.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */
+    Parallel_Common::bcast_int(out_band.data(), 2);
     Parallel_Common::bcast_bool(out_proj_band);
     if(GlobalV::MY_RANK != 0) out_mat_hs.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */
     Parallel_Common::bcast_int(out_mat_hs.data(), 2);
@@ -3523,6 +3695,17 @@ void Input::Bcast()
     Parallel_Common::bcast_bool(restart_save);  // Peize Lin add 2020.04.04
     Parallel_Common::bcast_bool(restart_load);  // Peize Lin add 2020.04.04
 
+    Parallel_Common::bcast_int(band_print_num);
+    if(GlobalV::MY_RANK != 0)
+    {
+        bands_to_print.resize(band_print_num);
+    }
+
+    for(int i = 0; i < band_print_num; i++)
+    {
+        Parallel_Common::bcast_int(bands_to_print[i]);
+    }
+
     //-----------------------------------------------------------------------------------
     // DFT+U (added by Quxin 2020-10-29)
     //-----------------------------------------------------------------------------------
@@ -3625,6 +3808,34 @@ void Input::Bcast()
     Parallel_Common::bcast_bool(qo_switch);
     Parallel_Common::bcast_string(qo_basis);
     Parallel_Common::bcast_double(qo_thr);
+    //==========================================================
+    // PEXSI
+    //==========================================================
+    Parallel_Common::bcast_int(pexsi_npole);
+    Parallel_Common::bcast_int(pexsi_inertia);
+    Parallel_Common::bcast_int(pexsi_nmax);
+    // Parallel_Common::bcast_int(pexsi_symbolic);
+    Parallel_Common::bcast_int(pexsi_comm);
+    Parallel_Common::bcast_int(pexsi_storage);
+    Parallel_Common::bcast_int(pexsi_ordering);
+    Parallel_Common::bcast_int(pexsi_row_ordering);
+    Parallel_Common::bcast_int(pexsi_nproc);
+    Parallel_Common::bcast_int(pexsi_symm);
+    Parallel_Common::bcast_int(pexsi_trans);
+    Parallel_Common::bcast_int(pexsi_method);
+    Parallel_Common::bcast_int(pexsi_nproc_pole);
+    // Parallel_Common::bcast_double(pexsi_spin);
+    Parallel_Common::bcast_double(pexsi_temp);
+    Parallel_Common::bcast_double(pexsi_gap);
+    Parallel_Common::bcast_double(pexsi_delta_e);
+    Parallel_Common::bcast_double(pexsi_mu_lower);
+    Parallel_Common::bcast_double(pexsi_mu_upper);
+    Parallel_Common::bcast_double(pexsi_mu);
+    Parallel_Common::bcast_double(pexsi_mu_thr);
+    Parallel_Common::bcast_double(pexsi_mu_expand);
+    Parallel_Common::bcast_double(pexsi_mu_guard);
+    Parallel_Common::bcast_double(pexsi_elec_thr);
+    Parallel_Common::bcast_double(pexsi_zero_thr);
     /* broadcasting std::vector is sometime a annorying task... */
     if (ntype != 0) /* ntype has been broadcasted before */
     {
@@ -3922,10 +4133,11 @@ void Input::Check(void)
         }
         else if (ks_solver == "pexsi")
         {
-#ifndef __MPI
-            ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version.");
-#else
+#ifdef __PEXSI
             GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
+#else
+            ModuleBase::WARNING_QUIT("Input",
+                "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx.");
 #endif
 
 
diff --git a/source/module_io/input.h b/source/module_io/input.h
index b4e983abad..1d29c6311a 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -232,6 +232,7 @@ class Input
     std::string mixing_mode; // "plain","broyden",...
     double mixing_beta; // 0 : no_mixing
     int mixing_ndim; // used in Broyden method
+    int mixing_restart;
     double mixing_gg0; // used in kerker method. mohan add 2014-09-27
     double mixing_beta_mag;
     double mixing_gg0_mag;
@@ -259,11 +260,13 @@ class Input
     bool out_chg; // output charge density. 0: no; 1: yes
     bool out_dm; // output density matrix.
     bool out_dm1;
+    int band_print_num;
+    std::vector<int> bands_to_print;
     int out_pot; // yes or no
     int out_wfc_pw; // 0: no; 1: txt; 2: dat
     bool out_wfc_r; // 0: no; 1: yes
     int out_dos; // dos calculation. mohan add 20090909
-    bool out_band; // band calculation pengfei 2014-10-13
+    std::vector<int> out_band; // band calculation pengfei 2014-10-13
     bool out_proj_band; // projected band structure calculation jiyy add 2022-05-11
     std::vector<int> out_mat_hs; // output H matrix and S matrix in local basis.
     bool out_mat_xc; // output exchange-correlation matrix in KS-orbital representation.
@@ -599,6 +602,34 @@ class Input
     double qo_thr = 1e-6;
     std::vector<std::string> qo_strategy = {};
     std::vector<double> qo_screening_coeff = {};
+    //==========================================================
+    // variables for PEXSI
+    //==========================================================
+    int pexsi_npole = 54;
+    int pexsi_inertia = 1;
+    int pexsi_nmax = 80;
+    // int pexsi_symbolic = 1;
+    int pexsi_comm = 1;
+    int pexsi_storage = 1;
+    int pexsi_ordering = 0;
+    int pexsi_row_ordering = 1;
+    int pexsi_nproc = 1;
+    int pexsi_symm = 1;
+    int pexsi_trans = 0;
+    int pexsi_method = 1;
+    int pexsi_nproc_pole = 1;
+    // double pexsi_spin = 2;
+    double pexsi_temp = 0.0001;
+    double pexsi_gap = 0;
+    double pexsi_delta_e = 20.0;
+    double pexsi_mu_lower = -10;
+    double pexsi_mu_upper = 10;
+    double pexsi_mu = 0.0;
+    double pexsi_mu_thr = 0.05;
+    double pexsi_mu_expand = 0.3;
+    double pexsi_mu_guard = 0.2;
+    double pexsi_elec_thr = 0.001;
+    double pexsi_zero_thr = 1e-10;
     
   private:
     //==========================================================
@@ -667,7 +698,15 @@ class Input
     template <typename T>
     typename std::enable_if<std::is_same<T, double>::value, T>::type cast_string(const std::string& str) { return std::stod(str); }
     template <typename T>
-    typename std::enable_if<std::is_same<T, int>::value, T>::type cast_string(const std::string& str) { return std::stoi(str); }
+    typename std::enable_if<std::is_same<T, int>::value, T>::type cast_string(const std::string& str)
+    {
+        if (str == "true" || str == "1")
+            return 1;
+        else if (str == "false" || str == "0")
+            return 0;
+        else
+            return std::stoi(str);
+    }
     template <typename T>
     typename std::enable_if<std::is_same<T, bool>::value, T>::type cast_string(const std::string& str) { return (str == "true" || str == "1"); }
     template <typename T>
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index a52245d05c..d6e3371111 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -750,6 +750,7 @@ void Input_Conv::Convert(void)
     GlobalV::MIXING_MODE = INPUT.mixing_mode;
     GlobalV::MIXING_BETA = INPUT.mixing_beta;
     GlobalV::MIXING_NDIM = INPUT.mixing_ndim;
+    GlobalV::MIXING_RESTART = INPUT.mixing_restart;
     GlobalV::MIXING_GG0 = INPUT.mixing_gg0;
     GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag;
     GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag;
@@ -765,6 +766,35 @@ void Input_Conv::Convert(void)
     GlobalV::qo_strategy = INPUT.qo_strategy;
     GlobalV::qo_thr = INPUT.qo_thr;
     GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff;
+
+    //-----------------------------------------------
+    // PEXSI related parameters
+    //-----------------------------------------------
+    GlobalV::pexsi_npole = INPUT.pexsi_npole;
+    GlobalV::pexsi_inertia = INPUT.pexsi_inertia;
+    GlobalV::pexsi_nmax = INPUT.pexsi_nmax;
+    // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic;
+    GlobalV::pexsi_comm = INPUT.pexsi_comm;
+    GlobalV::pexsi_storage = INPUT.pexsi_storage;
+    GlobalV::pexsi_ordering = INPUT.pexsi_ordering;
+    GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering;
+    GlobalV::pexsi_nproc = INPUT.pexsi_nproc;
+    GlobalV::pexsi_symm = INPUT.pexsi_symm;
+    GlobalV::pexsi_trans = INPUT.pexsi_trans;
+    GlobalV::pexsi_method = INPUT.pexsi_method;
+    GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
+    // GlobalV::pexsi_spin = INPUT.pexsi_spin;
+    GlobalV::pexsi_temp = INPUT.pexsi_temp;
+    GlobalV::pexsi_gap = INPUT.pexsi_gap;
+    GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e;
+    GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower;
+    GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper;
+    GlobalV::pexsi_mu = INPUT.pexsi_mu;
+    GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr;
+    GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand;
+    GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard;
+    GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr;
+    GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr;
     ModuleBase::timer::tick("Input_Conv", "Convert");
     return;
 }
diff --git a/source/module_io/mulliken_charge.cpp b/source/module_io/mulliken_charge.cpp
index 393da5fda4..bdcdb5a035 100644
--- a/source/module_io/mulliken_charge.cpp
+++ b/source/module_io/mulliken_charge.cpp
@@ -44,7 +44,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector<std::vector<double>>
         const char N_char = 'N';
         const int one_int = 1;
         const double one_float = 1.0, zero_float = 0.0;        
-        pdgemm_(&T_char,
+        pdgemm_(&N_char,
                 &T_char,
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
@@ -156,7 +156,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector<std::vector<std::com
         const char N_char = 'N';
         const int one_int = 1;
         const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
-        pzgemm_(&T_char,
+        pzgemm_(&N_char,
                 &T_char,
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
diff --git a/source/module_io/nscf_band.cpp b/source/module_io/nscf_band.cpp
index d8b7b05ca6..290dc58bd3 100644
--- a/source/module_io/nscf_band.cpp
+++ b/source/module_io/nscf_band.cpp
@@ -3,6 +3,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
+#include "module_base/formatter_physfmt.h"
 
 void ModuleIO::nscf_band(
 	const int &is,
@@ -10,6 +11,7 @@ void ModuleIO::nscf_band(
 	const int &nks, 
 	const int &nband,
 	const double &fermie,
+	const int &precision,
 	const ModuleBase::matrix& ekb,
 	const K_Vectors& kv,
 	const Parallel_Kpoints* Pkpoints)
@@ -33,23 +35,28 @@ void ModuleIO::nscf_band(
 		if (ik>0)
 		{
 			auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1];
-			klength[ik] = klength[ik-1] + delta.norm();
+			klength[ik] = klength[ik-1];
+			klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? delta.norm() : 0.0;
 		}
+		/* first find if present kpoint in present pool */
 		if ( GlobalV::MY_POOL == Pkpoints->whichpool[ik] )
 		{
+			/* then get the local kpoint index, which starts definitly from 0 */
 			const int ik_now = ik - Pkpoints->startk_pool[GlobalV::MY_POOL];
+			/* if present kpoint corresponds the spin of the present one */
 			if( kv.isk[ik_now+is*nks] == is )
 			{ 
 				if ( GlobalV::RANK_IN_POOL == 0)
 				{
-					std::ofstream ofs(out_band_dir.c_str(),std::ios::app);
-					ofs << std::setprecision(8);
-					//start from 1
-					ofs << ik+1;
-					ofs << " " << klength[ik] << " ";
+					formatter::PhysicalFmt physfmt; // create a physical formatter temporarily
+					std::ofstream ofs(out_band_dir.c_str(), std::ios::app);
+					physfmt.adjust_formatter_flexible(4, 0, false); // for integer
+					ofs << physfmt.get_p_formatter()->format(ik+1);
+					physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal
+					ofs << physfmt.get_p_formatter()->format(klength[ik]);
 					for(int ib = 0; ib < nband; ib++)
 					{
-						ofs << " " << (ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV;
+						ofs << physfmt.get_p_formatter()->format((ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV);
 					}
 					ofs << std::endl;
 					ofs.close();	
@@ -83,18 +90,30 @@ void ModuleIO::nscf_band(
 #else
 //	std::cout<<"\n nband = "<<nband<<std::endl;
 //	std::cout<<out_band_dir<<std::endl;
-
+	formatter::PhysicalFmt physfmt; // create a physical formatter temporarily
+	std::vector<double> klength;
+	klength.resize(nks);
+	klength[0] = 0.0;
 	std::ofstream ofs(out_band_dir.c_str());
 	for(int ik=0;ik<nks;ik++)
 	{
+		if (ik>0)
+		{
+			auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1];
+			klength[ik] = klength[ik-1];
+			klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? delta.norm() : 0.0;
+		}
 		if( kv.isk[ik] == is)
 		{
-			ofs<<std::setw(12)<<ik + 1;
+			physfmt.adjust_formatter_flexible(4, 0, false); // for integer
+			ofs << physfmt.get_p_formatter()->format(ik+1);
+			physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal
+			ofs << physfmt.get_p_formatter()->format(klength[ik]); // add klength, in accordance with the MPI version
 			for(int ibnd = 0; ibnd < nband; ibnd++)
 			{
-				ofs <<std::setw(15) << (ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV;
+				ofs << physfmt.get_p_formatter()->format((ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV);
 			}
-			ofs<<std::endl;
+			ofs << std::endl;
 		}
 	}
 	ofs.close();
diff --git a/source/module_io/nscf_band.h b/source/module_io/nscf_band.h
index 6a22427551..3ec96d4a9f 100644
--- a/source/module_io/nscf_band.h
+++ b/source/module_io/nscf_band.h
@@ -12,6 +12,7 @@ namespace ModuleIO
 		const int &nks, 
 		const int &nband, 
 		const double &fermie,
+		const int &precision,
 		const ModuleBase::matrix &ekb,
 		const K_Vectors& kv,
 		const Parallel_Kpoints* Pkpoints);
diff --git a/source/module_io/parameter_pool.cpp b/source/module_io/parameter_pool.cpp
index 906b9b57d8..524df9de87 100644
--- a/source/module_io/parameter_pool.cpp
+++ b/source/module_io/parameter_pool.cpp
@@ -69,7 +69,7 @@ int count_ntype(const std::string& fn)
  * @param input_value_path parameter default value file path
  * @param input_value_path parameter input value file path
  */
-bool Init(const std::string& default_type_path,
+void Init(const std::string& default_type_path,
           const std::string& default_value_path,
           const std::string& input_value_path)
 {
@@ -103,10 +103,8 @@ void strtolower(char* sa, char* sb)
  * @brief Reads the default parameters from the specified file and saves them to the global variable
  *        default_parametes_type
  * @param fn Specifies the path to the file
- * @return true Read successfully
- * @return false Read failure
  */
-bool default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type)
+void default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type)
 {
     std::ifstream inputFile(fn.c_str());
     if (inputFile.is_open())
@@ -122,28 +120,24 @@ bool default_parametes_reader(const std::string& fn, std::map<std::string, std::
     }
     else
     {
-        std::cout << "Cannot open file !" << std::endl;
+        ModuleBase::WARNING_QUIT("Input", "Cannot open file" + fn);
     }
 }
 /**
  * @brief This function is used to read the input parameter file and store it as a key-value pair
  * @param fn Enter the path to the parameter file
  */
-bool input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input)
+void input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input)
 {
-    // The module title information is displayed
     ModuleBase::TITLE("Input", "Read");
-    // If it is not the primary node, return false
     if (GlobalV::MY_RANK != 0)
-        return false;
+        return;
 
     // Open the input parameter file
     std::ifstream ifs(fn.c_str(), std::ios::in); // "in_datas/input_parameters"
-    // If the opening fails, an error message is printed and false is returned
     if (!ifs)
     {
-        std::cout << " Can't find the INPUT file." << std::endl;
-        return false;
+        ModuleBase::WARNING_QUIT("Input", "Can't find the INPUT file at " + fn);
     }
     ifs.clear();
     ifs.seekg(0);
@@ -166,8 +160,7 @@ bool input_parameters_get(const std::string& fn, std::map<std::string, InputPara
     // If ierr is 0, the word "INPUT_PARAMETERS" is not found, and an error message is printed with false
     if (ierr == 0)
     {
-        std::cout << " Error parameter list." << std::endl;
-        return false; // return error : false
+        ModuleBase::WARNING_QUIT("Input", "INPUT_PARAMETERS statement not found.");
     }
     ifs.rdstate();
 
@@ -274,15 +267,11 @@ bool input_parameters_get(const std::string& fn, std::map<std::string, InputPara
         }
         else if (ifs.bad() != 0)
         {
-            std::cout << " Bad input parameters. " << std::endl;
-            return false;
+            ModuleBase::WARNING_QUIT("Input", "Bad input parameters.");
         }
         else if (ifs.fail() != 0)
         {
-            std::cout << " word = " << word << std::endl;
-            std::cout << " Fail to read parameters. " << std::endl;
-            ifs.clear();
-            return false;
+            ModuleBase::WARNING_QUIT("Input", "Fail to read parameters: word = " + std::string(word));
         }
         else if (ifs.good() == 0)
         {
@@ -306,11 +295,9 @@ bool input_parameters_get(const std::string& fn, std::map<std::string, InputPara
     {
         ModuleBase::WARNING_QUIT("Input", "The ntype in INPUT is not equal to the ntype counted in STRU, check it.");
     }
-
-    return true;
 }
 
-bool input_parameters_set(std::map<std::string, InputParameter> input_parameters)
+void input_parameters_set(std::map<std::string, InputParameter> input_parameters)
 {
     if (input_parameters.count("nupdown") != 0)
     {
@@ -831,6 +818,10 @@ bool input_parameters_set(std::map<std::string, InputParameter> input_parameters
     {
         INPUT.mixing_ndim = *static_cast<int*>(input_parameters["mixing_ndim"].get());
     }
+    else if (input_parameters.count("mixing_restart") != 0)
+    {
+        INPUT.mixing_restart = *static_cast<int*>(input_parameters["mixing_restart"].get());
+    }
     else if (input_parameters.count("mixing_gg0") != 0)
     {
         INPUT.mixing_gg0 = *static_cast<double*>(input_parameters["mixing_gg0"].get());
@@ -917,7 +908,7 @@ bool input_parameters_set(std::map<std::string, InputParameter> input_parameters
     }
     else if (input_parameters.count("out_band") != 0)
     {
-        INPUT.out_band = *static_cast<bool*>(input_parameters["out_band"].get());
+        INPUT.out_band = *static_cast<std::vector<int>*>(input_parameters["out_band"].get());
     }
     else if (input_parameters.count("out_proj_band") != 0)
     {
diff --git a/source/module_io/parameter_pool.h b/source/module_io/parameter_pool.h
index 83baedd036..bd4ae575dd 100644
--- a/source/module_io/parameter_pool.h
+++ b/source/module_io/parameter_pool.h
@@ -241,12 +241,12 @@ class InputParameter
         }
     }
 };
-bool Init(const std::string& default_type_path,
+void Init(const std::string& default_type_path,
           const std::string& default_value_path,
           const std::string& input_value_path);
-bool default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type);
-bool input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input);
-bool input_parameters_set(std::map<std::string, InputParameter> input_parameters);
+void default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type);
+void input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input);
+void input_parameters_set(std::map<std::string, InputParameter> input_parameters);
 
 extern std::map<std::string, InputParameter> input_parameters;
 extern std::map<std::string, std::string> default_parametes_type;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index f0d7e43f68..a566827792 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -183,6 +183,7 @@ TEST_F(InputConvTest, Conv)
 	EXPECT_EQ(GlobalV::sc_mag_switch,0);
     EXPECT_TRUE(GlobalV::decay_grad_switch);
     EXPECT_EQ(GlobalV::sc_file, "sc.json");
+	EXPECT_EQ(GlobalV::MIXING_RESTART,0);
 }
 
 TEST_F(InputConvTest, ConvRelax)
diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp
index 02a5a19e10..11bce873ab 100644
--- a/source/module_io/test/input_test.cpp
+++ b/source/module_io/test/input_test.cpp
@@ -176,9 +176,11 @@ TEST_F(InputTest, Default)
         EXPECT_EQ(INPUT.out_wfc_pw,0);
         EXPECT_EQ(INPUT.out_wfc_r,0);
         EXPECT_EQ(INPUT.out_dos,0);
-        EXPECT_EQ(INPUT.out_band,0);
+        EXPECT_EQ(INPUT.out_band[0],0);
+		EXPECT_EQ(INPUT.out_band[1],8);
         EXPECT_EQ(INPUT.out_proj_band,0);
         EXPECT_EQ(INPUT.out_mat_hs[0],0);
+		EXPECT_EQ(INPUT.out_mat_hs[1],8);
         EXPECT_EQ(INPUT.out_mat_hs2,0);
         EXPECT_EQ(INPUT.out_mat_xc, 0);
         EXPECT_EQ(INPUT.out_interval,1);
@@ -539,9 +541,11 @@ TEST_F(InputTest, Read)
         EXPECT_EQ(INPUT.out_wfc_pw,0);
         EXPECT_EQ(INPUT.out_wfc_r,0);
         EXPECT_EQ(INPUT.out_dos,0);
-        EXPECT_EQ(INPUT.out_band,0);
+        EXPECT_EQ(INPUT.out_band[0],0);
+		EXPECT_EQ(INPUT.out_band[1],8);
         EXPECT_EQ(INPUT.out_proj_band,0);
         EXPECT_EQ(INPUT.out_mat_hs[0],0);
+		EXPECT_EQ(INPUT.out_mat_hs[1],8);
         EXPECT_EQ(INPUT.out_mat_hs2,0);
         EXPECT_EQ(INPUT.out_mat_xc, 0);
         EXPECT_EQ(INPUT.out_interval,1);
@@ -921,7 +925,8 @@ TEST_F(InputTest, Default_2)
     EXPECT_EQ(INPUT.relax_nmax, 1);
     EXPECT_EQ(INPUT.out_stru, 0);
     EXPECT_EQ(INPUT.symmetry, "0");
-	EXPECT_EQ(INPUT.out_band,0);
+	EXPECT_EQ(INPUT.out_band[0],0);
+	EXPECT_EQ(INPUT.out_band[1],8);
 	EXPECT_EQ(INPUT.out_proj_band,0);
 	EXPECT_EQ(INPUT.cal_force,0);
 	EXPECT_EQ(INPUT.init_wfc,"file");
@@ -943,7 +948,8 @@ TEST_F(InputTest, Default_2)
     EXPECT_EQ(INPUT.relax_nmax, 1);
     EXPECT_EQ(INPUT.symmetry, "0");
     EXPECT_EQ(INPUT.out_stru, 0);
-	EXPECT_EQ(INPUT.out_band,0);
+	EXPECT_EQ(INPUT.out_band[0],0);
+	EXPECT_EQ(INPUT.out_band[1],8);
 	EXPECT_EQ(INPUT.out_proj_band,0);
 	EXPECT_EQ(INPUT.cal_force,0);
 	EXPECT_EQ(INPUT.init_wfc,"file");
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index 58e04eb32d..d005fdfccc 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -26,12 +26,13 @@ class InputParaTest : public ::testing::Test
 #ifdef __MPI
 TEST_F(InputParaTest, Bcast)
 {
+    INPUT.Default();
     if (GlobalV::MY_RANK == 0)
     {
-        INPUT.Default(); /* hmmm... why there is not Default_2 here? and, seems Default is execute directly on each processor? */
+        INPUT.suffix = "BcastTest";
     }
     INPUT.Bcast();
-    EXPECT_EQ(INPUT.suffix, "ABACUS");
+    EXPECT_EQ(INPUT.suffix, "BcastTest");
     EXPECT_EQ(INPUT.stru_file, "");
     EXPECT_EQ(INPUT.kpoint_file, "");
     EXPECT_EQ(INPUT.pseudo_dir, "");
@@ -180,7 +181,8 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.out_wfc_pw, 0);
     EXPECT_EQ(INPUT.out_wfc_r, 0);
     EXPECT_EQ(INPUT.out_dos, 0);
-    EXPECT_EQ(INPUT.out_band, 0);
+    EXPECT_EQ(INPUT.out_band[0], 0);
+    EXPECT_EQ(INPUT.out_band[1], 8);
     EXPECT_EQ(INPUT.out_proj_band, 0);
     EXPECT_EQ(INPUT.out_mat_hs[0], 0);
     EXPECT_EQ(INPUT.out_mat_hs[1], 8);
@@ -379,6 +381,7 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_TRUE(INPUT.mdp.dump_virial);
     EXPECT_FALSE(INPUT.mixing_tau);
     EXPECT_FALSE(INPUT.mixing_dftu);
+    EXPECT_EQ(INPUT.mixing_restart,0);
     EXPECT_EQ(INPUT.out_bandgap, 0);
     EXPECT_EQ(INPUT.out_mat_t, 0);
 
diff --git a/source/module_io/test/support/INPUT b/source/module_io/test/support/INPUT
index 469dff2ff4..4fbde867db 100644
--- a/source/module_io/test/support/INPUT
+++ b/source/module_io/test/support/INPUT
@@ -59,7 +59,7 @@ out_pot                        2 #output realspace potential
 out_wfc_pw                     0 #output wave functions
 out_wfc_r                      0 #output wave functions in realspace
 out_dos                        0 #output energy and dos
-out_band                       false #output energy and band structure
+out_band                       0 #output energy and band structure
 out_proj_band                  FaLse #output projected band structure
 restart_save                   f #print to disk every step for restart
 restart_load                   F #restart from disk
diff --git a/source/module_io/test/support/witestfile b/source/module_io/test/support/witestfile
index 4043773876..4db819d53f 100644
--- a/source/module_io/test/support/witestfile
+++ b/source/module_io/test/support/witestfile
@@ -55,7 +55,7 @@ out_pot                        2 #output realspace potential
 out_wfc_pw                     0 #output wave functions
 out_wfc_r                      0 #output wave functions in realspace
 out_dos                        0 #output energy and dos
-out_band                       false #output energy and band structure
+out_band                       0 #output energy and band structure
 out_proj_band                  FaLse #output projected band structure
 restart_save                   f #print to disk every step for restart
 restart_load                   F #restart from disk
diff --git a/source/module_io/test/to_qo_test.cpp b/source/module_io/test/to_qo_test.cpp
index 93692f858e..9477b2eb54 100644
--- a/source/module_io/test/to_qo_test.cpp
+++ b/source/module_io/test/to_qo_test.cpp
@@ -543,7 +543,39 @@ TEST_F(toQOTest, CalculateSelfOvlpRFull)
     //tqo.write_ovlp(tqo.ovlp_R()[0], "QO_self_ovlp.dat");
 }
 
-TEST_F(toQOTest, BuildPswfc)
+/* Si_dojo_soc.upf is special: two p orbitals, one s orbital */
+
+TEST_F(toQOTest, BuildPswfcPartial1)
+{
+    define_fcc_cell(ucell);
+    toQO tqo("pswfc", {"s", "s"});
+    tqo.unwrap_unitcell(&ucell);
+    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
+    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
+    EXPECT_EQ(tqo.nchi(), 2);
+}
+
+TEST_F(toQOTest, BuildPswfcPartial2)
+{
+    define_fcc_cell(ucell);
+    toQO tqo("pswfc", {"ps", "s"});
+    tqo.unwrap_unitcell(&ucell);
+    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
+    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
+    EXPECT_EQ(tqo.nchi(), 8); // the first element is Si, it has two p orbitals, so 3+3+1+1
+}
+
+TEST_F(toQOTest, BuildPswfcPartial3)
+{
+    define_fcc_cell(ucell);
+    toQO tqo("pswfc", {"all", "p"});
+    tqo.unwrap_unitcell(&ucell);
+    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
+    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
+    EXPECT_EQ(tqo.nchi(), 10);
+}
+
+TEST_F(toQOTest, BuildPswfcAll)
 {
     define_fcc_cell(ucell);
     toQO tqo("pswfc", {"all", "all"});
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index d61133715d..8dccb5627a 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -384,13 +384,16 @@ TEST_F(write_input, Mixing7)
     std::string output((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
     EXPECT_THAT(output, testing::HasSubstr("#Parameters (7.Charge Mixing)"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_type                    broyden #plain; pulay; broyden"));
-    EXPECT_THAT(output,
-                testing::HasSubstr("mixing_beta                    0.7 #mixing parameter: 0 means no new charge"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_beta                    0.7 #mixing parameter: 0 means no new charge"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_ndim                    8 #mixing dimension in pulay or broyden"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_gg0                     0 #mixing parameter in kerker"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_beta_mag                -10 #mixing parameter for magnetic density"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_mag                 0 #mixing parameter in kerker"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_min                 0.1 #the minimum kerker coefficient"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_angle                   -10 #angle mixing parameter for non-colinear calculations"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_tau                     0 #whether to mix tau in mGGA calculation"));
-    EXPECT_THAT(output,
-                testing::HasSubstr("mixing_dftu                    0 #whether to mix locale in DFT+U calculation"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_dftu                    0 #whether to mix locale in DFT+U calculation"));
+    EXPECT_THAT(output, testing::HasSubstr("mixing_restart                 0 #which step to restart mixing during SCF"));
     EXPECT_THAT(output, testing::HasSubstr(""));
     ifs.close();
     remove("write_input_test.log");
diff --git a/source/module_io/test_serial/nscf_band_test.cpp b/source/module_io/test_serial/nscf_band_test.cpp
index 4483bf37cd..db9bf752fb 100644
--- a/source/module_io/test_serial/nscf_band_test.cpp
+++ b/source/module_io/test_serial/nscf_band_test.cpp
@@ -54,9 +54,16 @@ class BandTest : public ::testing::Test
 	    ekb(1,1) =  2.0;
 	    ekb(1,2) =  3.0;
         kv = new K_Vectors;
+        // specify the kpoints
+        kv->kvec_c.resize(nks);
+        kv->kvec_c[0] = ModuleBase::Vector3<double>(0.0, 0.0, 0.0);
+        kv->kvec_c[1] = ModuleBase::Vector3<double>(1.0, 0.0, 0.0);
         kv->isk.resize(nks);
         kv->isk[0] = 0;
         kv->isk[1] = 1;
+        kv->kl_segids.resize(nks);
+        kv->kl_segids[0] = 0;
+        kv->kl_segids[1] = 0;
         Pkpoints = new Parallel_Kpoints;
     }
 
@@ -81,12 +88,12 @@ class BandTest : public ::testing::Test
 TEST_F(BandTest, nscf_band)
 {
     // Call the function to be tested
-    ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, ekb, *kv, Pkpoints);
+    ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, 8, ekb, *kv, Pkpoints);
 
     // Check the output file
     std::ifstream ifs(out_band_dir);
     std::string str((std::istreambuf_iterator<char>(ifs)),std::istreambuf_iterator<char>());
     ASSERT_TRUE(ifs.is_open());
-    EXPECT_THAT(str, testing::HasSubstr("1       -27.2114       -13.6057              0"));
+    EXPECT_THAT(str, testing::HasSubstr("1   0.00000000 -27.21139600 -13.60569800   0.00000000"));
     ifs.close();
 }
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index 6003a4fdb0..cb26bc2283 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -85,6 +85,7 @@ void Input::Print(const std::string &fn) const
     ModuleBase::GlobalFunc::OUTP(ofs, "cal_force", cal_force, "if calculate the force at the end of the electronic iteration");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_freq_ion", out_freq_ion, "the frequency ( >= 0 ) of ionic step to output charge density and wavefunction. 0: output only when ion steps are finished");
     ModuleBase::GlobalFunc::OUTP(ofs, "device", device, "the computing device for ABACUS");
+    ModuleBase::GlobalFunc::OUTP(ofs, "precision", precision, "the computing precision for ABACUS");
 
     ofs << "\n#Parameters (2.PW)" << std::endl;
     ModuleBase::GlobalFunc::OUTP(ofs, "ecutwfc", ecutwfc, "#energy cutoff for wave functions");
@@ -122,7 +123,7 @@ void Input::Print(const std::string &fn) const
     ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_pw", out_wfc_pw, "output wave functions");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_r", out_wfc_r, "output wave functions in realspace");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_dos", out_dos, "output energy and dos");
-    ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band, "output energy and band structure");
+    ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band[0], "output energy and band structure (with precision "+std::to_string(out_band[1])+")");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_proj_band", out_proj_band, "output projected band structure");
     ModuleBase::GlobalFunc::OUTP(ofs, "restart_save", restart_save, "print to disk every step for restart");
     ModuleBase::GlobalFunc::OUTP(ofs, "restart_load", restart_load, "restart from disk");
@@ -222,7 +223,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dk", lcao_dk, "delta k for 1D integration in LCAO");
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dr", lcao_dr, "delta r for 1D integration in LCAO");
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_rmax", lcao_rmax, "max R for 1D two-center integration table");
-    ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix");
+    ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix (with precision "+std::to_string(out_mat_hs[1])+")");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs2", out_mat_hs2, "output H(R) and S(R) matrix");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_dh", out_mat_dh, "output of derivative of H(R) matrix");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_xc", out_mat_xc, "output exchange-correlation matrix in KS-orbital representation");
@@ -247,6 +248,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_type", mixing_mode, "plain; pulay; broyden");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta", mixing_beta, "mixing parameter: 0 means no new charge");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_ndim", mixing_ndim, "mixing dimension in pulay or broyden");
+    ModuleBase::GlobalFunc::OUTP(ofs, "mixing_restart", mixing_restart, "which step to restart mixing during SCF");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0", mixing_gg0, "mixing parameter in kerker");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker");
@@ -493,7 +495,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function");
-  
+
+    ofs << "\n#Parameters (24.PEXSI)" << std::endl;
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure.");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0");
+
     ofs.close();
     return;
 }
\ No newline at end of file
diff --git a/source/module_ri/Exx_LRI.hpp b/source/module_ri/Exx_LRI.hpp
index ace9097bb0..c9b3b69601 100644
--- a/source/module_ri/Exx_LRI.hpp
+++ b/source/module_ri/Exx_LRI.hpp
@@ -12,7 +12,6 @@
 #include "module_ri/exx_abfs-construct_orbs.h"
 #include "module_ri/exx_abfs-io.h"
 #include "module_ri/conv_coulomb_pot_k.h"
-#include "module_ri/conv_coulomb_pot_k-template.h"
 #include "module_base/tool_title.h"
 #include "module_base/timer.h"
 #include "module_ri/serialization_cereal.h"
@@ -71,14 +70,19 @@ void Exx_LRI<Tdata>::init(const MPI_Comm &mpi_comm_in, const K_Vectors &kv_in)
 			case Conv_Coulomb_Pot_K::Ccp_Type::Ccp:
 				return {};
 			case Conv_Coulomb_Pot_K::Ccp_Type::Hf:
-				return {};
+			{
+				// 4/3 * pi * Rcut^3 = V_{supercell} = V_{unitcell} * Nk
+				const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1;
+				const double hf_Rcut = std::pow(0.75 * this->p_kv->nkstot_full/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0);
+				return {{"hf_Rcut", hf_Rcut}};
+			}
 			case Conv_Coulomb_Pot_K::Ccp_Type::Hse:
 				return {{"hse_omega", this->info.hse_omega}};
 			default:
 				throw std::domain_error(std::string(__FILE__)+" line "+std::to_string(__LINE__));	break;
 		}
 	};
-    this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times, this->p_kv->nkstot_full);
+    this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times);
 
 
 	for( size_t T=0; T!=this->abfs.size(); ++T )
diff --git a/source/module_ri/LRI_CV_Tools.hpp b/source/module_ri/LRI_CV_Tools.hpp
index 532e7104fb..8ad95c3715 100644
--- a/source/module_ri/LRI_CV_Tools.hpp
+++ b/source/module_ri/LRI_CV_Tools.hpp
@@ -250,11 +250,10 @@ LRI_CV_Tools::cal_latvec_range(const double &rcut_times)
 	const ModuleBase::Vector3<double> proj = ModuleBase::Mathzone::latvec_projection(
 		std::array<ModuleBase::Vector3<double>,3>{GlobalC::ucell.a1, GlobalC::ucell.a2, GlobalC::ucell.a3});
 	const ModuleBase::Vector3<double> latvec_times = Rcut_max * rcut_times / (proj * GlobalC::ucell.lat0);
-	const ModuleBase::Vector3<Tcell> latvec_times_ceil = 
-		{std::ceil(latvec_times.x),
-		 std::ceil(latvec_times.y),
-		 std::ceil(latvec_times.z)};
-	const ModuleBase::Vector3<Tcell> period = 2 * latvec_times_ceil + ModuleBase::Vector3<Tcell>{1,1,1};
+    const ModuleBase::Vector3<Tcell> latvec_times_ceil = {static_cast<Tcell>(std::ceil(latvec_times.x)),
+                                                          static_cast<Tcell>(std::ceil(latvec_times.y)),
+                                                          static_cast<Tcell>(std::ceil(latvec_times.z))};
+    const ModuleBase::Vector3<Tcell> period = 2 * latvec_times_ceil + ModuleBase::Vector3<Tcell>{1,1,1};
 	return std::array<Tcell,3>{period.x, period.y, period.z};
 }
 
@@ -308,7 +307,7 @@ LRI_CV_Tools::get_dCVws(
 				const Abfs::Vector3_Order<double> R_delta = -tau0+tau1+(RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec);
 				dCVws[it0][it1][R_delta][ix] = dCVs_B.second;
 			}
-		}		
+		}
 	}
 	return dCVws;
 }
diff --git a/source/module_ri/conv_coulomb_pot_k-template.h b/source/module_ri/conv_coulomb_pot_k-template.h
deleted file mode 100644
index 9a3d245286..0000000000
--- a/source/module_ri/conv_coulomb_pot_k-template.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef CONV_COULOMB_POT_K_TEMPLATE_H
-#define CONV_COULOMB_POT_K_TEMPLATE_H
-
-#include "conv_coulomb_pot_k.h"
-#include <vector>
-#include <cmath>
-
-#include "../module_ri/test_code/exx_abfs-construct_orbs-test.h"
-
-
-template< typename T >
-T Conv_Coulomb_Pot_K::cal_orbs_ccp(
-	const T & orbs,
-	const Ccp_Type &ccp_type,
-	const std::map<std::string,double> &parameter,
-	const double rmesh_times, 
-    const int& nks)
-{
-	T orbs_ccp(orbs.size());
-	for( size_t i=0; i!=orbs.size(); ++i )
-		orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times, nks );
-	return orbs_ccp;
-}
-
-extern template
-Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp<Numerical_Orbital_Lm>(
-	const Numerical_Orbital_Lm & orbs,
-	const Ccp_Type &ccp_type,
-	const std::map<std::string,double> &parameter,
-    const double rmesh_times,
-    const int& nks);
-
-	
-	
-template< typename T >
-double Conv_Coulomb_Pot_K::get_rmesh_proportion(
-	const T & orbs,
-	const double psi_threshold)
-{
-	double rmesh_proportion=0;
-	for( const auto &orb : orbs )
-		rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold));
-	return rmesh_proportion;
-}
-
-extern template
-double Conv_Coulomb_Pot_K::get_rmesh_proportion(
-	const Numerical_Orbital_Lm & orbs,
-	const double psi_threshold);
-	
-#endif
\ No newline at end of file
diff --git a/source/module_ri/conv_coulomb_pot_k.cpp b/source/module_ri/conv_coulomb_pot_k.cpp
index 9f573509ee..62dd582a44 100644
--- a/source/module_ri/conv_coulomb_pot_k.cpp
+++ b/source/module_ri/conv_coulomb_pot_k.cpp
@@ -2,104 +2,109 @@
 #include "../module_base/constants.h"
 #include "../module_basis/module_ao/ORB_atomic_lm.h"
 #include "../module_hamilt_pw/hamilt_pwdft/global.h"
-std::vector<double> Conv_Coulomb_Pot_K::cal_psi_ccp( const std::vector<double> & psif )
+
+namespace Conv_Coulomb_Pot_K
 {
-	std::vector<double> psik2_ccp(psif.size());
-	for( size_t ik=0; ik<psif.size(); ++ik )
-		psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik];
-	return psik2_ccp;
-}
 
-// rongshi add 2022-07-27
-// Sphere truction -- Spencer
-std::vector<double> Conv_Coulomb_Pot_K::cal_psi_hf(const int& nks, const std::vector<double> &psif,
-                                                   const std::vector<double> &k_radial,
-                                                   const double omega = 0)
-{	
-    const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1;
-    const double Rc = std::pow(0.75 * nks/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0);
-    std::vector<double> psik2_ccp(psif.size());
-    for (size_t ik = 0; ik < psif.size(); ++ik)
-        psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * Rc));
-    return psik2_ccp;
-}
+	std::vector<double> cal_psi_ccp(
+		const std::vector<double> & psif)
+	{
+		std::vector<double> psik2_ccp(psif.size());
+		for( size_t ik=0; ik<psif.size(); ++ik )
+			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik];
+		return psik2_ccp;
+	}
 
+	// rongshi add 2022-07-27
+	// Sphere truction -- Spencer
+	std::vector<double> cal_psi_hf(
+		const std::vector<double> &psif,
+		const std::vector<double> &k_radial,
+		const double hf_Rcut)
+	{
+		std::vector<double> psik2_ccp(psif.size());
+		for (size_t ik = 0; ik < psif.size(); ++ik)
+			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * hf_Rcut));
+		return psik2_ccp;
+	}
 
-std::vector<double> Conv_Coulomb_Pot_K::cal_psi_hse( 
-	const std::vector<double> & psif,
-	const std::vector<double> & k_radial,
-	const double omega)
-{
-	std::vector<double> psik2_ccp(psif.size());
-	for( size_t ik=0; ik<psif.size(); ++ik )
-		psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1-std::exp(-(k_radial[ik]*k_radial[ik])/(4*omega*omega)));
-	return psik2_ccp;
-}
 
+	std::vector<double> cal_psi_hse(
+		const std::vector<double> & psif,
+		const std::vector<double> & k_radial,
+		const double hse_omega)
+	{
+		std::vector<double> psik2_ccp(psif.size());
+		for( size_t ik=0; ik<psif.size(); ++ik )
+			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1-std::exp(-(k_radial[ik]*k_radial[ik])/(4*hse_omega*hse_omega)));
+		return psik2_ccp;
+	}
 
 
-template<>
-Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp<Numerical_Orbital_Lm>(
-	const Numerical_Orbital_Lm &orbs,
-	const Ccp_Type &ccp_type,
-	const std::map<std::string,double> &parameter,
-    const double rmesh_times,
-    const int& nks)
-{
-	std::vector<double> psik2_ccp;
-	switch(ccp_type)
+
+	template<>
+	Numerical_Orbital_Lm cal_orbs_ccp<Numerical_Orbital_Lm>(
+		const Numerical_Orbital_Lm &orbs,
+		const Ccp_Type &ccp_type,
+		const std::map<std::string,double> &parameter,
+		const double rmesh_times)
 	{
-		case Ccp_Type::Ccp:
-			psik2_ccp = cal_psi_ccp( orbs.get_psif() );		break;
-		case Ccp_Type::Hf:
-        	psik2_ccp = cal_psi_hf(nks, orbs.get_psif(), orbs.get_k_radial());      break;
-		case Ccp_Type::Hse:
-			psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") );		break;
-		default:
-			throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) );		break;
-	}
+		std::vector<double> psik2_ccp;
+		switch(ccp_type)
+		{
+			case Ccp_Type::Ccp:
+				psik2_ccp = cal_psi_ccp( orbs.get_psif() );		break;
+			case Ccp_Type::Hf:
+				psik2_ccp = cal_psi_hf( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hf_Rcut"));      break;
+			case Ccp_Type::Hse:
+				psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") );		break;
+			default:
+				throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) );		break;
+		}
 
-	const double dr = orbs.get_rab().back();
-	const int Nr = (static_cast<int>(orbs.getNr()*rmesh_times)) | 1;
-	std::vector<double> rab(Nr);
-	for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
-		rab[ir] = orbs.getRab(ir);
-	for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
-		rab[ir] = dr;
-	std::vector<double> r_radial(Nr);
-	for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
-		r_radial[ir] = orbs.getRadial(ir);
-	for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
-        r_radial[ir] = orbs.get_r_radial().back() + (ir - orbs.getNr() + 1) * dr;
-	
-	Numerical_Orbital_Lm orbs_ccp;
-	orbs_ccp.set_orbital_info(
- 		orbs.getLabel(),
-	 	orbs.getType(),
-		orbs.getL(),
-		orbs.getChi(),
-	    Nr,
-		ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab),
-		ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial),
-		Numerical_Orbital_Lm::Psi_Type::Psik2,
-		ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2_ccp),
-		orbs.getNk(),
-		orbs.getDk(),
-		orbs.getDruniform(),
-		false,
-		true, GlobalV::CAL_FORCE);
-	return orbs_ccp;
-}
+		const double dr = orbs.get_rab().back();
+		const int Nr = (static_cast<int>(orbs.getNr()*rmesh_times)) | 1;
+		std::vector<double> rab(Nr);
+		for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
+			rab[ir] = orbs.getRab(ir);
+		for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
+			rab[ir] = dr;
+		std::vector<double> r_radial(Nr);
+		for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
+			r_radial[ir] = orbs.getRadial(ir);
+		for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
+			r_radial[ir] = orbs.get_r_radial().back() + (ir - orbs.getNr() + 1) * dr;
 
-template<>
-double Conv_Coulomb_Pot_K::get_rmesh_proportion(
-	const Numerical_Orbital_Lm &orbs,
-	const double psi_threshold)
-{
-	for(int ir=orbs.getNr()-1; ir>=0; --ir)
+		Numerical_Orbital_Lm orbs_ccp;
+		orbs_ccp.set_orbital_info(
+			orbs.getLabel(),
+			orbs.getType(),
+			orbs.getL(),
+			orbs.getChi(),
+			Nr,
+			ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab),
+			ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial),
+			Numerical_Orbital_Lm::Psi_Type::Psik2,
+			ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2_ccp),
+			orbs.getNk(),
+			orbs.getDk(),
+			orbs.getDruniform(),
+			false,
+			true, GlobalV::CAL_FORCE);
+		return orbs_ccp;
+	}
+
+	template<>
+	double get_rmesh_proportion(
+		const Numerical_Orbital_Lm &orbs,
+		const double psi_threshold)
 	{
-		if(std::abs(orbs.getPsi(ir))>=psi_threshold)
-			return static_cast<double>(ir)/orbs.getNr();
+		for(int ir=orbs.getNr()-1; ir>=0; --ir)
+		{
+			if(std::abs(orbs.getPsi(ir))>=psi_threshold)
+				return static_cast<double>(ir)/orbs.getNr();
+		}
+		return 0.0;
 	}
-	return 0.0;
+
 }
diff --git a/source/module_ri/conv_coulomb_pot_k.h b/source/module_ri/conv_coulomb_pot_k.h
index 9adec9d915..d464a53f91 100644
--- a/source/module_ri/conv_coulomb_pot_k.h
+++ b/source/module_ri/conv_coulomb_pot_k.h
@@ -5,40 +5,37 @@
 #include <map>
 #include <string>
 
-class Conv_Coulomb_Pot_K
+namespace Conv_Coulomb_Pot_K
 {
-public:
+	enum class Ccp_Type{		//	parameter:
+		Ccp,					//
+		Hf,						//		"hf_Rcut"
+		Hse};					//		"hse_omega"
 
-	enum class Ccp_Type{		//  parameter:
-		Ccp,                 // 
-		Hf,					//
-		Hse};					//  	"hse_omega"
-
-	template<typename T> static T cal_orbs_ccp(
+	template<typename T> T cal_orbs_ccp(
 		const T &orbs,
 		const Ccp_Type &ccp_type,
 		const std::map<std::string,double> &parameter,
-        const double rmesh_times,
-        const int& nks);
-	
-private:
-		
-	template< typename T > static double get_rmesh_proportion(
+        const double rmesh_times);
+
+  //private:
+	template< typename T > double get_rmesh_proportion(
 		const T &orbs,
 		const double psi_threshold);
-		
-private:
 
-	static std::vector<double> cal_psi_ccp( const std::vector<double> & psif );
-	
-	static std::vector<double> cal_psi_hf(const int& nks, const std::vector<double> &psif,
-                                          const std::vector<double> &k_radial,
-                                          const double omega);
-
-	static std::vector<double> cal_psi_hse( 
+  //private:
+	std::vector<double> cal_psi_ccp(
+		const std::vector<double> & psif);
+	std::vector<double> cal_psi_hf(
+		const std::vector<double> &psif,
+		const std::vector<double> &k_radial,
+		const double hf_Rcut);
+	std::vector<double> cal_psi_hse(
 		const std::vector<double> & psif,
 		const std::vector<double> & k_radial,
-		const double omega);
-};
+		const double hse_omega);
+}
+
+#include "conv_coulomb_pot_k.hpp"
 
 #endif
\ No newline at end of file
diff --git a/source/module_ri/conv_coulomb_pot_k.hpp b/source/module_ri/conv_coulomb_pot_k.hpp
new file mode 100644
index 0000000000..5ca3abe5c8
--- /dev/null
+++ b/source/module_ri/conv_coulomb_pot_k.hpp
@@ -0,0 +1,37 @@
+#ifndef CONV_COULOMB_POT_K_HPP
+#define CONV_COULOMB_POT_K_HPP
+
+#include "conv_coulomb_pot_k.h"
+#include <vector>
+#include <cmath>
+
+namespace Conv_Coulomb_Pot_K
+{
+
+	template< typename T >
+	std::vector<T> cal_orbs_ccp(
+		const std::vector<T> & orbs,
+		const Ccp_Type &ccp_type,
+		const std::map<std::string,double> &parameter,
+		const double rmesh_times)
+	{
+		std::vector<T> orbs_ccp(orbs.size());
+		for( size_t i=0; i!=orbs.size(); ++i )
+			orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times);
+		return orbs_ccp;
+	}
+
+	template< typename T >
+	double get_rmesh_proportion(
+		const std::vector<T> & orbs,
+		const double psi_threshold)
+	{
+		double rmesh_proportion=0;
+		for( const auto &orb : orbs )
+			rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold));
+		return rmesh_proportion;
+	}
+
+}
+
+#endif
\ No newline at end of file
diff --git a/source/module_ri/exx_lip.cpp b/source/module_ri/exx_lip.cpp
index 2f685be5f1..0c4211d890 100644
--- a/source/module_ri/exx_lip.cpp
+++ b/source/module_ri/exx_lip.cpp
@@ -481,7 +481,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib)
 	}
 
 	std::complex<double> * const porter = new std::complex<double> [rho_basis->nrxx];
-	
+
 	for(size_t iw=0; iw< GlobalV::NLOCAL; ++iw)
 	{
 		const std::complex<double> * const phi_w = phi[iw];
@@ -495,7 +495,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib)
 		if( Conv_Coulomb_Pot_K::Ccp_Type::Ccp==info.ccp_type || Conv_Coulomb_Pot_K::Ccp_Type::Hf==info.ccp_type )
 			if((iq==iq_vecik) && (gzero_rank_in_pool==GlobalV::RANK_IN_POOL))							/// need to check while use k_point parallel
 				b0[iw] = b_w[rho_basis->ig_gge0];
-		
+
 		for( size_t ig=0; ig<rho_basis->npw; ++ig)
 			b_w[ig] *= recip_qkg2[ig];
 	}
@@ -634,12 +634,14 @@ void Exx_Lip::write_q_pack() const
 	if(!GlobalV::RANK_IN_POOL)
 	{
 		const std::string exx_q_pack = "exx_q_pack/";
-
+		int return_value=0;
 		const std::string command_mkdir = "test -d " + GlobalV::global_out_dir + exx_q_pack + " || mkdir " + GlobalV::global_out_dir + exx_q_pack;
-		system( command_mkdir.c_str() );	// Need to check
+        return_value = system(command_mkdir.c_str());
+        assert(return_value == 0);
 
-		const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card;
-		system( command_kpoint.c_str() );	// Need to check
+        const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card;
+        return_value = system(command_kpoint.c_str());
+		assert(return_value==0);
 
 		std::stringstream ss_wf_wg;
 		ss_wf_wg << GlobalV::global_out_dir << exx_q_pack << "wf_wg_" << GlobalV::MY_POOL;
diff --git a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
index af0ad58c0d..1aa4b94ac1 100644
--- a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
+++ b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
@@ -1,6 +1,6 @@
-1 0  -3.3870593 -0.79801307 5.0648821 5.0648821 7.8411435 9.605949
-2 0.17320508  -3.8620194 -0.075179882 5.1365314 5.1365314 7.9181006 9.6849562
-3 0.34641016  -4.6307963 1.434193 5.3528485 5.3528485 8.1554014 9.8149803
-4 0.51961524  -5.2581292 3.25095 5.6954121 5.6954121 8.5186442 9.653424
-5 0.69282032  -5.6519805 5.1370698 6.0846605 6.0846605 8.8678468 9.1370653
-6 0.8660254  -5.7858738 6.2887673 6.2887673 6.2887673 8.8364355 8.8364355
+   1   0.00000000  -3.38705933  -0.79801307   5.06488210   5.06488210   7.84114355   9.60594903
+   2   0.17320508  -3.86201936  -0.07517988   5.13653145   5.13653145   7.91810064   9.68495616
+   3   0.34641016  -4.63079629   1.43419304   5.35284854   5.35284854   8.15540136   9.81498032
+   4   0.51961524  -5.25812925   3.25094996   5.69541211   5.69541211   8.51864422   9.65342396
+   5   0.69282032  -5.65198054   5.13706981   6.08466055   6.08466055   8.86784679   9.13706533
+   6   0.86602540  -5.78587376   6.28876728   6.28876728   6.28876728   8.83643550   8.83643550
diff --git a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
index ced64a27bf..0a1088ceb5 100644
--- a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
+++ b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
@@ -1,6 +1,6 @@
-1 0  -3.3870489 -0.79801666 5.0648903 5.0649025 7.841159 9.6059536
-2 0.17320508  -3.861988 -0.075180055 5.1365331 5.136532 7.9181141 9.6849637
-3 0.34641016  -4.6307948 1.43421 5.3529077 5.3528526 8.1553806 9.8149771
-4 0.51961524  -5.2581202 3.2509597 5.6954124 5.6954235 8.5188338 9.6533658
-5 0.69282032  -5.6519785 5.1370872 6.0846758 6.0846914 8.8679617 9.1370593
-6 0.8660254  -5.7858674 6.288817 6.2887755 6.2887815 8.8365307 8.8365071
+   1   0.00000000  -3.38704889  -0.79801666   5.06489026   5.06490253   7.84115900   9.60595365
+   2   0.17320508  -3.86198803  -0.07518005   5.13653307   5.13653200   7.91811409   9.68496369
+   3   0.34641016  -4.63079483   1.43421001   5.35290770   5.35285262   8.15538060   9.81497711
+   4   0.51961524  -5.25812025   3.25095973   5.69541240   5.69542353   8.51883375   9.65336577
+   5   0.69282032  -5.65197852   5.13708720   6.08467575   6.08469139   8.86796173   9.13705931
+   6   0.86602540  -5.78586742   6.28881704   6.28877545   6.28878152   8.83653074   8.83650706
diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
index 168c5723a3..bffad6b08a 100644
--- a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
+++ b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
@@ -3,92 +3,92 @@ CALCULATE THE MULLIkEN ANALYSIS FOR EACH ATOM
  Total charge:	32
 Decomposed Mulliken populations
 0                 Zeta of Fe                        Spin 1                        Spin 2                        Spin 3                        Spin 4
-s                        0                           1.317                       0.05552                        0.2843                       0.02903
-  sum over m                                         1.317                       0.05552                        0.2843                       0.02903
-s                        1                           1.726                      -0.01923                      -0.09498                      0.005159
-  sum over m                                         1.726                      -0.01923                      -0.09498                      0.005159
-s                        2                         0.03246                      -0.04333                       -0.2148                      0.008137
-  sum over m                                       0.03246                      -0.04333                       -0.2148                      0.008137
-s                        3                        -0.02921                      0.005194                       0.02641                      0.001867
-  sum over m                                      -0.02921                      0.005194                       0.02641                      0.001867
-  sum over m+zeta                                    3.046                     -0.001842                     0.0009368                       0.04419
-pz                        0                           2.034                     -0.001185                     -0.005932                     1.545e-06
-px                        0                           2.033                     -0.001283                     -0.006419                     1.538e-06
-py                        0                           2.033                     -0.001188                     -0.005944                     1.543e-06
-  sum over m                                           6.1                     -0.003656                      -0.01829                     4.626e-06
-pz                        1                        -0.02622                     0.0005602                      0.002791                             0
-px                        1                        -0.02639                     0.0006145                      0.003054                             0
-py                        1                        -0.02603                     0.0005563                       0.00277                             0
-  sum over m                                      -0.07864                      0.001731                      0.008615                             0
-  sum over m+zeta                                    6.021                     -0.001925                      -0.00968                     5.611e-06
-dz^2                        0                           1.964                     0.0008273                      0.004131                     4.077e-06
-dxz                        0                           1.044                        0.1755                        0.7507                      0.002258
-dyz                        0                          0.9544                        0.1768                        0.7532                      0.002329
-dx^2-y^2                        0                           1.967                     0.0007523                      0.003756                     3.978e-06
-dxy                        0                           1.055                        0.1751                        0.7495                      0.002251
-  sum over m                                         6.984                         0.529                         2.261                      0.006846
-dz^2                        1                         0.03863                    -0.0008699                     -0.004363                     5.197e-06
-dxz                        1                        -0.03759                     -0.005346                      -0.01936                    -0.0001322
-dyz                        1                        -0.03407                     -0.005734                      -0.02118                    -0.0001342
-dx^2-y^2                        1                         0.03943                    -0.0009093                     -0.004564                     5.691e-06
-dxy                        1                        -0.03787                     -0.005246                       -0.0189                    -0.0001314
-  sum over m                                      -0.03146                      -0.01811                      -0.06836                     -0.000387
-  sum over m+zeta                                    6.952                        0.5109                         2.193                      0.006459
-fz^3                        0                       -0.007049                     0.0007578                      0.003775                             0
-fxz^2                        0                       -0.002045                     0.0002638                      0.001312                             0
-fyz^2                        0                       -0.002729                     0.0002912                      0.001448                             0
-fzx^2-zy^2                        0                       6.273e-05                             0                    -6.642e-06                             0
-fxyz                        0                       1.153e-05                     1.446e-06                     5.675e-06                             0
-fx^3-3*xy^2                        0                        -0.00338                       0.00044                      0.002189                             0
-f3yx^2-y^3                        0                        -0.00407                     0.0004646                      0.002311                             0
-  sum over m                                       -0.0192                      0.002219                       0.01103                     2.581e-06
-  sum over m+zeta                                  -0.0192                      0.002219                       0.01103                     2.581e-06
-Total Charge on atom:  Fe                  16
-Total Magnetism on atom:  Fe                   (0.5093, 2.195, 0.05066)
+s                        0                           1.317                       0.06196                       -0.2625                      -0.07949
+  sum over m                                         1.317                       0.06196                       -0.2625                      -0.07949
+s                        1                           1.726                      -0.01809                       0.09886                      -0.01413
+  sum over m                                         1.726                      -0.01809                       0.09886                      -0.01413
+s                        2                         0.03246                      -0.04153                        0.2209                      -0.02228
+  sum over m                                       0.03246                      -0.04153                        0.2209                      -0.02228
+s                        3                        -0.02921                      0.005609                        -0.025                     -0.005114
+  sum over m                                      -0.02921                      0.005609                        -0.025                     -0.005114
+  sum over m+zeta                                    3.046                      0.007945                        0.0323                        -0.121
+pz                        0                           2.034                     -0.001186                      0.005932                    -3.981e-06
+px                        0                           2.033                     -0.001283                      0.006419                    -3.989e-06
+py                        0                           2.033                     -0.001188                      0.005944                    -3.979e-06
+  sum over m                                           6.1                     -0.003658                        0.0183                    -1.195e-05
+pz                        1                        -0.02621                     0.0005578                     -0.002789                             0
+px                        1                        -0.02639                     0.0006107                     -0.003054                             0
+py                        1                        -0.02603                     0.0005536                     -0.002768                             0
+  sum over m                                      -0.07863                      0.001722                     -0.008611                             0
+  sum over m+zeta                                    6.021                     -0.001936                      0.009684                    -1.277e-05
+dz^2                        0                           1.964                     0.0008269                     -0.004128                    -1.088e-05
+dxz                        0                           1.044                         0.156                       -0.7849                    -0.0003055
+dyz                        0                          0.9592                        0.1564                       -0.7869                    -0.0003096
+dx^2-y^2                        0                           1.967                      0.000752                     -0.003754                    -1.059e-05
+dxy                        0                           1.055                        0.1558                       -0.7835                    -0.0003047
+  sum over m                                         6.988                        0.4698                        -2.363                    -0.0009413
+dz^2                        1                         0.03863                    -0.0008716                      0.004365                    -1.357e-05
+dxz                        1                        -0.03708                     -0.004148                       0.02101                     1.956e-05
+dyz                        1                        -0.03373                     -0.004494                       0.02274                     1.968e-05
+dx^2-y^2                        1                         0.03943                    -0.0009117                      0.004566                    -1.471e-05
+dxy                        1                        -0.03733                     -0.004056                       0.02055                     1.945e-05
+  sum over m                                      -0.03008                      -0.01448                       0.07324                     3.041e-05
+  sum over m+zeta                                    6.958                        0.4553                         -2.29                    -0.0009109
+fz^3                        0                       -0.007044                     0.0007552                     -0.003776                    -1.406e-06
+fxz^2                        0                       -0.002046                     0.0002628                     -0.001314                             0
+fyz^2                        0                        -0.00273                       0.00029                      -0.00145                             0
+fzx^2-zy^2                        0                       5.811e-05                             0                     3.451e-06                             0
+fxyz                        0                        1.14e-05                     1.249e-06                    -6.306e-06                             0
+fx^3-3*xy^2                        0                       -0.003379                     0.0004381                      -0.00219                             0
+f3yx^2-y^3                        0                        -0.00407                     0.0004626                     -0.002313                             0
+  sum over m                                       -0.0192                      0.002209                      -0.01105                    -4.307e-06
+  sum over m+zeta                                  -0.0192                      0.002209                      -0.01105                    -4.307e-06
+Total Charge on atom:  Fe               16.01
+Total Magnetism on atom:  Fe                   (0.4635, -2.259, -0.1219)
 
 
 1                 Zeta of Fe                        Spin 1                        Spin 2                        Spin 3                        Spin 4
-s                        0                           1.275                       0.05341                        0.2605                      -0.02903
-  sum over m                                         1.275                       0.05341                        0.2605                      -0.02903
-s                        1                           1.755                      -0.01752                      -0.08879                     -0.005156
-  sum over m                                         1.755                      -0.01752                      -0.08879                     -0.005156
-s                        2                        -0.02898                       -0.0404                       -0.2039                      -0.00813
-  sum over m                                      -0.02898                       -0.0404                       -0.2039                      -0.00813
-s                        3                        -0.04711                      0.006367                       0.03139                     -0.001874
-  sum over m                                      -0.04711                      0.006367                       0.03139                     -0.001874
-  sum over m+zeta                                    2.954                      0.001862                    -0.0008532                      -0.04419
-pz                        0                           2.032                     -0.001369                     -0.006852                    -1.367e-06
-px                        0                           2.025                    -0.0009208                     -0.004608                    -1.387e-06
-py                        0                           2.032                     -0.001332                     -0.006666                    -1.366e-06
-  sum over m                                         6.089                     -0.003622                      -0.01813                    -4.119e-06
-pz                        1                        -0.02528                     0.0005889                      0.002889                             0
-px                        1                        -0.01606                     0.0001369                     0.0006408                             0
-py                        1                        -0.02466                      0.000571                      0.002802                             0
-  sum over m                                        -0.066                      0.001297                      0.006331                     2.367e-06
-  sum over m+zeta                                    6.023                     -0.002325                      -0.01179                    -1.753e-06
-dz^2                        0                           1.957                      0.001158                      0.005774                    -3.913e-06
-dxz                        0                           1.097                        0.1724                        0.7275                      0.002311
-dyz                        0                          0.9509                        0.1759                        0.7475                      0.002269
-dx^2-y^2                        0                           1.947                      0.001654                      0.008245                    -4.075e-06
-dxy                        0                           1.113                        0.1714                        0.7227                      0.002304
-  sum over m                                         7.065                        0.5225                         2.212                      0.006876
-dz^2                        1                         0.03925                     -0.001062                     -0.005333                    -4.383e-06
-dxz                        1                         -0.0366                     -0.003947                      -0.01263                    -0.0001213
-dyz                        1                        -0.03157                     -0.005197                      -0.01856                    -0.0001267
-dx^2-y^2                        1                         0.04266                     -0.001394                     -0.007002                    -4.206e-06
-dxy                        1                        -0.03743                     -0.003854                      -0.01222                    -0.0001203
-  sum over m                                      -0.02369                      -0.01545                      -0.05575                    -0.0003768
-  sum over m+zeta                                    7.041                        0.5071                         2.156                      0.006499
-fz^3                        0                       -0.006614                     0.0007261                      0.003596                             0
-fxz^2                        0                       -0.001954                     0.0002565                      0.001276                             0
-fyz^2                        0                       -0.002684                     0.0002742                      0.001366                             0
-fzx^2-zy^2                        0                        9.09e-05                      1.99e-05                     8.018e-05                             0
-fxyz                        0                       2.062e-05                     4.102e-06                     1.816e-05                             0
-fx^3-3*xy^2                        0                       -0.003203                     0.0004291                       0.00213                             0
-f3yx^2-y^3                        0                       -0.003698                     0.0004635                      0.002271                             0
-  sum over m                                      -0.01804                      0.002174                       0.01074                             0
-  sum over m+zeta                                 -0.01804                      0.002174                       0.01074                             0
-Total Charge on atom:  Fe                  16
-Total Magnetism on atom:  Fe                   (0.5088, 2.154, -0.03769)
+s                        0                           1.275                       0.04699                       -0.2823                       0.07949
+  sum over m                                         1.275                       0.04699                       -0.2823                       0.07949
+s                        1                           1.755                      -0.01866                       0.08491                       0.01412
+  sum over m                                         1.755                      -0.01866                       0.08491                       0.01412
+s                        2                        -0.02899                      -0.04221                        0.1978                       0.02226
+  sum over m                                      -0.02899                      -0.04221                        0.1978                       0.02226
+s                        3                        -0.04712                       0.00595                      -0.03281                      0.005133
+  sum over m                                      -0.04712                       0.00595                      -0.03281                      0.005133
+  sum over m+zeta                                    2.954                     -0.007928                      -0.03239                         0.121
+pz                        0                           2.032                     -0.001371                       0.00685                     3.967e-06
+px                        0                           2.025                    -0.0009218                      0.004606                     3.958e-06
+py                        0                           2.032                     -0.001333                      0.006664                     3.965e-06
+  sum over m                                         6.089                     -0.003626                       0.01812                     1.189e-05
+pz                        1                        -0.02529                     0.0005803                     -0.002904                             0
+px                        1                        -0.01606                     0.0001295                    -0.0006492                             0
+py                        1                        -0.02466                     0.0005625                     -0.002815                             0
+  sum over m                                      -0.06602                      0.001272                     -0.006367                             0
+  sum over m+zeta                                    6.023                     -0.002353                       0.01175                      1.25e-05
+dz^2                        0                           1.957                      0.001154                     -0.005778                     1.149e-05
+dxz                        0                           1.091                        0.1517                       -0.7637                    -8.462e-05
+dyz                        0                          0.9556                        0.1553                       -0.7815                    -8.443e-05
+dx^2-y^2                        0                           1.947                      0.001648                     -0.008249                     1.233e-05
+dxy                        0                           1.106                        0.1508                       -0.7591                    -8.432e-05
+  sum over m                                         7.056                        0.4606                        -2.318                    -0.0002295
+dz^2                        1                         0.03925                     -0.001067                      0.005328                     1.289e-05
+dxz                        1                        -0.03558                     -0.002824                       0.01439                       2.5e-06
+dyz                        1                        -0.03117                     -0.003962                        0.0201                     2.798e-06
+dx^2-y^2                        1                         0.04266                     -0.001401                      0.006997                      1.29e-05
+dxy                        1                        -0.03637                     -0.002747                       0.01401                     2.475e-06
+  sum over m                                      -0.02122                        -0.012                       0.06082                     3.356e-05
+  sum over m+zeta                                    7.035                        0.4486                        -2.257                     -0.000196
+fz^3                        0                       -0.006615                     0.0007206                     -0.003605                     1.352e-06
+fxz^2                        0                       -0.001955                     0.0002554                     -0.001278                             0
+fyz^2                        0                       -0.002684                     0.0002735                     -0.001368                             0
+fzx^2-zy^2                        0                       9.383e-05                      1.68e-05                    -8.473e-05                             0
+fxyz                        0                       2.053e-05                      3.66e-06                    -1.839e-05                             0
+fx^3-3*xy^2                        0                       -0.003204                     0.0004266                     -0.002134                             0
+f3yx^2-y^3                        0                       -0.003695                     0.0004558                     -0.002281                             0
+  sum over m                                      -0.01804                      0.002152                      -0.01077                     4.022e-06
+  sum over m+zeta                                 -0.01804                      0.002152                      -0.01077                     4.022e-06
+Total Charge on atom:  Fe               15.99
+Total Magnetism on atom:  Fe                   (0.4405, -2.289, 0.1208)
 
 
diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
index 649ae1ef31..8a17a1fada 100644
--- a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
+++ b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
@@ -1,4 +1,4 @@
-etotref -6844.685232776227
-etotperatomref -3422.3426163881
+etotref -6844.326716364628
+etotperatomref -3422.1633581823
 Compare_mulliken_pass 0
-totaltimeref 21.55
+totaltimeref 36.59
diff --git a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
index c3cd3a0b6d..87a35be1b0 100644
--- a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
+++ b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
@@ -1,6 +1,6 @@
-1 0  -3.2007432 -0.55268317 5.3411505 5.3411505 8.2951616 10.292492
-2 0.17320508  -3.6638913 0.16302859 5.4122469 5.4122469 8.3831312 10.350824
-3 0.34641016  -4.4233335 1.6715127 5.6238068 5.6238068 8.6434268 10.399663
-4 0.51961524  -5.0448087 3.4994417 5.9592422 5.9592422 9.0296871 10.132257
-5 0.69282032  -5.4330707 5.3995429 6.3414385 6.3414385 9.3569963 9.5807977
-6 0.8660254  -5.5643351 6.5398122 6.5398122 6.5398122 9.280228 9.280228
+   1   0.00000000  -3.20074324  -0.55268317   5.34115051   5.34115051   8.29516160  10.29249200
+   2   0.17320508  -3.66389133   0.16302859   5.41224692   5.41224692   8.38313120  10.35082356
+   3   0.34641016  -4.42333349   1.67151268   5.62380682   5.62380682   8.64342675  10.39966330
+   4   0.51961524  -5.04480873   3.49944170   5.95924219   5.95924219   9.02968708  10.13225743
+   5   0.69282032  -5.43307067   5.39954292   6.34143853   6.34143853   9.35699627   9.58079775
+   6   0.86602540  -5.56433513   6.53981221   6.53981221   6.53981221   9.28022796   9.28022796
diff --git a/tests/integrate/Autotest.sh b/tests/integrate/Autotest.sh
index 5310998097..37908d1769 100755
--- a/tests/integrate/Autotest.sh
+++ b/tests/integrate/Autotest.sh
@@ -74,6 +74,11 @@ check_out(){
     # check every 'key' word
     #------------------------------------------------------
     for key in $properties; do
+    
+        if [ $key == "totaltimeref" ]; then
+            # echo "time=$cal ref=$ref"
+            break
+        fi
 
         #--------------------------------------------------
         # calculated value
@@ -91,11 +96,6 @@ check_out(){
         #--------------------------------------------------
         deviation=`awk 'BEGIN {x='$ref';y='$cal';printf "%.'$ca'f\n",x-y}'`
 
-        if [ $key == "totaltimeref" ]; then
-            # echo "time=$cal ref=$ref"
-            break
-        fi
-
 
         #--------------------------------------------------
         # If deviation < threshold, then the test passes,
diff --git a/toolchain/README.md b/toolchain/README.md
index 3e747006b7..a8ce2f711e 100644
--- a/toolchain/README.md
+++ b/toolchain/README.md
@@ -91,6 +91,7 @@ The needed dependencies version default:
 - `LibXC` 6.2.2
 - `ELPA` 2023.05.001
 - `CEREAL` 1.3.2
+- `RapidJSON` 1.1.0
 And Intel-oneAPI need user or server manager to manually install from Intel.
 [Intel-oneAPI](https://www.intel.cn/content/www/cn/zh/developer/tools/oneapi/toolkits.html)
 
@@ -102,12 +103,12 @@ Dependencies below are optional， which is NOT installed by default:
 Users can install them by using `--with-*=install` in toolchain*.sh, which is `no` in default.
 > Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, which is also work for libnpy, which is more recommended.
 
-Notice: for `CEREAL`, `Libnpy`, `LibRI` and `LibComm`, 
+Notice: for `CEREAL`,`RapidJSON`, `Libnpy`, `LibRI` and `LibComm`, 
 you need to download them from github.com, 
 rename it as formatted, and put them in `build` directory at the same time
 e.g.:
 ```shell
-# packages downloaded from github.com
+# packages downloaded from github.com, RapidJSON is not supported now
 mv v1.3.2.tar.gz build/cereal-1.3.2.tar.gz
 ```
 
@@ -175,7 +176,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
         -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \
         ......
 ```
-Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS
+Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now
 
 ### shell problem
 If you encounter problem like:
@@ -206,6 +207,8 @@ The default compiler for Intel-oneAPI is `icpx` and `icx`, which will cause prob
 
 The best way is to change `icpx` to `icpc`, `icx` to `icc`. user can manually change it in toolchain*.sh via `--with-intel-classic=yes`
 
+Notice: `icc` and `icpc` from Intel Classic Compiler of Intel-oneAPI is not supported for 2024.0 and newer version.
+
 
 ### LibRI and LibComm problem
 (There is some problem sometimes when compling with LibRI and LibComm, detailed information is needed)
@@ -216,6 +219,9 @@ Sometimes Intel-oneAPI have problem to link `mpirun`,
 which will always show in 2023.2.0 version of MPI in Intel-oneAPI. 
 Try `source /path/to/setvars.sh` or install another version of IntelMPI may help.
 
+which is fixed in 2024.0.0 version of Intel-oneAPI, 
+And will not occur in Intel-MPI before 2021.10.0 (Intel-oneAPI before 2023.2.0)
+
 More problem and possible solution can be accessed via [#2928](https://github.com/deepmodeling/abacus-develop/issues/2928)
 
 
@@ -230,9 +236,6 @@ from ABACUS repo, make dependencies package more independent and flexible.
 2. Users can manually change `pkg_install_dir` variable 
 in `scripts/stage*/install*` to change the installation directory 
 of each packages, which may let the installation more fiexible.
-3. Users can manually change `INSTALL` variable in `scripts/common_vars.sh`
-to change the installation directory of all packages, which may let the
-installation more fiexible.
 
 
 ## More
diff --git a/toolchain/install_abacus_toolchain.sh b/toolchain/install_abacus_toolchain.sh
index be41687307..68c7959ce1 100755
--- a/toolchain/install_abacus_toolchain.sh
+++ b/toolchain/install_abacus_toolchain.sh
@@ -182,10 +182,12 @@ The --with-PKG options follow the rules:
   --with-scalapack        Parallel linear algebra library, needed for parallel
                           calculations.
                           Default = install
+  --with-elpa             Eigenvalue SoLvers for Petaflop-Applications library.
+                          Fast library for large parallel jobs， Especially for ABACUS LCAO
+                          Default = install
   --with-cereal           Enable cereal for ABACUS LCAO
                           Default = install
-  --with-elpa             Eigenvalue SoLvers for Petaflop-Applications library.
-                          Fast library for large parallel jobs.
+  --with-rapidjson        Enable rapidjson for ABACUS to read/write json files
                           Default = install
   --with-libtorch         Enable libtorch the machine learning framework needed for DeePKS
                           Default = no
@@ -230,7 +232,7 @@ EOF
 tool_list="gcc intel cmake"
 mpi_list="mpich openmpi intelmpi"
 math_list="mkl acml openblas"
-lib_list="fftw libxc scalapack elpa cereal libtorch libnpy libri libcomm"
+lib_list="fftw libxc scalapack elpa cereal rapidjson libtorch libnpy libri libcomm"
 package_list="${tool_list} ${mpi_list} ${math_list} ${lib_list}"
 # ------------------------------------------------------------------------
 
@@ -264,7 +266,8 @@ with_acml="__SYSTEM__"
 with_openblas="__INSTALL__"
 with_elpa="__INSTALL__"
 with_cereal="__INSTALL__"
-# with_libtorch="__DONTUSE__"
+with_rapidjson="__INSTALL__"
+# with_libtorch="__DONTUSE__" # default
 # with_libnpy="__DONTUSE__"
 # with_libri="__DONTUSE__"
 # with_libcomm="__DONTUSE__"
@@ -549,6 +552,9 @@ while [ $# -ge 1 ]; do
     --with-cereal*)
       with_cereal=$(read_with "${1}")
       ;;
+    --with-rapidjson*)
+      with_rapidjson=$(read_with "${1}")
+      ;;
     --with-libnpy*)
       with_libnpy=$(read_with "${1}")
       ;;
diff --git a/toolchain/scripts/common_vars.sh b/toolchain/scripts/common_vars.sh
index d04d40fa39..51f941f6fe 100755
--- a/toolchain/scripts/common_vars.sh
+++ b/toolchain/scripts/common_vars.sh
@@ -7,8 +7,7 @@
 # directories and files used by the installer
 ROOTDIR=${ROOTDIR:-"$(pwd -P)"}
 SCRIPTDIR=${SCRIPTDIR:-"${ROOTDIR}/scripts"}
-INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"}
-#INSTALLDIR=${INSTALLDIR:-"${HOME}/abacus_deps"} # advanced installation
+INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} # should not be changed
 BUILDDIR=${BUILDDIR:-"${ROOTDIR}/build"}
 SETUPFILE=${SETUPFILE:-"${INSTALLDIR}/setup"}
 ARCH_FILE_TEMPLATE=${ARCH_FILE_TEMPLATE:-"${SCRIPTDIR}/arch_base.tmpl"}
diff --git a/toolchain/scripts/stage4/install_rapidjson.sh b/toolchain/scripts/stage4/install_rapidjson.sh
new file mode 100755
index 0000000000..b63789ffff
--- /dev/null
+++ b/toolchain/scripts/stage4/install_rapidjson.sh
@@ -0,0 +1,93 @@
+#!/bin/bash -e
+
+# TODO: Review and if possible fix shellcheck errors.
+# shellcheck disable=all
+# RAPIDJSON is not need any complex setting
+# Only problem is the installation from github.com
+
+[ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0
+SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)"
+
+rapidjson_ver="1.1.0"
+rapidjson_sha256="bf7ced29704a1e696fbccf2a2b4ea068e7774fa37f6d7dd4039d0787f8bed98e"
+source "${SCRIPT_DIR}"/common_vars.sh
+source "${SCRIPT_DIR}"/tool_kit.sh
+source "${SCRIPT_DIR}"/signal_trap.sh
+source "${INSTALLDIR}"/toolchain.conf
+source "${INSTALLDIR}"/toolchain.env
+
+[ -f "${BUILDDIR}/setup_rapidjson" ] && rm "${BUILDDIR}/setup_rapidjson"
+
+RAPIDJSON_CFLAGS=""
+! [ -d "${BUILDDIR}" ] && mkdir -p "${BUILDDIR}"
+cd "${BUILDDIR}"
+
+case "$with_rapidjson" in
+  __INSTALL__)
+    echo "==================== Installing RAPIDJSON ===================="
+    dirname="rapidjson-${rapidjson_ver}"
+    pkg_install_dir="${INSTALLDIR}/$dirname"
+    #pkg_install_dir="${HOME}/lib/rapidjson/${rapidjson_ver}"
+    install_lock_file="$pkg_install_dir/install_successful"
+    url="https://github.com/Tencent/rapidjson/archive/refs/tags/v${rapidjson_ver}.tar.gz"
+    filename="rapidjson-${rapidjson_ver}.tar.gz"
+    if verify_checksums "${install_lock_file}"; then
+        echo "$dirname is already installed, skipping it."
+    else
+        if [ -f $filename ]; then
+        echo "$filename is found"
+        else
+        # download from github.com and checksum
+            echo "wget --quiet $url -O $filename"
+            if ! wget --quiet $url -O $filename; then
+            report_error "failed to download $url"
+            recommend_offline_installation $filename $url
+            fi
+        # checksum
+        checksum "$filename" "$rapidjson_sha256"
+        fi
+        echo "Installing from scratch into ${pkg_install_dir}"
+        [ -d $dirname ] && rm -rf $dirname
+        tar -xzf $filename
+        mkdir -p "${pkg_install_dir}"
+        cp -r $dirname/* "${pkg_install_dir}/"
+        write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage4/$(basename ${SCRIPT_NAME})"
+    fi
+        ;;
+    __SYSTEM__)
+        echo "==================== CANNOT Finding RAPIDJSON from system paths NOW ===================="
+        recommend_offline_installation $filename $url
+        # How to do it in rapidjson? -- Zhaoqing in 2023/08/23
+        # check_lib -lxcf03 "libxc"
+        # check_lib -lxc "libxc"
+        # add_include_from_paths LIBXC_CFLAGS "xc.h" $INCLUDE_PATHS
+        # add_lib_from_paths LIBXC_LDFLAGS "libxc.*" $LIB_PATHS
+        ;;
+    __DONTUSE__) ;;
+    
+    *)
+    echo "==================== Linking RAPIDJSON to user paths ===================="
+    check_dir "${pkg_install_dir}"
+    RAPIDJSON_CFLAGS="-I'${pkg_install_dir}'"
+    ;;
+esac
+if [ "$with_rapidjson" != "__DONTUSE__" ]; then
+    if [ "$with_rapidjson" != "__SYSTEM__" ]; then
+    # LibRI deps should find rapidjson include in CPATH
+        cat << EOF > "${BUILDDIR}/setup_rapidjson"
+prepend_path CPATH "$pkg_install_dir/include"
+export CPATH="${pkg_install_dir}/include:"${CPATH}
+EOF
+        cat "${BUILDDIR}/setup_rapidjson" >> $SETUPFILE
+    fi
+    cat << EOF >> "${BUILDDIR}/setup_rapidjson"
+export RAPIDJSON_CFLAGS="${RAPIDJSON_CFLAGS}"
+export RAPIDJSON_ROOT="$pkg_install_dir"
+EOF
+fi
+
+load "${BUILDDIR}/setup_rapidjson"
+write_toolchain_env "${INSTALLDIR}"
+
+cd "${ROOTDIR}"
+report_timing "rapidjson"
diff --git a/toolchain/scripts/stage4/install_stage4.sh b/toolchain/scripts/stage4/install_stage4.sh
index ffe8f670c9..b5c7cf5eed 100755
--- a/toolchain/scripts/stage4/install_stage4.sh
+++ b/toolchain/scripts/stage4/install_stage4.sh
@@ -4,6 +4,7 @@
 # shellcheck disable=all
 
 ./scripts/stage4/install_cereal.sh
+./scripts/stage4/install_rapidjson.sh
 ./scripts/stage4/install_libtorch.sh
 ./scripts/stage4/install_libnpy.sh
 ./scripts/stage4/install_libri.sh
diff --git a/toolchain/scripts/tool_kit.sh b/toolchain/scripts/tool_kit.sh
index d07445089b..120b623fee 100755
--- a/toolchain/scripts/tool_kit.sh
+++ b/toolchain/scripts/tool_kit.sh
@@ -54,9 +54,12 @@ By download $__filename from $__url,
 Rename it as $__filename and put it into ${BUILDDIR},
 And re-run toolchain installation script.
 
-Instead of github.com. you can manually install requirements packages via:
-1. Download from www.cp2k.org/static/downloads
-2. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v1 -O abacus-deps-v1.zip
+You can manually install requirements packages via:
+1. Download from www.cp2k.org/static/downloads (for OpenBLAS, OpenMPI and Others)
+2. Download from github.com (for CEREAL, RapidJSON, libnpy, LibRI and others stage4 packages)
+3. Use git submodule update --init --recursive (for LibRI)
+4. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v2 -O abacus-deps.zip
+5. for Intel-oneAPI, please contact your server manager our visit Intel official website
 EOF
 }
 
diff --git a/toolchain/toolchain_gnu.sh b/toolchain/toolchain_gnu.sh
index 57105f601b..04635bb63d 100755
--- a/toolchain/toolchain_gnu.sh
+++ b/toolchain/toolchain_gnu.sh
@@ -18,6 +18,7 @@
 --with-fftw=install \
 --with-elpa=install \
 --with-cereal=install \
+--with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \
diff --git a/toolchain/toolchain_intel-mpich.sh b/toolchain/toolchain_intel-mpich.sh
index ffc2626670..fcf3cc41ee 100755
--- a/toolchain/toolchain_intel-mpich.sh
+++ b/toolchain/toolchain_intel-mpich.sh
@@ -21,6 +21,7 @@
 --with-fftw=no \
 --with-elpa=install \
 --with-cereal=install \
+--with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \
diff --git a/toolchain/toolchain_intel.sh b/toolchain/toolchain_intel.sh
index 8f391be008..e5298c570d 100755
--- a/toolchain/toolchain_intel.sh
+++ b/toolchain/toolchain_intel.sh
@@ -22,6 +22,7 @@
 --with-fftw=no \
 --with-elpa=install \
 --with-cereal=install \
+--with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \

From 6aa019c41ae3348e176cb3f5ceb58aff6422606d Mon Sep 17 00:00:00 2001
From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com>
Date: Sun, 28 Jan 2024 14:03:20 +0800
Subject: [PATCH 17/44] Revert "Modify inputs and update to latest version"

---
 .github/workflows/test.yml                    |  18 +-
 CMakeLists.txt                                |  77 +-
 Dockerfile.cuda                               |   2 +-
 Dockerfile.gnu                                |   2 +-
 Dockerfile.intel                              |   2 +-
 cmake/FindELPA.cmake                          |  44 +-
 cmake/FindLAPACK.cmake                        |   2 +-
 cmake/FindLibxc.cmake                         |  36 -
 cmake/FindPEXSI.cmake                         |   8 +-
 deps/libpaw_interface                         |   2 +-
 docs/advanced/input_files/input-main.md       |  37 +-
 docs/advanced/install.md                      |  10 -
 docs/quick_start/easy_install.md              |   9 +-
 docs/quick_start/hands_on.md                  |   2 +-
 python/pyabacus/CMakeLists.txt                |  11 +-
 python/pyabacus/src/py_abacus.cpp             |  13 -
 python/pyabacus/src/py_math_base.cpp          |  63 --
 python/pyabacus/src/py_numerical_radial.cpp   |   4 +-
 python/pyabacus/src/pyabacus/__init__.py      |   5 +-
 python/pyabacus/tests/test_base_math.py       |  15 -
 python/pyabacus/tests/test_nr.py              |  25 +
 source/Makefile                               |  17 +-
 source/Makefile.Objects                       |  18 +-
 source/Makefile.vars                          |  20 +-
 source/module_base/global_variable.cpp        |  30 -
 source/module_base/global_variable.h          |  28 -
 source/module_base/math_sphbes.cpp            |  56 +-
 source/module_base/math_sphbes.h              |  13 +-
 source/module_base/para_json.cpp              | 977 ------------------
 source/module_base/para_json.h                | 560 ----------
 source/module_base/test/CMakeLists.txt        |  14 -
 .../module_base/test/complexmatrix_test.cpp   |  22 +-
 .../module_base/test/inverse_matrix_test.cpp  |   2 +-
 source/module_base/test/math_sphbes_test.cpp  |  16 +-
 source/module_base/test/math_ylmreal_test.cpp | 364 +++----
 source/module_base/test/para_json_test.cpp    |  68 --
 source/module_base/test/perf_sphbes_test.cpp  |  72 --
 source/module_base/tool_quit.h                |   8 +-
 source/module_cell/klist.cpp                  |  44 +-
 source/module_cell/klist.h                    |   1 -
 .../test/sltk_atom_input_test.cpp             |   2 +-
 source/module_cell/read_atoms.cpp             | 189 ++--
 source/module_elecstate/occupy.cpp            | 414 +++++++-
 source/module_esolver/esolver_ks.cpp          |  19 +-
 source/module_esolver/esolver_ks_lcao.cpp     |  27 +-
 source/module_esolver/esolver_ks_pw.cpp       |  71 +-
 .../module_xc/test/test_xc.cpp                |   4 +-
 .../module_xc/test/test_xc1.cpp               |   2 +-
 .../module_xc/test/test_xc2.cpp               |  10 +-
 .../module_xc/test/test_xc4.cpp               |   4 +-
 .../module_xc/test/test_xc5.cpp               |  65 ++
 .../module_xc/test/xc3_mock.h                 |  16 +-
 .../module_deltaspin/cal_mw.cpp               |   2 +-
 .../module_deltaspin/cal_mw_helper.cpp        |  14 +-
 .../module_deltaspin/lambda_loop.cpp          |   4 +-
 .../module_tddft/test/tddft_test.cpp          |   3 +-
 source/module_hsolver/diago_pexsi.cpp         |  44 +-
 source/module_hsolver/diago_pexsi.h           |   2 +-
 source/module_hsolver/hsolver_pw.cpp          |  22 +-
 .../module_pexsi/CMakeLists.txt               |   2 +-
 .../module_pexsi/dist_bcd_matrix.cpp          |   4 +-
 .../module_pexsi/dist_bcd_matrix.h            |  23 -
 .../module_pexsi/dist_ccs_matrix.cpp          |   4 +-
 .../module_pexsi/dist_ccs_matrix.h            |  40 -
 .../module_pexsi/dist_matrix_transformer.cpp  | 233 ++---
 .../module_pexsi/dist_matrix_transformer.h    |  64 +-
 .../module_pexsi/pexsi_solver.cpp             |  28 +-
 .../module_pexsi/pexsi_solver.h               |   6 +-
 .../module_pexsi/simple_pexsi.cpp             | 490 +++++----
 source/module_io/input.cpp                    | 230 +----
 source/module_io/input.h                      |  43 +-
 source/module_io/input_conv.cpp               |  30 -
 source/module_io/mulliken_charge.cpp          |   4 +-
 source/module_io/nscf_band.cpp                |  41 +-
 source/module_io/nscf_band.h                  |   1 -
 source/module_io/parameter_pool.cpp           |  39 +-
 source/module_io/parameter_pool.h             |   8 +-
 source/module_io/test/input_conv_test.cpp     |   1 -
 source/module_io/test/input_test.cpp          |  14 +-
 source/module_io/test/input_test_para.cpp     |   9 +-
 source/module_io/test/support/INPUT           |   2 +-
 source/module_io/test/support/witestfile      |   2 +-
 source/module_io/test/to_qo_test.cpp          |  34 +-
 source/module_io/test/write_input_test.cpp    |  11 +-
 .../module_io/test_serial/nscf_band_test.cpp  |  11 +-
 source/module_io/write_input.cpp              |  33 +-
 source/module_ri/Exx_LRI.hpp                  |  10 +-
 source/module_ri/LRI_CV_Tools.hpp             |  11 +-
 .../module_ri/conv_coulomb_pot_k-template.h   |  51 +
 source/module_ri/conv_coulomb_pot_k.cpp       | 181 ++--
 source/module_ri/conv_coulomb_pot_k.h         |  47 +-
 source/module_ri/conv_coulomb_pot_k.hpp       |  37 -
 source/module_ri/exx_lip.cpp                  |  14 +-
 .../107_PW_OBOD_MemSaver/refBANDS_1.dat       |  12 +-
 .../107_PW_OB_outputbands/refBANDS_1.dat      |  12 +-
 .../204_NO_KP_NC_deltaspin/mulliken.txt.ref   | 168 +--
 .../204_NO_KP_NC_deltaspin/result.ref         |   6 +-
 tests/integrate/207_NO_KP_OB/refBANDS_1.dat   |  12 +-
 tests/integrate/Autotest.sh                   |  10 +-
 toolchain/README.md                           |  15 +-
 toolchain/install_abacus_toolchain.sh         |  14 +-
 toolchain/scripts/common_vars.sh              |   3 +-
 toolchain/scripts/stage4/install_rapidjson.sh |  93 --
 toolchain/scripts/stage4/install_stage4.sh    |   1 -
 toolchain/scripts/tool_kit.sh                 |   9 +-
 toolchain/toolchain_gnu.sh                    |   1 -
 toolchain/toolchain_intel-mpich.sh            |   1 -
 toolchain/toolchain_intel.sh                  |   1 -
 108 files changed, 1722 insertions(+), 4025 deletions(-)
 delete mode 100644 cmake/FindLibxc.cmake
 delete mode 100644 python/pyabacus/src/py_abacus.cpp
 delete mode 100644 python/pyabacus/src/py_math_base.cpp
 delete mode 100644 python/pyabacus/tests/test_base_math.py
 create mode 100644 python/pyabacus/tests/test_nr.py
 delete mode 100644 source/module_base/para_json.cpp
 delete mode 100644 source/module_base/para_json.h
 delete mode 100644 source/module_base/test/para_json_test.cpp
 delete mode 100644 source/module_base/test/perf_sphbes_test.cpp
 create mode 100644 source/module_ri/conv_coulomb_pot_k-template.h
 delete mode 100644 source/module_ri/conv_coulomb_pot_k.hpp
 delete mode 100755 toolchain/scripts/stage4/install_rapidjson.sh

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 76f48347a8..856e56d97a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,37 +2,27 @@ name: Integration Test and Unit Test
 
 on:
   pull_request:
-
+  
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
-
+  
 jobs:
   test:
     name: Test
     runs-on: self-hosted
     if: github.repository_owner == 'deepmodeling'
-    container:
-      image: ghcr.io/deepmodeling/abacus-gnu
-      volumes:
-        - /tmp/ccache:/github/home/.ccache
+    container: ghcr.io/deepmodeling/abacus-gnu
     steps:
       - name: Checkout
         uses: actions/checkout@v4
         with:
           submodules: recursive
-
-      - name: Install Ccache
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ccache
-
       - name: Build
         run: |
-          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON
+          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON
           cmake --build build -j8
           cmake --install build
-
       - name: Test
         env:
           GTEST_COLOR: 'yes'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73a846304b..8440662355 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,12 @@ project(ABACUS
     LANGUAGES CXX
 )
 
+# private options, should not be pushed to master
+# set(PEXSI_DIR "~/projects/pexsi-build/pexsi")
+# set(SuperLU_DIR "~/projects/pexsi-build/superlu")
+# set(ParMETIS_DIR "~/projects/pexsi-build/parmetis")
+# set(ELPA_INCLUDE_DIR "/usr/include/elpa_openmp-2023.05.001")
+
 option(ENABLE_LCAO "Enable LCAO calculation." ON)
 option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
 option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
@@ -34,36 +40,7 @@ option(DEBUG_INFO "Print message for developers to debug." OFF)
 option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
 option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
-option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
-option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
 option(USE_PEXSI "Enable support for PEXSI." OFF)
-
-
-
-# enable json support
-if(ENABLE_RAPIDJSON)
-  find_package(RapidJSON)
-  if(NOT RapidJSON_FOUND)
-    message(WARNING "Rapidjson is not found, trying downloading from github, or you can install Rapidjson first and reinstall abacus.")
-    include(FetchContent)
-    FetchContent_Declare(
-      rapidjson
-      GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
-      GIT_TAG "origin/master"
-      GIT_SHALLOW TRUE
-      GIT_PROGRESS TRUE
-    )
-    set(RAPIDJSON_BUILD_TESTS OFF CACHE INTERNAL "")
-    set(RAPIDJSON_BUILD_EXAMPLES OFF CACHE INTERNAL "")
-    FetchContent_MakeAvailable(rapidjson)
-    set(RapidJSON_INCLUDE_PATH "${rapidjson_SOURCE_DIR}/include")
-  endif()
-  add_compile_definitions(__RAPIDJSON)
-  add_definitions(-DRAPIDJSON_HAS_CXX11_NOEXCEPT=0)
-  include_directories(${RapidJSON_INCLUDE_PATH})
-endif()
-
-
 if (USE_CUDA)
   set(USE_CUSOLVER_LCAO ON)
 else()
@@ -212,7 +189,7 @@ if(ENABLE_LCAO)
   
   if(USE_PEXSI)
     find_package(PEXSI REQUIRED)
-    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
+    target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
     add_compile_definitions(__PEXSI)
   endif()
@@ -437,7 +414,8 @@ endif()
 
 if(ENABLE_DEEPKS)
   # Torch uses outdated components to detech CUDA arch, causing failure on latest CUDA kits.
-  # Set CMake variable TORCH_CUDA_ARCH_LIST in the form of "major.minor" if required.
+  # See above for setting CMAKE_CUDA_ARCHITECTURES
+  set(TORCH_CUDA_ARCH_LIST CMAKE_CUDA_ARCHITECTURES)
   find_package(Torch REQUIRED)
   if(NOT Torch_VERSION VERSION_LESS "2.1.0")
     set_if_higher(CMAKE_CXX_STANDARD 17)
@@ -544,8 +522,11 @@ if(DEFINED Libxc_DIR)
   set(ENABLE_LIBXC ON)
 endif()
 if(ENABLE_LIBXC)
-  # use `cmake/FindLibxc.cmake` to detect Libxc installation with `pkg-config`
-  find_package(Libxc REQUIRED)
+  find_package(Libxc REQUIRED HINTS
+    ${Libxc_DIR}/share/cmake/Libxc
+    ${Libxc_DIR}/lib/cmake/Libxc
+    ${Libxc_DIR}/lib64/cmake/Libxc
+  )
   message(STATUS "Found Libxc: version " ${Libxc_VERSION})
   if(${Libxc_VERSION} VERSION_LESS 5.1.7)
     message(FATAL_ERROR "LibXC >= 5.1.7 is required.")
@@ -599,25 +580,6 @@ if(INFO)
   # modifications on blas_connector and lapack_connector
 endif()
 
-#  Add performance test in abacus
-IF (ENABLE_GOOGLEBENCH)
-  set(BUILD_TESTING ON)
-  find_package(benchmark HINTS ${BENCHMARK_DIR})
-  if(NOT ${benchmark_FOUND})
-    set(BENCHMARK_USE_BUNDLED_GTEST OFF)
-    include(FetchContent)
-    FetchContent_Declare(
-      benchmark
-      GIT_REPOSITORY https://github.com/google/benchmark.git
-      GIT_TAG "origin/main"
-      GIT_SHALLOW TRUE
-      GIT_PROGRESS TRUE 
-    )
-    set(BENCHMARK_ENABLE_TESTING OFF)
-    FetchContent_MakeAvailable(benchmark)
-  endif()
-endif()
-
 IF (BUILD_TESTING)
   set_if_higher(CMAKE_CXX_STANDARD 14) # Required in orbital
   include(CTest)
@@ -647,14 +609,8 @@ IF (BUILD_TESTING)
     endif()
 
     #dependencies & link library
-    if(ENABLE_GOOGLEBENCH)
-      target_link_libraries(${UT_TARGET} ${UT_LIBS}
-        Threads::Threads GTest::gtest_main GTest::gmock_main benchmark::benchmark)
-      else()
-        target_link_libraries(${UT_TARGET} ${UT_LIBS}
-          Threads::Threads GTest::gtest_main GTest::gmock_main)
-    endif()
-      
+    target_link_libraries(${UT_TARGET} ${UT_LIBS}
+      Threads::Threads GTest::gtest_main GTest::gmock_main)
     if(USE_OPENMP)
       target_link_libraries(${UT_TARGET} OpenMP::OpenMP_CXX)
     endif()
@@ -664,7 +620,6 @@ IF (BUILD_TESTING)
       WORKING_DIRECTORY $<TARGET_FILE_DIR:${UT_TARGET}>
     )
   endfunction(AddTest)
-
 endif()
 
 add_subdirectory(source)
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index e950f097f9..719f7c4278 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -2,7 +2,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
 
 RUN apt update && apt install -y --no-install-recommends \
     libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \
-    libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \
+    libxc-dev libgtest-dev libgmock-dev python3-numpy \
     bc cmake git g++ make bc time sudo unzip vim wget
 
 ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \
diff --git a/Dockerfile.gnu b/Dockerfile.gnu
index 060d930563..0b6b45d248 100644
--- a/Dockerfile.gnu
+++ b/Dockerfile.gnu
@@ -1,7 +1,7 @@
 FROM ubuntu:22.04
 RUN apt update && apt install -y --no-install-recommends \
     libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \
-    libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \
+    libxc-dev libgtest-dev libgmock-dev python3-numpy \
     bc cmake git g++ make bc time sudo unzip vim wget gfortran
 
 ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \
diff --git a/Dockerfile.intel b/Dockerfile.intel
index 3947f05b9e..6cac8c9f5f 100644
--- a/Dockerfile.intel
+++ b/Dockerfile.intel
@@ -2,7 +2,7 @@ FROM ubuntu:22.04
 
 RUN apt-get update && apt-get install -y \
     bc cmake git gnupg gcc g++ python3-numpy sudo wget vim unzip \
-    libcereal-dev libxc-dev libgtest-dev libgmock-dev libbenchmark-dev
+    libcereal-dev libxc-dev libgtest-dev libgmock-dev
 
 # Following steps by https://software.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/installation/install-using-package-managers/apt.html .
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
diff --git a/cmake/FindELPA.cmake b/cmake/FindELPA.cmake
index 4105e47592..5769f7248c 100644
--- a/cmake/FindELPA.cmake
+++ b/cmake/FindELPA.cmake
@@ -7,50 +7,34 @@
 #  ELPA_INCLUDE_DIR - Where to find ELPA headers.
 #
 
-find_package(PkgConfig)
-
-find_path(ELPA_INCLUDE_DIRS
+find_path(ELPA_INCLUDE_DIR
     elpa/elpa.h
     HINTS ${ELPA_DIR}
     PATH_SUFFIXES "include" "include/elpa"
     )
 if(USE_OPENMP)
-    find_library(ELPA_LINK_LIBRARIES
-    NAMES elpa_openmp elpa
-    HINTS ${ELPA_DIR}
-    PATH_SUFFIXES "lib"
-    )
+    find_library(ELPA_LIBRARY
+        NAMES elpa_openmp elpa
+        HINTS ${ELPA_DIR}
+        PATH_SUFFIXES "lib"
+        )
 else()
-    find_library(ELPA_LINK_LIBRARIES
-    NAMES elpa
-    HINTS ${ELPA_DIR}
-    PATH_SUFFIXES "lib"
-    )
-endif()
-
-if(NOT ELPA_INCLUDE_DIRS AND PKG_CONFIG_FOUND)
-  if(DEFINED ELPA_DIR)
-    string(APPEND CMAKE_PREFIX_PATH ";${ELPA_DIR}")
-  endif()
-  if(USE_OPENMP)
-    pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa_openmp)
-  else()
-    pkg_search_module(ELPA REQUIRED IMPORTED_TARGET GLOBAL elpa)
-  endif()
-elseif(NOT PKG_CONFIG_FOUND)
-  message(
-    "ELPA : We need pkg-config to get all information about the elpa library")
+    find_library(ELPA_LIBRARY
+        NAMES elpa
+        HINTS ${ELPA_DIR}
+        PATH_SUFFIXES "lib"
+        )
 endif()
 
 # Handle the QUIET and REQUIRED arguments and
 # set ELPA_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LINK_LIBRARIES ELPA_INCLUDE_DIRS)
+find_package_handle_standard_args(ELPA DEFAULT_MSG ELPA_LIBRARY ELPA_INCLUDE_DIR)
 
 # Copy the results to the output variables and target.
 if(ELPA_FOUND)
-    list(GET ELPA_LINK_LIBRARIES 0 ELPA_LIBRARY)
-    set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIRS})
+    set(ELPA_LIBRARIES ${ELPA_LIBRARY})
+    set(ELPA_INCLUDE_DIR ${ELPA_INCLUDE_DIR})
 
     if(NOT TARGET ELPA::ELPA)
         add_library(ELPA::ELPA UNKNOWN IMPORTED)
diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake
index c240d5facf..4f4bfbc425 100644
--- a/cmake/FindLAPACK.cmake
+++ b/cmake/FindLAPACK.cmake
@@ -6,7 +6,7 @@
 #
 
 find_library(LAPACK_LIBRARY
-    NAMES openblas blas
+    NAMES openblas
     HINTS ${LAPACK_DIR}
     PATH_SUFFIXES "lib"
 )
diff --git a/cmake/FindLibxc.cmake b/cmake/FindLibxc.cmake
deleted file mode 100644
index 4a3c04cba7..0000000000
--- a/cmake/FindLibxc.cmake
+++ /dev/null
@@ -1,36 +0,0 @@
-include(FindPackageHandleStandardArgs)
-
-if(DEFINED Libxc_DIR)
-  string(APPEND CMAKE_PREFIX_PATH ";${Libxc_DIR}")
-endif()
-# Using CMake interface as default.
-# NO REQUIRED here, otherwhile it would throw error
-# with no LibXC found.
-find_package(Libxc HINTS
-    ${Libxc_DIR}/share/cmake/Libxc
-    ${Libxc_DIR}/lib/cmake/Libxc
-    ${Libxc_DIR}/lib64/cmake/Libxc
-  )
-if(NOT TARGET Libxc::xc)
-  find_package(PkgConfig REQUIRED)
-  pkg_search_module(Libxc REQUIRED IMPORTED_TARGET GLOBAL libxc)
-  find_package_handle_standard_args(Libxc DEFAULT_MSG Libxc_LINK_LIBRARIES Libxc_INCLUDE_DIRS)
-endif()
-
-
-# Copy the results to the output variables and target.
-# if find_package() above works, Libxc::xc would be present and
-# below would be skipped.
-if(Libxc_FOUND AND NOT TARGET Libxc::xc)
-	set(Libxc_LIBRARY ${Libxc_LINK_LIBRARIES})
-	set(Libxc_LIBRARIES ${Libxc_LIBRARY})
-	set(Libxc_INCLUDE_DIR ${Libxc_INCLUDE_DIRS})
-	add_library(Libxc::xc UNKNOWN IMPORTED)
-	set_target_properties(Libxc::xc PROPERTIES
-		IMPORTED_LOCATION "${Libxc_LIBRARY}"
-		INTERFACE_INCLUDE_DIRECTORIES "${Libxc_INCLUDE_DIR}")
-endif()
-
-set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${Libxc_INCLUDE_DIR})
-
-mark_as_advanced(Libxc_INCLUDE_DIR Libxc_LIBRARY)
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index 062764acce..22fe4dd01c 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -35,18 +35,18 @@ find_library(ParMETIS_LIBRARY
     PATH_SUFFIXES "lib"
 )
 
-find_library(SuperLU_DIST_LIBRARY
+find_library(SuperLU_LIBRARY
     NAMES libsuperlu_dist.a
-    HINTS ${SuperLU_DIST_DIR}
+    HINTS ${SuperLU_DIR}
     PATH_SUFFIXES "lib"
 )
 
 # Handle the QUIET and REQUIRED arguments and
 # set Cereal_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY)
+find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_LIBRARY)
 
 
 # Copy the results to the output variables and target.
-mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY)
+mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_LIBRARY)
 
diff --git a/deps/libpaw_interface b/deps/libpaw_interface
index c211c0ab33..893cfe5b88 160000
--- a/deps/libpaw_interface
+++ b/deps/libpaw_interface
@@ -1 +1 @@
-Subproject commit c211c0ab330adf3cc374f50ab3edee46b174e64c
+Subproject commit 893cfe5b88c4b640b88a82335474d9f67d4c4cf6
diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 8c675cab92..b9ee122af6 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -72,7 +72,6 @@
     - [mixing\_beta](#mixing_beta)
     - [mixing\_beta\_mag](#mixing_beta_mag)
     - [mixing\_ndim](#mixing_ndim)
-    - [mixing\_restart](#mixing_restart)
     - [mixing\_gg0](#mixing_gg0)
     - [mixing\_gg0\_mag](#mixing_gg0_mag)
     - [mixing\_gg0\_min](#mixing_gg0_min)
@@ -146,8 +145,6 @@
     - [out\_app\_flag](#out_app_flag)
     - [out\_ndigits](#out_ndigits)
     - [out\_interval](#out_interval)
-    - [band\_print\_num](#band_print_num)
-    - [bands\_to\_print](#bands_to_print)
     - [out\_element\_info](#out_element_info)
     - [restart\_save](#restart_save)
     - [restart\_load](#restart_load)
@@ -948,8 +945,6 @@ calculations.
   - **fixed**: fixed occupations (available for non-coductors only)
   - **gauss** or **gaussian**: Gaussian smearing method.
   - **mp**: methfessel-paxton smearing method; recommended for metals.
-  - **mp2**: 2-nd methfessel-paxton smearing method; recommended for metals.
-  - **mv** or **cold**: marzari-vanderbilt smearing method.
   - **fd**: Fermi-Dirac smearing method: $f=1/\{1+\exp[(E-\mu)/kT]\}$ and smearing_sigma below is the temperature $T$ (in Ry).
 - **Default**: gauss
 
@@ -1006,13 +1001,6 @@ We recommend the following options:
   For systems that are difficult to converge, one could try increasing the value of 'mixing_ndim' to enhance the stability of the self-consistent field (SCF) calculation.
 - **Default**: 8
 
-### mixing_restart
-
-- **Type**: Integer
-- **Description**: At `mixing_restart`-th iteration, SCF will restart by using output charge density from perivos iteration as input charge density directly, and start a new mixing. `mixing_restart=0|1` means SCF starts from scratch.
-  
-- **Default**: 0
-
 ### mixing_gg0
 
 - **Type**: Real
@@ -1506,8 +1494,8 @@ These variables are used to control the output of properties.
 
 ### out_band
 
-- **Type**: Boolean Integer(optional)
-- **Description**: Whether to output the band structure (in eV), optionally output precision can be set by a second parameter, default is 8. For more information, refer to the [band.md](../elec_properties/band.md)
+- **Type**: Boolean
+- **Description**: Whether to output the band structure (in eV). For more information, refer to the [band.md](../elec_properties/band.md)
 - **Default**: False
 
 ### out_proj_band
@@ -1611,20 +1599,6 @@ These variables are used to control the output of properties.
 - **Description**: Control the interval for printing Mulliken population analysis, $r(R)$, $H(R)$, $S(R)$, $T(R)$, $dH(R)$, $H(k)$, $S(k)$ and $wfc(k)$ matrices during molecular dynamics calculations. Check input parameters [out_mul](#out_mul), [out_mat_r](#out_mat_r), [out_mat_hs2](#out_mat_hs2), [out_mat_t](#out_mat_t), [out_mat_dh](#out_mat_dh), [out_mat_hs](#out_mat_hs) and [out_wfc_lcao](#out_wfc_lcao) for more information, respectively.
 - **Default**: 1
 
-### band_print_num
-
-- **Type**: Integer
-- **Availability**: PW basis
-- **Description**: If you want to plot a partial charge density contributed from some chosen bands. `band_print_num` define the number of band list. The result can be found in "band*.cube".
-- **Default**: 0
-
-### bands_to_print
-
-- **Type**: vector
-- **Availability**: band_print_num > 0
-- **Description**: define which band you want to choose for partial charge density.
-- **Default**: []
-
 ### out_element_info
 
 - **Type**: Boolean
@@ -2802,9 +2776,9 @@ These variables are used to control berry phase and wannier90 interface paramete
 
 - **Type**: String
 - **Description**: the spin direction for the Wannier function calculation when nspin is set to 2
-  - `up`: Calculate spin up for the Wannier function.
-  - `down`: Calculate spin down for the Wannier function.
-- **Default**: `up`
+  - "up": Calculate spin up for the Wannier function.
+  - "down": Calculate spin down for the Wannier function.
+- **Default**: "up"
 
 ### out_wannier_mmn
 
@@ -2844,7 +2818,6 @@ These variables are used to control berry phase and wannier90 interface paramete
 - **Description**: write the "UNK.*" file in ASCII format or binary format.
   - 0: write the "UNK.*" file in binary format.
   - 1: write the "UNK.*" file in ASCII format (text file format).
-- **Default**: 1
 
 [back to top](#full-list-of-input-keywords)
 
diff --git a/docs/advanced/install.md b/docs/advanced/install.md
index d6201a060f..e929fac34c 100644
--- a/docs/advanced/install.md
+++ b/docs/advanced/install.md
@@ -69,16 +69,6 @@ After building and installing, unit tests can be performed with `ctest`.
 
 To run a subset of unit test, use `ctest -R <test-match-pattern>` to perform tests with name matched by given pattern.
 
-## Build Performance Tests
-
-To build performance tests for ABACUS, define `ENABLE_GOOGLEBENCH` flag. You can also specify the path to a local installation of [Google Benchmark](https://github.com/google/benchmark.git) by setting `BENCHMARK_DIR` flags. If not found locally, the configuration process will try to download it automatically.
-
-```bash
-cmake -B build -DENABLE_GOOGLEBENCH=1
-```
-
-Google Benchmark requires Google Test to build and run the tests. When setting `ENABLE_GOOGLEBENCH` to ON, `BUILD_TESTING` is automatically enabled. After building and installing, performance tests can be executed with `ctest`.
-
 ## Build with CUDA support
 
 ### Extra prerequisites
diff --git a/docs/quick_start/easy_install.md b/docs/quick_start/easy_install.md
index 4089e303a3..957b9d3262 100644
--- a/docs/quick_start/easy_install.md
+++ b/docs/quick_start/easy_install.md
@@ -28,7 +28,7 @@ These requirements support the calculation of plane-wave basis in ABACUS. For LC
 Some of these packages can be installed with popular package management system, such as `apt` and `yum`:
 
 ```bash
-sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git pkgconf
+sudo apt update && sudo apt install -y libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev g++ make cmake bc git
 ```
 
 > Installing ELPA by apt only matches requirements on Ubuntu 22.04. For earlier linux distributions, you should build ELPA from source.
@@ -111,12 +111,12 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
   - `LAPACK_DIR`: Path to OpenBLAS library `libopenblas.so`(including BLAS and LAPACK)
   - `SCALAPACK_DIR`: Path to ScaLAPACK library `libscalapack.so`
   - `ELPA_DIR`: Path to ELPA install directory; should be the folder containing 'include' and 'lib'.
-  > Note: In ABACUS v3.5.1 or earlier, if you install ELPA from source , please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa` to help the build system find ELPA headers.
+  > Note: If you install ELPA from source, please add a symlink to avoid the additional include file folder with version name: `ln -s elpa/include/elpa-2021.05.002/elpa elpa/include/elpa`. This is a known behavior of ELPA.
 
   - `FFTW3_DIR`: Path to FFTW3.
   - `CEREAL_INCLUDE_DIR`: Path to the parent folder of `cereal/cereal.hpp`. Will download from GitHub if absent.
   - `Libxc_DIR`: (Optional) Path to Libxc.
-  > Note: In ABACUS v3.5.1 or earlier, Libxc built from source with Makefile is NOT supported; please compile Libxc with CMake instead.
+  > Note: Building Libxc from source with Makefile does NOT support using it in CMake here. Please compile Libxc with CMake instead.
   - `LIBRI_DIR`: (Optional) Path to LibRI.
   - `LIBCOMM_DIR`: (Optional) Path to LibComm.
 
@@ -126,7 +126,6 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
   - `ENABLE_LIBRI=OFF`: [Enable LibRI](../advanced/install.md#add-libri-support) to suppport variety of functionals. If `LIBRI_DIR` and `LIBCOMM_DIR` is defined, `ENABLE_LIBRI` will set to 'ON'.
   - `USE_OPENMP=ON`: Enable OpenMP support. Building ABACUS without OpenMP is not fully tested yet.
   - `BUILD_TESTING=OFF`: [Build unit tests](../advanced/install.md#build-unit-tests).
-  - `ENABLE_GOOGLEBENCH=OFF`: [Build performance tests](../advanced/install.md#build-performance-tests)
   - `ENABLE_MPI=ON`: Enable MPI parallel compilation. If set to `OFF`, a serial version of ABACUS with PW basis only will be compiled. Currently serial version of ABACUS with LCAO basis is not supported yet, so `ENABLE_LCAO` will be automatically set to `OFF`.
   - `ENABLE_COVERAGE=OFF`: Build ABACUS executable supporting [coverage analysis](../CONTRIBUTING.md#generating-code-coverage-report). This feature has a drastic impact on performance.
   - `ENABLE_ASAN=OFF`: Build with Address Sanitizer. This feature would help detecting memory problems.
@@ -230,7 +229,7 @@ conda create -n abacus_env abacus -c conda-forge
 conda activate abacus_env
 export CMAKE_PREFIX_PATH=$CONDA_PREFIX:$CMAKE_PREFIX_PATH
 
-# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft mkl-devel -c conda-forge` to switch implementation.
+# By default OpenBLAS is used; run `conda install "blas=*=mkl" mkl_fft -c conda-forge` to switch implementation.
 export MKLROOT=$CONDA_PREFIX # If Intel MKL is required.
 
 export CMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`:$CMAKE_PREFIX_PATH # If DEEPKS support is required;
diff --git a/docs/quick_start/hands_on.md b/docs/quick_start/hands_on.md
index 2e0e768169..d63c6b0232 100644
--- a/docs/quick_start/hands_on.md
+++ b/docs/quick_start/hands_on.md
@@ -57,7 +57,7 @@ basis_type              lcao
 calculation             scf		# this is the key parameter telling abacus to do a scf calculation
 ```
 
-The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir` defined in `INPUT` (the default directory is "./"), and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir` also defined in `INPUT` (the default directory is "./"). The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm).
+The pseudopotential files of `Mg_ONCV_PBE-1.0.upf` and `O_ONCV_PBE-1.0.upf` should be provided under the directory of `pseudo_dir`, and the orbital files `Mg_gga_8au_100Ry_4s2p1d.orb` and `O_gga_8au_100Ry_2s2p1d.orb` under the directory of `orbital_dir`. The pseudopotential and orbital files can be downloaded from the [ABACUS website](http://abacus.ustc.edu.cn/pseudo/list.htm).
 
 The final mandatory input file is called `KPT`, which sets the reciprocal space k-mesh. Below is an example:
 
diff --git a/python/pyabacus/CMakeLists.txt b/python/pyabacus/CMakeLists.txt
index 0effbe83f2..399bd4fe57 100644
--- a/python/pyabacus/CMakeLists.txt
+++ b/python/pyabacus/CMakeLists.txt
@@ -12,14 +12,9 @@ set(BASE_PATH "${PROJECT_SOURCE_DIR}/../../source/module_base")
 set(ABACUS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../source")
 include_directories(${BASE_PATH} ${ABACUS_SOURCE_DIR})
 list(APPEND _sources
-    #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h
-    #${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp
-    ${ABACUS_SOURCE_DIR}/module_base/constants.h
-    ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.h
-    ${ABACUS_SOURCE_DIR}/module_base/math_sphbes.cpp
-    ${PROJECT_SOURCE_DIR}/src/py_abacus.cpp
-    #${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp
-    ${PROJECT_SOURCE_DIR}/src/py_math_base.cpp)
+    ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.h
+    ${ABACUS_SOURCE_DIR}/module_basis/module_nao/numerical_radial.cpp
+    ${PROJECT_SOURCE_DIR}/src/py_numerical_radial.cpp)
 python_add_library(_core MODULE ${_sources} WITH_SOABI)
 target_link_libraries(_core PRIVATE pybind11::headers)
 target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
diff --git a/python/pyabacus/src/py_abacus.cpp b/python/pyabacus/src/py_abacus.cpp
deleted file mode 100644
index 34b354dc6b..0000000000
--- a/python/pyabacus/src/py_abacus.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-
-namespace py = pybind11;
-
-void bind_numerical_radial(py::module& m);
-void bind_math_base(py::module& m);
-
-PYBIND11_MODULE(_core, m)
-{
-    // bind_numerical_radial(m);
-    bind_math_base(m);
-}
\ No newline at end of file
diff --git a/python/pyabacus/src/py_math_base.cpp b/python/pyabacus/src/py_math_base.cpp
deleted file mode 100644
index 4378690897..0000000000
--- a/python/pyabacus/src/py_math_base.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-
-#include "module_base/math_sphbes.h"
-
-namespace py = pybind11;
-using namespace pybind11::literals;
-template <typename... Args>
-using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
-
-void bind_math_base(py::module& m)
-{
-    py::module module_base = m.def_submodule("ModuleBase");
-
-    py::class_<ModuleBase::Sphbes>(module_base, "Sphbes")
-        .def(py::init<>())
-        .def_static("sphbesj", overload_cast_<const int, const double>()(&ModuleBase::Sphbes::sphbesj), "l"_a, "x"_a)
-        .def_static("dsphbesj", overload_cast_<const int, const double>()(&ModuleBase::Sphbes::dsphbesj), "l"_a, "x"_a)
-        .def_static("sphbesj",
-                    [](const int n, py::array_t<double> r, const double q, const int l, py::array_t<double> jl) {
-                        py::buffer_info r_info = r.request();
-                        if (r_info.ndim != 1)
-                        {
-                            throw std::runtime_error("r array must be 1-dimensional");
-                        }
-                        py::buffer_info jl_info = jl.request();
-                        if (jl_info.ndim != 1)
-                        {
-                            throw std::runtime_error("jl array must be 1-dimensional");
-                        }
-                        ModuleBase::Sphbes::sphbesj(n,
-                                                    static_cast<const double* const>(r_info.ptr),
-                                                    q,
-                                                    l,
-                                                    static_cast<double* const>(jl_info.ptr));
-                    })
-        .def_static("dsphbesj",
-                    [](const int n, py::array_t<double> r, const double q, const int l, py::array_t<double> djl) {
-                        py::buffer_info r_info = r.request();
-                        if (r_info.ndim != 1)
-                        {
-                            throw std::runtime_error("r array must be 1-dimensional");
-                        }
-                        py::buffer_info djl_info = djl.request();
-                        if (djl_info.ndim != 1)
-                        {
-                            throw std::runtime_error("djl array must be 1-dimensional");
-                        }
-                        ModuleBase::Sphbes::dsphbesj(n,
-                                                     static_cast<const double* const>(r_info.ptr),
-                                                     q,
-                                                     l,
-                                                     static_cast<double* const>(djl_info.ptr));
-                    })
-        .def_static("sphbes_zeros", [](const int l, const int n, py::array_t<double> zeros) {
-            py::buffer_info zeros_info = zeros.request();
-            if (zeros_info.ndim != 1)
-            {
-                throw std::runtime_error("zeros array must be 1-dimensional");
-            }
-            ModuleBase::Sphbes::sphbes_zeros(l, n, static_cast<double* const>(zeros_info.ptr));
-        });
-}
\ No newline at end of file
diff --git a/python/pyabacus/src/py_numerical_radial.cpp b/python/pyabacus/src/py_numerical_radial.cpp
index ebda8f080b..296229b3d1 100644
--- a/python/pyabacus/src/py_numerical_radial.cpp
+++ b/python/pyabacus/src/py_numerical_radial.cpp
@@ -8,7 +8,7 @@ using namespace pybind11::literals;
 template <typename... Args>
 using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
 
-void bind_numerical_radial(py::module& m)
+PYBIND11_MODULE(_core, m)
 {
     // Create the submodule for NumericalRadial
     py::module m_numerical_radial = m.def_submodule("NumericalRadial");
@@ -165,4 +165,4 @@ void bind_numerical_radial(py::module& m)
         .def_property_readonly("kgrid", overload_cast_<int>()(&NumericalRadial::kgrid, py::const_))
         .def_property_readonly("rvalue", overload_cast_<int>()(&NumericalRadial::rvalue, py::const_))
         .def_property_readonly("kvalue", overload_cast_<int>()(&NumericalRadial::kvalue, py::const_));
-}
\ No newline at end of file
+}
diff --git a/python/pyabacus/src/pyabacus/__init__.py b/python/pyabacus/src/pyabacus/__init__.py
index 94d8c0d5b8..cda9318053 100644
--- a/python/pyabacus/src/pyabacus/__init__.py
+++ b/python/pyabacus/src/pyabacus/__init__.py
@@ -1,4 +1,3 @@
 from __future__ import annotations
-# from ._core import __doc__, __version__, NumericalRadial, ModuleBase
-from ._core import ModuleBase
-__all__ = ["ModuleBase"]
\ No newline at end of file
+from ._core import __doc__, __version__, NumericalRadial
+__all__ = ["__doc__", "__version__", "NumericalRadial"]
\ No newline at end of file
diff --git a/python/pyabacus/tests/test_base_math.py b/python/pyabacus/tests/test_base_math.py
deleted file mode 100644
index 97d5118bac..0000000000
--- a/python/pyabacus/tests/test_base_math.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from __future__ import annotations
-
-import pyabacus as m
-import numpy as np
-
-
-def test_version():
-    assert m.__version__ == "0.0.1"
-
-def test_sphbes():
-    s = m.ModuleBase.Sphbes()
-    # test for sphbesj
-    assert s.sphbesj(1, 0.0) == 0.0
-    assert s.sphbesj(0, 0.0) == 1.0
-
diff --git a/python/pyabacus/tests/test_nr.py b/python/pyabacus/tests/test_nr.py
new file mode 100644
index 0000000000..4986331b25
--- /dev/null
+++ b/python/pyabacus/tests/test_nr.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+import pyabacus as m
+
+
+def test_version():
+    assert m.__version__ == "0.0.1"
+
+def test_attributes():
+    chi = m.NumericalRadial()
+    # string
+    assert chi.symbol == ''
+    # integer
+    assert chi.itype == 0
+    assert chi.izeta == 0
+    assert chi.l == -1
+    assert chi.nr == 0
+    assert chi.nk == 0
+    # float
+    assert chi.rcut == 0.0
+    assert chi.kcut == 0.0
+    assert chi.pr == 0.0
+    assert chi.pk == 0.0
+    # bool
+    assert chi.is_fft_compliant == False
diff --git a/source/Makefile b/source/Makefile
index 7bd81f26d9..0dcd329161 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -7,7 +7,7 @@ include Makefile.vars
 INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container
 
 LIBS = -lm -lpthread
-OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES}
+OPTS = ${INCLUDES} -Ofast -g -traceback -xHost -std=c++11 -simd -march=native -m64 -qopenmp -Werror -Wall -pedantic 
 HONG = -D__LCAO
 HONG += -D__ELPA
 ifeq ($(OPENMP), ON)
@@ -75,7 +75,7 @@ else
     FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
     FFTW_LIB_DIR     = ${FFTW_DIR}/lib
     HONG  += -D__FFTW3
-    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
+    LIBS += -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR} -qmkl
     INCLUDES += -I${FFTW_INCLUDE_DIR}
     
     #==========================
@@ -140,6 +140,12 @@ ifdef LIBTORCH_DIR
   endif
 endif
 
+ifdef PEXSI_DIR
+    INCLUDES += -I${PEXSI_INCLUDE_DIR} ${SCOTCH_INCLUDE} ${DSUPERLU_INCLUDE}
+    LIBS += -L${PEXSI_LIB_DIR} -lpexsi_linux_release_v2.0 ${DSUPERLU_LIB} ${PTSCOTCH_LIB} ${SCOTCH_LIB}
+    HONG += -D__PEXSI
+endif
+
 ifdef DeePMD_DIR
     HONG  += -D__DPMD -DHIGH_PREC 
     OPTS  += -Wl,--no-as-needed
@@ -169,13 +175,6 @@ ifdef DeePMD_DIR
     INCLUDES += -I${TensorFlow_INCLUDE_DIR}
 endif
 
-ifdef PEXSI_DIR
-    OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI}
-    INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include
-    LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib -lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis
-    HONG += -D__PEXSI
-endif
-
 include Makefile.Objects
 
 #==========================
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 71e637a80b..2a69761da3 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -30,7 +30,7 @@ VPATH=./src_global:\
 ./module_hsolver:\
 ./module_hsolver/kernels:\
 ./module_hsolver/genelpa:\
-./module_hsolver/module_pexsi:\
+./module_hsolver/pexsi:\
 ./module_elecstate:\
 ./module_elecstate/kernels:\
 ./module_elecstate/potentials:\
@@ -102,7 +102,6 @@ ${OBJS_VDW}\
 ${OBJS_DFTU}\
 ${OBJS_DELTASPIN}\
 ${OBJS_TENSOR}\
-${OBJS_HSOLVER_PEXSI}\
 
 OBJS_MAIN=main.o\
     driver.o\
@@ -291,7 +290,13 @@ OBJS_HSOLVER=diago_cg.o\
     diago_iter_assist.o\
     math_kernel_op.o\
     dngvd_op.o\
-    
+    diago_pexsi.o\
+    DistBCDMatrix.o\
+    DistCCSMatrix.o\
+    DistMatrixTransformer.o\
+    pexsi_solver.o\
+    simplePEXSI.o\
+
 OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       diago_blas.o\
       diago_elpa.o\
@@ -300,13 +305,6 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\
       elpa_new_complex.o\
       utils.o\
 
-OBJS_HSOLVER_PEXSI=diago_pexsi.o\
-      pexsi_solver.o\
-      simple_pexsi.o\
-      dist_bcd_matrix.o\
-      dist_ccs_matrix.o\
-      dist_matrix_transformer.o\
-      
 OBJS_MD=fire.o\
     langevin.o\
     md_base.o\
diff --git a/source/Makefile.vars b/source/Makefile.vars
index 477b0a251d..860bbdd806 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -33,6 +33,15 @@ ELPA_DIR      = /root/lib/ELPA
 ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/
 
 CEREAL_DIR    = /root/lib/cereal
+DSUPERLU_DIR = /root/workspace/superlu_dist-7.2.0
+DSUPERLU_INCLUDE = -I${DSUPERLU_DIR}/include
+DSUPERLU_LIB    = ${DSUPERLU_DIR}/lib/libsuperlu_dist.a
+
+SCOTCH_INCLUDE  = -I/usr/local/include
+PTSCOTCH_DIR    = /root/workspace/scotch_6.0.0
+PTSCOTCH_LIB    = ${PTSCOTCH_DIR}/lib/libptscotchparmetis.a ${PTSCOTCH_DIR}/lib/libptscotch.a ${PTSCOTCH_DIR}/lib/libptscotcherrexit.a ${PTSCOTCH_DIR}/lib/libptscotcherr.a
+SCOTCH_LIB      = ${PTSCOTCH_DIR}/lib/libscotchmetis.a ${PTSCOTCH_DIR}/lib/libscotch.a ${PTSCOTCH_DIR}/lib/libscotcherr.a ${PTSCOTCH_DIR}/lib/libscotcherrexit.a
+
 
 
 ##-------------------  FOR GNU COMPILER  ------------------------------
@@ -59,9 +68,14 @@ CEREAL_DIR    = /root/lib/cereal
 ## To use LIBXC:  set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7)
 ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR
 ## To use LibRI:  set LIBRI_DIR and LIBCOMM_DIR
-## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR
+## To use PEXSI: set PEXSI_DIR which contains include and libpexsi.a
 ##---------------------------------------------------------------------
 
+PEXSI_DIR = /root/workspace/pexsi_v2.0.0
+PEXSI_LIB_DIR = ${PEXSI_DIR}/src
+PEXSI_INCLUDE_DIR = ${PEXSI_DIR}/include
+
+
 # LIBTORCH_DIR  = /usr/local
 # LIBNPY_DIR    = /usr/local
 
@@ -73,10 +87,6 @@ CEREAL_DIR    = /root/lib/cereal
 # LIBRI_DIR     = /public/software/LibRI
 # LIBCOMM_DIR   = /public/software/LibComm
 
-# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi
-# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu
-# PARMETIS_DIR    = /home/rhx/projects/pexsi-build/parmetis
-
 ##---------------------------------------------------------------------
 # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile
 # DEBUG = OFF
diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 6b7015dc25..696bcd6088 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -248,7 +248,6 @@ std::string of_kernel_file = "WTkernel.txt";
 std::string MIXING_MODE = "broyden";
 double MIXING_BETA = 0.7;
 int MIXING_NDIM = 8;
-int MIXING_RESTART = 0;
 double MIXING_GG0 = 1.00;
 double MIXING_BETA_MAG = 1.6;
 double MIXING_GG0_MAG = 1.00;
@@ -301,33 +300,4 @@ std::string qo_basis = "hydrogen";
 std::vector<std::string> qo_strategy = {};
 double qo_thr = 1.0e-6;
 std::vector<double> qo_screening_coeff = {};
-
-//==========================================================
-// PEXSI related
-//==========================================================
-int pexsi_npole = 54;
-int pexsi_inertia = 1;
-int pexsi_nmax = 80;
-// int pexsi_symbolic = 1;
-int pexsi_comm = 1;
-int pexsi_storage = 1;
-int pexsi_ordering = 0;
-int pexsi_row_ordering = 1;
-int pexsi_nproc = 1;
-int pexsi_symm = 1;
-int pexsi_trans = 0;
-int pexsi_method = 1;
-int pexsi_nproc_pole = 1;
-// double pexsi_spin = 2;
-double pexsi_temp = 0.0001;
-double pexsi_gap = 0;
-double pexsi_delta_e = 20.0;
-double pexsi_mu_lower = -10;
-double pexsi_mu_upper = 10;
-double pexsi_mu = 0.0;
-double pexsi_mu_thr = 0.05;
-double pexsi_mu_expand = 0.3;
-double pexsi_mu_guard = 0.2;
-double pexsi_elec_thr = 0.001;
-double pexsi_zero_thr = 1e-10;
 } // namespace GlobalV
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index 9808ca080b..b1fbb1748d 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -277,7 +277,6 @@ extern std::string of_kernel_file; // The name of WT kernel file.
 extern std::string MIXING_MODE;
 extern double MIXING_BETA;
 extern int MIXING_NDIM;
-extern int MIXING_RESTART;
 extern double MIXING_GG0;
 extern bool MIXING_TAU;
 extern double MIXING_BETA_MAG;
@@ -329,32 +328,5 @@ extern std::string qo_basis;
 extern std::vector<std::string> qo_strategy;
 extern double qo_thr;
 extern std::vector<double> qo_screening_coeff;
-
-// PEXSI related
-extern int pexsi_npole;
-extern int pexsi_inertia;
-extern int pexsi_nmax;
-// extern int pexsi_symbolic;
-extern int pexsi_comm;
-extern int pexsi_storage;
-extern int pexsi_ordering;
-extern int pexsi_row_ordering;
-extern int pexsi_nproc;
-extern int pexsi_symm;
-extern int pexsi_trans;
-extern int pexsi_method;
-extern int pexsi_nproc_pole;
-// extern double pexsi_spin;
-extern double pexsi_temp;
-extern double pexsi_gap;
-extern double pexsi_delta_e;
-extern double pexsi_mu_lower;
-extern double pexsi_mu_upper;
-extern double pexsi_mu;
-extern double pexsi_mu_thr;
-extern double pexsi_mu_expand;
-extern double pexsi_mu_guard;
-extern double pexsi_elec_thr;
-extern double pexsi_zero_thr;
 } // namespace GlobalV
 #endif
diff --git a/source/module_base/math_sphbes.cpp b/source/module_base/math_sphbes.cpp
index 73e0127e6b..5e7f41de54 100644
--- a/source/module_base/math_sphbes.cpp
+++ b/source/module_base/math_sphbes.cpp
@@ -1,7 +1,7 @@
 #include "math_sphbes.h"
+#include "timer.h"
 #include "constants.h"
 #include <algorithm>
-#include <iostream>
 
 #include <cassert>
 
@@ -425,6 +425,7 @@ void Sphbes::Spherical_Bessel
     double *jl		 // jl(1:msh) = j_l(q*r(i)),spherical bessel function
 )
 {
+    ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
     double x1=0.0;
 
     int i=0;
@@ -597,6 +598,7 @@ void Sphbes::Spherical_Bessel
         }
     }
 
+    ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
     return;
 }
 
@@ -611,6 +613,7 @@ void Sphbes::Spherical_Bessel
 	double *sjp
 )
 {
+	ModuleBase::timer::tick("Sphbes","Spherical_Bessel");
 
 	//calculate jlx first
 	Spherical_Bessel (msh, r, q, l, sj);
@@ -631,6 +634,7 @@ void Sphbes::dSpherical_Bessel_dx
     double *djl		 // jl(1:msh) = j_l(q*r(i)),spherical bessel function
 )
 {
+    ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq");
     if (l < 0 )
     {
 		std::cout << "We temporarily only calculate derivative of l >= 0." << std::endl;
@@ -678,6 +682,7 @@ void Sphbes::dSpherical_Bessel_dx
         }
         delete[] jl;
     }
+    ModuleBase::timer::tick("Sphbes","dSpherical_Bessel_dq");
     return;
 }
 
@@ -803,7 +808,7 @@ void Sphbes::dsphbesj(const int n,
     }
 }
 
-void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const bool return_all)
+void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros)
 {
     assert( n > 0 );
     assert( l >= 0 );
@@ -813,22 +818,10 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const b
     // This property enables us to use bracketing method recursively
     // to find all zeros of j_l from the zeros of j_0.
 
-    // If return_all is true, zeros of j_0, j_1, ..., j_l will all be returned
-    // such that zeros[l*n+i] is the i-th zero of j_l. As such, it is required
-    // that the array "zeros" has a size of (l+1)*n.
-    //
-    // If return_all is false, only the zeros of j_l will be returned
-    // and "zeros" is merely required to have a size of n.
-    // Note that in this case the bracketing method can be applied with a stride
-    // of 2 instead of 1:
-    // j_0 --> j_1 --> j_3 --> j_5 --> ... --> j_l  (odd  l)
-    // j_0 --> j_2 --> j_4 --> j_6 --> ... --> j_l  (even l)
-
-    // Every recursion step reduces the number of zeros by 1.
-    // If return_all is true, one needs to start with n+l zeros of j_0
-    // to ensure n zeros of j_l; otherwise with a stride of 2 one only
-    // needs to start with n+(l+1)/2 zeros of j_0
-    int nz = n + ( return_all ? l : (l+1)/2 );
+    // if l is odd , j_0 --> j_1 --> j_3 --> j_5 --> ...
+    // if l is even, j_0 --> j_2 --> j_4 --> j_6 --> ...
+
+    int nz = n + (l+1)/2; // number of effective zeros in buffer
     double* buffer = new double[nz];
 
     // zeros of j_0 = sin(x)/x is just n*pi
@@ -838,34 +831,27 @@ void Sphbes::sphbes_zeros(const int l, const int n, double* const zeros, const b
         buffer[i] = (i+1) * PI;
     }
 
-    int ll; // active l
+    int ll = 1;
     auto jl = [&ll] (double x) { return sphbesj(ll, x); };
-    int stride;
-    std::function<void()> copy_if_needed;
-    int offset = 0; // keeps track of the position in zeros for next copy (used when return_all == true)
-    if (return_all)
-    {
-        copy_if_needed = [&](){ std::copy(buffer, buffer + n, zeros + offset); offset += n; };
-        stride = 1;
-        ll = 1;
-    }
-    else
+
+    if (l % 2 == 1)
     {
-        copy_if_needed = [](){};
-        stride = 2;
-        ll = 2 - l % 2;
+        for (int i = 0; i < nz-1; i++)
+        {
+            buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50);
+        }
+        --nz;
     }
 
-    for (; ll <= l; ll += stride, --nz)
+    for (ll = 2 + l%2; ll <= l; ll += 2, --nz)
     {
-        copy_if_needed();
         for (int i = 0; i < nz-1; i++)
         {
             buffer[i] = illinois(jl, buffer[i], buffer[i+1], 1e-15, 50);
         }
     }
 
-    std::copy(buffer, buffer + n, zeros + offset);
+    std::copy(buffer, buffer + n, zeros);
     delete[] buffer;
 }
 
diff --git a/source/module_base/math_sphbes.h b/source/module_base/math_sphbes.h
index 7aa9c78a48..c654847a5d 100644
--- a/source/module_base/math_sphbes.h
+++ b/source/module_base/math_sphbes.h
@@ -126,18 +126,13 @@ class Sphbes
      * This function computes the first n positive zeros of the l-th order
      * spherical Bessel function of the first kind. 
      *
-     * @param[in]   l           (maximum) order of the spherical Bessel function
-     * @param[in]   n           number of zeros to be computed (for each j_l if return_all is true)
-     * @param[out]  zeros       on exit, contains the positive zeros.
-     * @param[in]   return_all  if true, return all zeros from j_0 to j_l such that zeros[l*n+i]
-     *                          is the i-th zero of j_l. If false, return only the first n zeros of j_l.
-     *
-     * @note The size of array "zeros" must be at least (l+1)*n if return_all is true, and n otherwise.
+     * @param[in]   l       order of the spherical Bessel function
+     * @param[in]   n       number of zeros to be computed
+     * @param[out]  zeros   on exit, contains the first n positive zeros in ascending order
      */
     static void sphbes_zeros(const int l,
                              const int n,
-                             double* const zeros,
-                             bool return_all = false
+                             double* const zeros
     );
 
 private:
diff --git a/source/module_base/para_json.cpp b/source/module_base/para_json.cpp
deleted file mode 100644
index 1f042271f8..0000000000
--- a/source/module_base/para_json.cpp
+++ /dev/null
@@ -1,977 +0,0 @@
-#include "para_json.h"
-#include "module_base/global_variable.h"
-
-#ifdef __RAPIDJSON
-
-namespace Para_Json
-{
-    int test=4;
-    // @param doc: the output json file
-    rapidjson::Document doc;
-    rapidjson::Value abacus(rapidjson::kObjectType);
-  
-    // @param general_info ：
-    rapidjson::Value general_info(rapidjson::kObjectType);
-    rapidjson::Value version;
-     
-    rapidjson::Value commit;
-    rapidjson::Value begin_time;
-    rapidjson::Value begin_date;
-    rapidjson::Value device_g;
-    // @param general_info -- parallel：
-    rapidjson::Value parallel(rapidjson::kObjectType);
-    rapidjson::Value drank;
-    rapidjson::Value dsize;
-    rapidjson::Value dcolor ;
-    // @param general_info -- path
-    rapidjson::Value path(rapidjson::kObjectType);
-    rapidjson::Value global_out_dir;
-    rapidjson::Value global_in_card;
-    rapidjson::Value pseudo_dir_path ;
-    rapidjson::Value orbital_dir_path;
-
-    
-    // @param reading_information：
-    rapidjson::Value readin_info(rapidjson::kObjectType);
-    // @param reading_information -- input_file：
-    rapidjson::Value input_file(rapidjson::kObjectType);
-
-
-    // @param reading_information -- input_file -- system_variables：
-    rapidjson::Value input_suffix;
-    rapidjson::Value ntype;
-    rapidjson::Value calculation;
-    rapidjson::Value esolver_type;
-    rapidjson::Value symmetry;
-    rapidjson::Value symmetry_precfield;
-    rapidjson::Value symmetry_autoclose;
-    rapidjson::Value kpar;
-    rapidjson::Value bndpar;
-    rapidjson::Value latname;
-    rapidjson::Value init_wfc;
-    rapidjson::Value init_chg;
-    rapidjson::Value init_vel;
-    rapidjson::Value nelec;
-    rapidjson::Value nupdown;
-    rapidjson::Value dft_functional;
-    rapidjson::Value xc_temperature;
-    rapidjson::Value pseudo_rcut(rapidjson::kNumberType );
-    rapidjson::Value pseudo_mesh;
-    rapidjson::Value mem_saver;
-    rapidjson::Value diago_proc;
-    rapidjson::Value nbspline;
-    rapidjson::Value kspacing(rapidjson::kArrayType);
-    rapidjson::Value min_dist_coef(rapidjson::kNumberType);
-    rapidjson::Value device;
-    // @param reading_information -- input_file -- files_related
-    rapidjson::Value stru_file;
-    rapidjson::Value kpoint_file;
-    rapidjson::Value pseudo_dir;
-    rapidjson::Value orbital_dir;
-    rapidjson::Value read_file_dir;
-    rapidjson::Value wannier_card;
-
-    // @param reading_information -- input_file -- planewave_related
-    rapidjson::Value ecutwfc;
-    rapidjson::Value nx;
-    rapidjson::Value ny;
-    rapidjson::Value nz;
-    rapidjson::Value pw_seed;
-    rapidjson::Value pw_diag_thr;
-    rapidjson::Value pw_diag_nmax;
-    rapidjson::Value pw_diag_ndim;
-    // @param reading_information -- input_file -- numerical_atomic_orbitals_related
-    rapidjson::Value nb2d;
-    rapidjson::Value lmaxmax;
-    rapidjson::Value lcao_ecut;
-    rapidjson::Value lcao_dk;
-    rapidjson::Value lcao_dr;
-    rapidjson::Value lcao_rmax;
-    rapidjson::Value search_radius;
-    rapidjson::Value search_pbc;
-    rapidjson::Value bx;
-    rapidjson::Value by;
-    rapidjson::Value bz;
-    // @param reading_information -- input_file -- electronic_structure
-    rapidjson::Value basis_type;
-    rapidjson::Value ks_solver;
-    rapidjson::Value nbands;
-    rapidjson::Value nbands_istate;
-    rapidjson::Value nspin;
-    rapidjson::Value smearing_method;
-    rapidjson::Value smearing_sigma;
-    rapidjson::Value smearing_sigma_temp;
-    rapidjson::Value mixing_type;
-    rapidjson::Value mixing_beta;
-    rapidjson::Value mixing_ndim;
-    rapidjson::Value mixing_gg0;
-    rapidjson::Value mixing_tau;
-    rapidjson::Value mixing_dftu;
-    rapidjson::Value gamma_only;
-    rapidjson::Value printe;
-    rapidjson::Value scf_nmax;
-    rapidjson::Value scf_thr;
-    rapidjson::Value scf_thr_type;
-    rapidjson::Value chg_extrap;
-    rapidjson::Value lspinorb;
-    rapidjson::Value noncolin;
-    rapidjson::Value soc_lambda;
-    // @param reading_information -- input_file -- electronic_structure_SDFT
-    rapidjson::Value method_sto;
-    rapidjson::Value nbands_sto;
-    rapidjson::Value nche_sto(rapidjson::kNumberType);
-    rapidjson::Value emin_sto;
-    rapidjson::Value emax_sto;
-    rapidjson::Value seed_sto;
-    rapidjson::Value initsto_freq;
-    rapidjson::Value npart_sto;
-    // @param reading_information -- input_file -- geometry_relaxation
-    rapidjson::Value relax_method;
-    rapidjson::Value relax_new;
-    rapidjson::Value relax_scale_force;
-    rapidjson::Value relax_nmax;
-    rapidjson::Value relax_cg_thr;
-    rapidjson::Value cal_force;
-    rapidjson::Value force_thr;
-    rapidjson::Value force_thr_ev;
-    rapidjson::Value force_thr_ev2;
-    rapidjson::Value relax_bfgs_w1;
-    rapidjson::Value relax_bfgs_w2;
-    rapidjson::Value relax_bfgs_rmax;
-    rapidjson::Value relax_bfgs_rmin;
-    rapidjson::Value relax_bfgs_init;
-    rapidjson::Value cal_stress;
-    rapidjson::Value stress_thr;
-    rapidjson::Value press1;
-    rapidjson::Value press2;
-    rapidjson::Value press3;
-    rapidjson::Value fixed_axes;
-    rapidjson::Value fixed_ibrav;
-    rapidjson::Value fixed_atoms;
-    rapidjson::Value cell_factor;
-
-    // @param reading_information -- input_file -- output_information_related
-    rapidjson::Value out_mul;
-    rapidjson::Value out_freq_elec;
-    rapidjson::Value out_freq_ion;
-    rapidjson::Value out_chg;
-    rapidjson::Value out_pot;
-    rapidjson::Value out_dm;
-    rapidjson::Value out_dm1;
-    rapidjson::Value out_wfc_pw;
-    rapidjson::Value out_wfc_r;
-    rapidjson::Value out_wfc_lcao;
-    rapidjson::Value out_dos;
-    rapidjson::Value out_band;
-    rapidjson::Value out_proj_band;
-    rapidjson::Value out_stru;
-    rapidjson::Value out_bandgap;
-    rapidjson::Value out_level;
-    rapidjson::Value out_alllog;
-    rapidjson::Value out_mat_hs;
-    rapidjson::Value out_mat_r;
-    rapidjson::Value out_mat_hs2;
-    rapidjson::Value out_mat_t;
-    rapidjson::Value out_mat_dh;
-    rapidjson::Value out_app_flag;
-    rapidjson::Value out_interval;
-    rapidjson::Value out_element_info;
-    rapidjson::Value restart_save;
-    rapidjson::Value restart_load;
-    rapidjson::Value rpa;
-
-    // @param reading_information -- input_file -- density_of_states
-    rapidjson::Value dos_edelta_ev;
-    rapidjson::Value dos_sigma;
-    rapidjson::Value dos_scale;
-    rapidjson::Value dos_emin_ev;
-    rapidjson::Value dos_emax_ev;
-    rapidjson::Value dos_nche;
-    // @param reading_information -- input_file -- naos
-    rapidjson::Value bessel_nao_ecut;
-    rapidjson::Value bessel_nao_tolerence;
-    rapidjson::Value bessel_nao_rcut;
-    rapidjson::Value bessel_nao_smooth;
-    rapidjson::Value bessel_nao_sigma;
-    // @param reading_information -- input_file -- deepks
-    rapidjson::Value input_file_out_labels;
-    rapidjson::Value input_file_scf;
-    rapidjson::Value input_file_model;
-    rapidjson::Value bessel_descriptor_lmax;
-    rapidjson::Value bessel_descriptor_ecut;
-    rapidjson::Value bessel_descriptor_tolerence;
-    rapidjson::Value bessel_descriptor_rcut;
-    rapidjson::Value bessel_descriptor_smooth;
-    rapidjson::Value bessel_descriptor_sigma;
-    rapidjson::Value input_file_bandgap;
-    rapidjson::Value input_file_out_unittest;
-    // @param reading_information -- input_file -- ofdft
-    rapidjson::Value of_kinetic;
-    rapidjson::Value of_method;
-    rapidjson::Value of_conv;
-    rapidjson::Value of_tole;
-    rapidjson::Value of_tolp;
-    rapidjson::Value of_tf_weight;
-    rapidjson::Value of_vw_weight;
-    rapidjson::Value of_wt_alpha;
-    rapidjson::Value of_wt_beta;
-    rapidjson::Value of_wt_rho0;
-    rapidjson::Value of_hold_rho0;
-    rapidjson::Value of_lkt_a;
-    rapidjson::Value of_read_kernel;
-    rapidjson::Value of_kernel_file;
-    rapidjson::Value of_full_pw;
-    rapidjson::Value of_full_pw_dim;
-
-    // @param reading_information -- input_file -- electric_field_and_dipole_correction
-    rapidjson::Value efield_flag;
-    rapidjson::Value dip_cor_flag;
-    rapidjson::Value efield_dir;
-    rapidjson::Value efield_pos_max;
-    rapidjson::Value efield_pos_dec;
-    rapidjson::Value efield_amp;
-    // @param reading_information -- input_file -- gate_field 
-    rapidjson::Value gate_flag;
-    rapidjson::Value zgate;
-    rapidjson::Value block;
-    rapidjson::Value block_down;
-    rapidjson::Value block_up;
-    rapidjson::Value block_height;
-    // @param reading_information -- input_file -- exact_exchange
-    rapidjson::Value exx_hybrid_alpha;
-    rapidjson::Value exx_hse_omega;
-    rapidjson::Value exx_separate_loop;
-    rapidjson::Value exx_hybrid_step;
-    rapidjson::Value exx_mixing_beta;
-    rapidjson::Value exx_lambda;
-    rapidjson::Value exx_pca_threshold;
-    rapidjson::Value exx_c_threshold;
-    rapidjson::Value exx_v_threshold;
-    rapidjson::Value exx_dm_threshold;
-    rapidjson::Value exx_c_grad_threshold;
-    rapidjson::Value exx_v_grad_threshold;
-    rapidjson::Value exx_schwarz_threshold;
-    rapidjson::Value exx_cauchy_threshold;
-    rapidjson::Value exx_cauchy_force_threshold;
-    rapidjson::Value exx_cauchy_stress_threshold;
-    rapidjson::Value exx_ccp_threshold;
-    rapidjson::Value exx_ccp_rmesh_times;
-    rapidjson::Value exx_distribute_type;
-    rapidjson::Value exx_opt_orb_lmax;
-    rapidjson::Value exx_opt_orb_ecut;
-    rapidjson::Value exx_opt_orb_tolerence;
-    rapidjson::Value exx_real_number;
-
-    // @param reading_information -- input_file -- molecular_dynamics
-    rapidjson::Value md_type;
-    rapidjson::Value md_nstep;
-    rapidjson::Value md_dt;
-    rapidjson::Value md_thermostat;
-    rapidjson::Value md_tlast;
-    rapidjson::Value md_tfirst;
-    rapidjson::Value md_restart;
-    rapidjson::Value md_restartfreq;
-    rapidjson::Value md_dumpfreq;
-    rapidjson::Value dump_force;
-    rapidjson::Value dump_vel;
-    rapidjson::Value dump_virial;
-    rapidjson::Value md_seed;
-    rapidjson::Value md_tfreq;
-    rapidjson::Value md_tchain;
-    rapidjson::Value md_pmode;
-    rapidjson::Value md_prec_level;
-    rapidjson::Value ref_cell_factor;
-    rapidjson::Value md_pcouple;
-    rapidjson::Value md_pfirst;
-    rapidjson::Value md_plast;
-    rapidjson::Value md_pfreq;
-    rapidjson::Value md_pchain;
-    rapidjson::Value lj_rcut;
-    rapidjson::Value lj_epsilon;
-    rapidjson::Value lj_sigma;
-    rapidjson::Value pot_file;
-    rapidjson::Value msst_direction;
-    rapidjson::Value msst_vel;
-    rapidjson::Value msst_vis;
-    rapidjson::Value msst_tscale;
-    rapidjson::Value msst_qmass;
-    rapidjson::Value md_damp;
-    rapidjson::Value md_tolerance;
-    rapidjson::Value md_nraise;
-    rapidjson::Value cal_syns;
-    rapidjson::Value dmax;
-
-    // @param reading_information -- input_file -- dft_plus_u
-    rapidjson::Value orbital_corr(rapidjson::kArrayType);
-    rapidjson::Value hubbard_u(rapidjson::kArrayType);
-    rapidjson::Value yukawa_potential;
-    rapidjson::Value yukawa_lambda;
-    rapidjson::Value omc;
-
-    // @param reading_information -- input_file -- vdw_correction
-    rapidjson::Value vdw_method;
-    rapidjson::Value vdw_s6;
-    rapidjson::Value vdw_s8;
-    rapidjson::Value vdw_a1;
-    rapidjson::Value vdw_a2;
-    rapidjson::Value vdw_d;
-    rapidjson::Value vdw_abc;
-    rapidjson::Value vdw_C6_file;
-    rapidjson::Value vdw_C6_unit;
-    rapidjson::Value vdw_R0_file;
-    rapidjson::Value vdw_R0_unit;
-    rapidjson::Value vdw_cutoff_type;
-    rapidjson::Value vdw_cutoff_radius;
-    rapidjson::Value vdw_radius_unit;
-    rapidjson::Value vdw_cutoff_period(rapidjson::kArrayType);
-    rapidjson::Value vdw_cn_thr;
-    rapidjson::Value vdw_cn_thr_unit;
-
-    // @param reading_information -- input_file -- berry_phase_and_wannier90_interface
-    rapidjson::Value berry_phase;
-    rapidjson::Value gdir;
-    rapidjson::Value towannier90;
-    rapidjson::Value nnkpfile;
-    rapidjson::Value wannier_spin;
-
-    // @param reading_information -- input_file -- tddft
-    rapidjson::Value td_edm;
-    rapidjson::Value td_print_eij;
-    rapidjson::Value td_propagator;
-    rapidjson::Value td_vext;
-    rapidjson::Value td_vext_dire;
-    rapidjson::Value td_stype;
-    rapidjson::Value td_ttype;
-    rapidjson::Value td_tstart;
-    rapidjson::Value td_tend;
-    rapidjson::Value td_lcut1;
-    rapidjson::Value td_lcut2;
-    rapidjson::Value td_gauss_freq;
-    rapidjson::Value td_gauss_phase;
-    rapidjson::Value td_gauss_sigma;
-    rapidjson::Value td_gauss_t0;
-    rapidjson::Value td_gauss_amp;
-    rapidjson::Value td_trape_freq;
-    rapidjson::Value td_trape_phase;
-    rapidjson::Value td_trape_t1;
-    rapidjson::Value td_trape_t2;
-    rapidjson::Value td_trape_t3;
-    rapidjson::Value td_trape_amp;
-    rapidjson::Value td_trigo_freq1;
-    rapidjson::Value td_trigo_freq2;
-    rapidjson::Value td_trigo_phase1;
-    rapidjson::Value td_trigo_phase2;
-    rapidjson::Value td_trigo_amp;
-    rapidjson::Value td_heavi_t0;
-    rapidjson::Value td_heavi_amp;
-    rapidjson::Value td_out_dipole;
-    rapidjson::Value td_out_efield;
-    rapidjson::Value ocp;
-    rapidjson::Value ocp_set;
-
-    // @param reading_information -- input_file -- debuging_related
-    rapidjson::Value t_in_h;
-    rapidjson::Value vl_in_h;
-    rapidjson::Value vnl_in_h;
-    rapidjson::Value vh_in_h;
-    rapidjson::Value vion_in_h;
-    rapidjson::Value test_force;
-    rapidjson::Value test_stress;
-    rapidjson::Value colour;
-    rapidjson::Value test_skip_ewald;
-
-    // @param reading_information -- input_file -- electronic_conductivities
-    rapidjson::Value cal_cond;
-    rapidjson::Value cond_nche;
-    rapidjson::Value cond_dw;
-    rapidjson::Value cond_wcut;
-    rapidjson::Value cond_dt;
-    rapidjson::Value cond_dtbatch;
-    rapidjson::Value cond_fwhm;
-    rapidjson::Value cond_nonlocal;
-    // @param reading_information -- input_file -- implicit_solvation_model
-    rapidjson::Value imp_sol;
-    rapidjson::Value eb_k;
-    rapidjson::Value tau;
-    rapidjson::Value sigma_k;
-    rapidjson::Value nc_k;
-
-    // @param reading_information -- stru_infos：
-    rapidjson::Value stru_infos(rapidjson::kObjectType);
-    // rapidjson::Value ATOMIC_SPECIES(rapidjson::kArrayType);
-    // rapidjson::Value NUMERICAL_ORBITAL;
-    // rapidjson::Value LATTICE_CONSTANT(rapidjson::kArrayType);
-    // rapidjson::Value ATOMIC_POSITIONS(rapidjson::kArrayType);
-
-    // @param reading_information -- KPT_infos
-    rapidjson::Value KPT_infos(rapidjson::kObjectType);
-    // rapidjson::Value total_number;
-    // rapidjson::Value mode;
-    // rapidjson::Value vectors(rapidjson::kArrayType);
-
-    // @param reading_information -- orb_infos
-    rapidjson::Value orb_infos(rapidjson::kObjectType);
-
-    // @param reading_information -- pp
-    rapidjson::Value pp(rapidjson::kObjectType);
-
-    // @param init
-    rapidjson::Value init(rapidjson::kObjectType);
-    // @param init -- general
-    // rapidjson::Value calculation;
-    // rapidjson::Value esolver_type;
-    // rapidjson::Value basis_type;
-    // rapidjson::Value gamma_only;
-    // rapidjson::Value ks_solver;
-    // rapidjson::Value ntype;
-    // rapidjson::Value nspin;
-    // rapidjson::Value ecutwfc;
-    // rapidjson::Value scf_thr;
-    // rapidjson::Value scf_nmax;
-
-    // @param init -- symmetry
-    // rapidjson::Value symmetry(rapidjson::kObjectType);
-    // rapidjson::Value BRAVAIS_TYPE;
-    // rapidjson::Value BRAVAIS_LATTICE_NAME;
-    // rapidjson::Value IBRAV;
-    // rapidjson::Value LATTICE_CONSTANT_A;
-    // rapidjson::Value right_hand_lattice;
-
-    // @param init -- Kpoints
-    rapidjson::Value kpoints(rapidjson::kObjectType);
-    rapidjson::Value nkstot;
-    rapidjson::Value nkstot_ibz;
-    rapidjson::Value coordinates(rapidjson::kArrayType);
-    rapidjson::Value weight(rapidjson::kArrayType);
-
-    // @param init -- grid
-    rapidjson::Value grid(rapidjson::kObjectType);
-    rapidjson::Value energy_cutoff_for_wavefunc;
-    rapidjson::Value fft_grid_for_wave_functions(rapidjson::kArrayType);
-    rapidjson::Value number_of_plane_waves;
-    rapidjson::Value number_of_sticks;
-
-    // @param init -- Smearing
-    // rapidjson::Value smearing_method;
-    // rapidjson::Value smearing_sigma;
-
-    // @param init -- mixing
-    rapidjson::Value mixing;
-
-
-    // @param output
-    rapidjson::Value output(rapidjson::kArrayType);
-
-
-
-    // @param final_stru
-    rapidjson::Value final_stru(rapidjson::kObjectType);
-    rapidjson::Value cell;
-    rapidjson::Value coordinate;
-
-
-
-    /**
-     *  The functions below initialize the json output parameter 
-     *  tree to connect the nodes of the module
-    */
-
-    /**
-     * @brief   add Top stage：parameter in Abacus:
-     */
-    void Init_json_abacus()
-    {
-
-
-        // add First stage：parameter in abcus:
-
-        abacus.AddMember("general_info", general_info, doc.GetAllocator());
-
-        abacus.AddMember("readin_info", readin_info, doc.GetAllocator());
-        
-        abacus.AddMember("init", init, doc.GetAllocator());
-
-        abacus.AddMember("output", output, doc.GetAllocator());
-
-        abacus.AddMember("final_stru", final_stru, doc.GetAllocator());
-
-        doc.SetObject();
-        // abacus.SetObject();
-        doc.AddMember("ABACUS", abacus, doc.GetAllocator());
-        /**
-         * .
-         * .
-         * .
-         * .
-         * .
-         * .
-         * .
-         * */
-    }
-    /**
-     * @brief   add Second stage：parameter in Abacus - general_info:
-     */
-    void Init_json_abacus_generalInfo(){
-        general_info.AddMember("version", version, doc.GetAllocator());
-
-        general_info.AddMember("commit", commit, doc.GetAllocator());      
-
-        general_info.AddMember("begin_time", begin_time, doc.GetAllocator());      
-
-        general_info.AddMember("begin_date", begin_date, doc.GetAllocator());     
-
-        general_info.AddMember("device", device_g, doc.GetAllocator());                
-
-
-        
-        parallel.AddMember("drank", drank, doc.GetAllocator());
-
-        parallel.AddMember("dsize", dsize, doc.GetAllocator());
-                        
-        parallel.AddMember("dcolor", dcolor, doc.GetAllocator());
-    
-
-        // add Third stage：parameter in parallel:
-        general_info.AddMember("parallel", parallel, doc.GetAllocator());
-                
-    }
-    /**
-     * @brief   delete null node 
-     */
-    void RemoveNullValues(rapidjson::Value& parent) {
-        if (parent.IsObject()) {
-            for (rapidjson::Value::MemberIterator itr = parent.MemberBegin(); itr != parent.MemberEnd(); ) {
-                if (itr->value.IsNull()) {
-                    itr = parent.EraseMember(itr);
-                } else {
-                    // delet son null node
-                    RemoveNullValues(itr->value);
-                    ++itr;
-                }
-            }
-        } else if (parent.IsArray()) {
-            for (int i = 0; i < parent.Size(); ) {
-                if (parent[i].IsNull()) {
-                    parent.Erase(parent.Begin() + i);
-                } else {
-                    // delet son null node
-                    RemoveNullValues(parent[i]);
-                    ++i;
-                }
-            }
-        }
-    }
-
-    /**
-     * @brief   add Second stage：parameter in Abacus - readin_info:
-     */
-    void Init_json_abacus_readinInfo(){
-        //add Third stage：parameter in system_variables:
-        input_file.AddMember("suffix", input_suffix, doc.GetAllocator());
-        input_file.AddMember("ntype", ntype, doc.GetAllocator());
-        input_file.AddMember("calculation", calculation, doc.GetAllocator());
-        input_file.AddMember("esolver_type", esolver_type, doc.GetAllocator());
-        input_file.AddMember("symmetry", symmetry, doc.GetAllocator());
-        input_file.AddMember("symmetry_precfield", symmetry_precfield, doc.GetAllocator());
-        input_file.AddMember("symmetry_autoclose", symmetry_autoclose, doc.GetAllocator());
-        input_file.AddMember("kpar", kpar, doc.GetAllocator());
-        input_file.AddMember("bndpar", bndpar, doc.GetAllocator());
-        input_file.AddMember("latname", latname, doc.GetAllocator());
-        input_file.AddMember("init_wfc", init_wfc, doc.GetAllocator());
-        input_file.AddMember("init_chg", init_chg, doc.GetAllocator());
-        input_file.AddMember("init_vel", init_vel, doc.GetAllocator());
-        input_file.AddMember("nelec", nelec, doc.GetAllocator());
-        input_file.AddMember("nupdown", nupdown, doc.GetAllocator());
-        input_file.AddMember("dft_functional", dft_functional, doc.GetAllocator());
-        input_file.AddMember("xc_temperature", xc_temperature, doc.GetAllocator());
-        input_file.AddMember("pseudo_rcut", pseudo_rcut, doc.GetAllocator());
-        input_file.AddMember("pseudo_mesh", pseudo_mesh, doc.GetAllocator());
-        input_file.AddMember("mem_saver", mem_saver, doc.GetAllocator());
-        input_file.AddMember("diago_proc", diago_proc, doc.GetAllocator());
-        input_file.AddMember("nbspline", nbspline, doc.GetAllocator());
-        input_file.AddMember("kspacing", kspacing, doc.GetAllocator());
-        input_file.AddMember("min_dist_coef", min_dist_coef, doc.GetAllocator());
-        input_file.AddMember("device", device, doc.GetAllocator());
-
-        //add Third stage：parameter in files_related:
-        input_file.AddMember("stru_file", stru_file, doc.GetAllocator());
-        input_file.AddMember("kpoint_file", kpoint_file, doc.GetAllocator());
-        input_file.AddMember("pseudo_dir", pseudo_dir, doc.GetAllocator());
-        input_file.AddMember("orbital_dir", orbital_dir, doc.GetAllocator());
-        input_file.AddMember("read_file_dir", read_file_dir, doc.GetAllocator());
-        input_file.AddMember("wannier_card", wannier_card, doc.GetAllocator());
-    
-        //add Third stage：parameter in planewave_related:
-        input_file.AddMember("ecutwfc", ecutwfc, doc.GetAllocator());
-        input_file.AddMember("nx", nx, doc.GetAllocator());
-        input_file.AddMember("ny", ny, doc.GetAllocator());
-        input_file.AddMember("nz", nz, doc.GetAllocator());
-        input_file.AddMember("pw_seed", pw_seed, doc.GetAllocator());
-        input_file.AddMember("pw_diag_thr", pw_diag_thr, doc.GetAllocator());
-        input_file.AddMember("pw_diag_nmax", pw_diag_nmax, doc.GetAllocator());
-        input_file.AddMember("pw_diag_ndim", pw_diag_ndim, doc.GetAllocator());    
-    
-    
-        //add Third stage：parameter in numerical_atomic_orbitals_related:
-        input_file.AddMember("nb2d", nb2d, doc.GetAllocator());
-        input_file.AddMember("lmaxmax", lmaxmax, doc.GetAllocator());
-        input_file.AddMember("lcao_ecut", lcao_ecut, doc.GetAllocator());
-        input_file.AddMember("lcao_dk", lcao_dk, doc.GetAllocator());
-        input_file.AddMember("lcao_dr", lcao_dr, doc.GetAllocator());
-        input_file.AddMember("lcao_rmax", lcao_rmax, doc.GetAllocator());
-        input_file.AddMember("search_radius", search_radius, doc.GetAllocator());
-        input_file.AddMember("search_pbc", search_pbc, doc.GetAllocator());
-        input_file.AddMember("bx", bx, doc.GetAllocator());
-        input_file.AddMember("by", by, doc.GetAllocator());
-        input_file.AddMember("bz", bz, doc.GetAllocator());        
-    
-        //add Third stage：parameter in electronic_structure:
-        input_file.AddMember("basis_type", basis_type, doc.GetAllocator());
-        input_file.AddMember("ks_solver", ks_solver, doc.GetAllocator());
-        input_file.AddMember("nbands", nbands, doc.GetAllocator());
-        input_file.AddMember("nbands_istate", nbands_istate, doc.GetAllocator());
-        input_file.AddMember("nspin", nspin, doc.GetAllocator());
-        input_file.AddMember("smearing_method", smearing_method, doc.GetAllocator());
-        input_file.AddMember("smearing_sigma", smearing_sigma, doc.GetAllocator());
-        input_file.AddMember("smearing_sigma_temp", smearing_sigma_temp, doc.GetAllocator());
-        input_file.AddMember("mixing_type", mixing_type, doc.GetAllocator());
-        input_file.AddMember("mixing_beta", mixing_beta, doc.GetAllocator());
-        input_file.AddMember("mixing_ndim", mixing_ndim, doc.GetAllocator());
-        input_file.AddMember("mixing_gg0", mixing_gg0, doc.GetAllocator());
-        input_file.AddMember("mixing_tau", mixing_tau, doc.GetAllocator());
-        input_file.AddMember("mixing_dftu", mixing_dftu, doc.GetAllocator());
-        input_file.AddMember("gamma_only", gamma_only, doc.GetAllocator());
-        input_file.AddMember("printe", printe, doc.GetAllocator());
-        input_file.AddMember("scf_nmax", scf_nmax, doc.GetAllocator());
-        input_file.AddMember("scf_thr", scf_thr, doc.GetAllocator());
-        input_file.AddMember("scf_thr_type", scf_thr_type, doc.GetAllocator());
-        input_file.AddMember("chg_extrap", chg_extrap, doc.GetAllocator());
-        input_file.AddMember("lspinorb", lspinorb, doc.GetAllocator());
-        input_file.AddMember("noncolin", noncolin, doc.GetAllocator());
-        input_file.AddMember("soc_lambda", soc_lambda, doc.GetAllocator());    
-
-
-        //add Third stage：parameter in electronic_structure_SDFT:
-        input_file.AddMember("method_sto", method_sto, doc.GetAllocator());
-        input_file.AddMember("nbands_sto", nbands_sto, doc.GetAllocator());
-        input_file.AddMember("nche_sto", nche_sto, doc.GetAllocator());
-        input_file.AddMember("emin_sto", emin_sto, doc.GetAllocator());
-        input_file.AddMember("emax_sto", emax_sto, doc.GetAllocator());
-        input_file.AddMember("seed_sto", seed_sto, doc.GetAllocator());
-        input_file.AddMember("initsto_freq", initsto_freq, doc.GetAllocator());
-        input_file.AddMember("npart_sto", npart_sto, doc.GetAllocator());
-        
-        
-        //add Third stage：parameter in geometry_relaxation:
-        input_file.AddMember("relax_method", relax_method, doc.GetAllocator());
-        input_file.AddMember("relax_new", relax_new, doc.GetAllocator());
-        input_file.AddMember("relax_scale_force", relax_scale_force, doc.GetAllocator());
-        input_file.AddMember("relax_nmax", relax_nmax, doc.GetAllocator());
-        input_file.AddMember("relax_cg_thr", relax_cg_thr, doc.GetAllocator());
-        input_file.AddMember("cal_force", cal_force, doc.GetAllocator());
-        input_file.AddMember("force_thr", force_thr, doc.GetAllocator());
-        input_file.AddMember("force_thr_ev", force_thr_ev, doc.GetAllocator());
-        input_file.AddMember("force_thr_ev2", force_thr_ev2, doc.GetAllocator());
-        input_file.AddMember("relax_bfgs_w1", relax_bfgs_w1, doc.GetAllocator());
-        input_file.AddMember("relax_bfgs_w2", relax_bfgs_w2, doc.GetAllocator());
-        input_file.AddMember("relax_bfgs_rmax", relax_bfgs_rmax, doc.GetAllocator());
-        input_file.AddMember("relax_bfgs_rmin", relax_bfgs_rmin, doc.GetAllocator());
-        input_file.AddMember("relax_bfgs_init", relax_bfgs_init, doc.GetAllocator());
-        input_file.AddMember("cal_stress", cal_stress, doc.GetAllocator());
-        input_file.AddMember("stress_thr", stress_thr, doc.GetAllocator());
-        input_file.AddMember("press1", press1, doc.GetAllocator());
-        input_file.AddMember("press2", press2, doc.GetAllocator());
-        input_file.AddMember("press3", press3, doc.GetAllocator());
-        input_file.AddMember("fixed_axes", fixed_axes, doc.GetAllocator());
-        input_file.AddMember("fixed_ibrav", fixed_ibrav, doc.GetAllocator());
-        input_file.AddMember("fixed_atoms", fixed_atoms, doc.GetAllocator());
-        input_file.AddMember("cell_factor", cell_factor, doc.GetAllocator());
-        
-        
-        //add Third stage：parameter in output_information_related:
-        input_file.AddMember("out_mul", out_mul, doc.GetAllocator());
-        input_file.AddMember("out_freq_elec", out_freq_elec, doc.GetAllocator());
-        input_file.AddMember("out_freq_ion", out_freq_ion, doc.GetAllocator());        
-        input_file.AddMember("out_chg", out_chg, doc.GetAllocator());
-        input_file.AddMember("out_pot", out_pot, doc.GetAllocator());
-        input_file.AddMember("out_dm", out_dm, doc.GetAllocator());
-        input_file.AddMember("out_dm1", out_dm1, doc.GetAllocator());
-        input_file.AddMember("out_wfc_pw", out_wfc_pw, doc.GetAllocator());
-        input_file.AddMember("out_wfc_r", out_wfc_r, doc.GetAllocator());
-        input_file.AddMember("out_wfc_lcao", out_wfc_lcao, doc.GetAllocator());
-        input_file.AddMember("out_dos", out_dos, doc.GetAllocator());
-        input_file.AddMember("out_band", out_band, doc.GetAllocator());
-        input_file.AddMember("out_proj_band", out_proj_band, doc.GetAllocator());
-        input_file.AddMember("out_stru", out_stru, doc.GetAllocator());
-        input_file.AddMember("out_bandgap", out_bandgap, doc.GetAllocator());
-        input_file.AddMember("out_level", out_level, doc.GetAllocator());
-        input_file.AddMember("out_alllog", out_alllog, doc.GetAllocator());
-        input_file.AddMember("out_mat_hs", out_mat_hs, doc.GetAllocator());
-        input_file.AddMember("out_mat_r", out_mat_r, doc.GetAllocator());
-        input_file.AddMember("out_mat_hs2", out_mat_hs2, doc.GetAllocator());
-        input_file.AddMember("out_mat_t", out_mat_t, doc.GetAllocator());
-        input_file.AddMember("out_mat_dh", out_mat_dh, doc.GetAllocator());
-        input_file.AddMember("out_app_flag", out_app_flag, doc.GetAllocator());
-        input_file.AddMember("out_interval", out_interval, doc.GetAllocator());
-        input_file.AddMember("out_element_info", out_element_info, doc.GetAllocator());
-        input_file.AddMember("restart_save", restart_save, doc.GetAllocator());
-        input_file.AddMember("restart_load", restart_load, doc.GetAllocator());
-        input_file.AddMember("rpa", rpa, doc.GetAllocator());
-
-        //add Third stage：parameter in density_of_states:
-        input_file.AddMember("dos_edelta_ev", dos_edelta_ev, doc.GetAllocator());
-        input_file.AddMember("dos_sigma", dos_sigma, doc.GetAllocator());
-        input_file.AddMember("dos_scale", dos_scale, doc.GetAllocator());
-        input_file.AddMember("dos_emin_ev", dos_emin_ev, doc.GetAllocator());
-        input_file.AddMember("dos_emax_ev", dos_emax_ev, doc.GetAllocator());
-        input_file.AddMember("dos_nche", dos_nche, doc.GetAllocator());
-        
-        //add Third stage：parameter in naos:
-        input_file.AddMember("bessel_nao_ecut", bessel_nao_ecut, doc.GetAllocator());
-        input_file.AddMember("bessel_nao_tolerence", bessel_nao_tolerence, doc.GetAllocator());
-        input_file.AddMember("bessel_nao_rcut", bessel_nao_rcut, doc.GetAllocator());
-        input_file.AddMember("bessel_nao_smooth", bessel_nao_smooth, doc.GetAllocator());
-        input_file.AddMember("bessel_nao_sigma", bessel_nao_sigma, doc.GetAllocator());
-        
-        //add Third stage：parameter in deepks:
-        input_file.AddMember("input_file_out_labels", input_file_out_labels, doc.GetAllocator());
-        input_file.AddMember("input_file_scf", input_file_scf, doc.GetAllocator());
-        input_file.AddMember("input_file_model", input_file_model, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_lmax", bessel_descriptor_lmax, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_ecut", bessel_descriptor_ecut, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_tolerence", bessel_descriptor_tolerence, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_rcut", bessel_descriptor_rcut, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_smooth", bessel_descriptor_smooth, doc.GetAllocator());
-        input_file.AddMember("bessel_descriptor_sigma", bessel_descriptor_sigma, doc.GetAllocator());
-        input_file.AddMember("input_file_bandgap", input_file_bandgap, doc.GetAllocator());
-        input_file.AddMember("input_file_out_unittest", input_file_out_unittest, doc.GetAllocator());
-        
-        //add Third stage：parameter in ofdft:
-        input_file.AddMember("of_kinetic", of_kinetic, doc.GetAllocator());
-        input_file.AddMember("of_method", of_method, doc.GetAllocator());
-        input_file.AddMember("of_conv", of_conv, doc.GetAllocator());
-        input_file.AddMember("of_tole", of_tole, doc.GetAllocator());
-        input_file.AddMember("of_tolp", of_tolp, doc.GetAllocator());
-        input_file.AddMember("of_tf_weight", of_tf_weight, doc.GetAllocator());
-        input_file.AddMember("of_vw_weight", of_vw_weight, doc.GetAllocator());
-        input_file.AddMember("of_wt_alpha", of_wt_alpha, doc.GetAllocator());
-        input_file.AddMember("of_wt_beta", of_wt_beta, doc.GetAllocator());
-        input_file.AddMember("of_wt_rho0", of_wt_rho0, doc.GetAllocator());
-        input_file.AddMember("of_hold_rho0", of_hold_rho0, doc.GetAllocator());
-        input_file.AddMember("of_lkt_a", of_lkt_a, doc.GetAllocator());
-        input_file.AddMember("of_read_kernel", of_read_kernel, doc.GetAllocator());
-        input_file.AddMember("of_kernel_file", of_kernel_file, doc.GetAllocator());
-        input_file.AddMember("of_full_pw", of_full_pw, doc.GetAllocator());
-        input_file.AddMember("of_full_pw_dim", of_full_pw_dim, doc.GetAllocator());
-        
-        
-        //add Third stage：parameter in electric_field_and_dipole_correction:
-        input_file.AddMember("efield_flag", efield_flag, doc.GetAllocator());
-        input_file.AddMember("dip_cor_flag", dip_cor_flag, doc.GetAllocator());
-        input_file.AddMember("efield_dir", efield_dir, doc.GetAllocator());
-        input_file.AddMember("efield_pos_max", efield_pos_max, doc.GetAllocator());
-        input_file.AddMember("efield_pos_dec", efield_pos_dec, doc.GetAllocator());
-        input_file.AddMember("efield_amp", efield_amp, doc.GetAllocator());
-        
-        //add Third stage：parameter in gate_field:
-        input_file.AddMember("gate_flag", gate_flag, doc.GetAllocator());
-        input_file.AddMember("zgate", zgate, doc.GetAllocator());
-        input_file.AddMember("block", block, doc.GetAllocator());
-        input_file.AddMember("block_down", block_down, doc.GetAllocator());
-        input_file.AddMember("block_up", block_up, doc.GetAllocator());
-        input_file.AddMember("block_height", block_height, doc.GetAllocator());
-    
-        //add Third stage：parameter in exact_exchange:
-        input_file.AddMember("exx_hybrid_alpha", exx_hybrid_alpha, doc.GetAllocator());
-        input_file.AddMember("exx_hse_omega", exx_hse_omega, doc.GetAllocator());
-        input_file.AddMember("exx_separate_loop", exx_separate_loop, doc.GetAllocator());
-        input_file.AddMember("exx_hybrid_step", exx_hybrid_step, doc.GetAllocator());
-        input_file.AddMember("exx_mixing_beta", exx_mixing_beta, doc.GetAllocator());
-        input_file.AddMember("exx_lambda", exx_lambda, doc.GetAllocator());
-        input_file.AddMember("exx_pca_threshold", exx_pca_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_c_threshold", exx_c_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_v_threshold", exx_v_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_dm_threshold", exx_dm_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_c_grad_threshold", exx_c_grad_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_v_grad_threshold", exx_v_grad_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_schwarz_threshold", exx_schwarz_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_cauchy_threshold", exx_cauchy_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_cauchy_force_threshold", exx_cauchy_force_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_cauchy_stress_threshold", exx_cauchy_stress_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_ccp_threshold", exx_ccp_threshold, doc.GetAllocator());
-        input_file.AddMember("exx_ccp_rmesh_times", exx_ccp_rmesh_times, doc.GetAllocator());
-        input_file.AddMember("exx_distribute_type", exx_distribute_type, doc.GetAllocator());
-        input_file.AddMember("exx_opt_orb_lmax", exx_opt_orb_lmax, doc.GetAllocator());
-        input_file.AddMember("exx_opt_orb_ecut", exx_opt_orb_ecut, doc.GetAllocator());
-        input_file.AddMember("exx_opt_orb_tolerence", exx_opt_orb_tolerence, doc.GetAllocator());
-        input_file.AddMember("exx_real_number", exx_real_number, doc.GetAllocator());
-        
-        
-        //add Third stage：parameter in molecular_dynamics:
-        input_file.AddMember("md_type", md_type, doc.GetAllocator());
-        input_file.AddMember("md_nstep", md_nstep, doc.GetAllocator());
-        input_file.AddMember("md_dt", md_dt, doc.GetAllocator());
-        input_file.AddMember("md_thermostat", md_thermostat, doc.GetAllocator());
-        input_file.AddMember("md_tlast", md_tlast, doc.GetAllocator());
-        input_file.AddMember("md_tfirst", md_tfirst, doc.GetAllocator());
-        input_file.AddMember("md_restart", md_restart, doc.GetAllocator());
-        input_file.AddMember("md_restartfreq", md_restartfreq, doc.GetAllocator());
-        input_file.AddMember("md_dumpfreq", md_dumpfreq, doc.GetAllocator());
-        input_file.AddMember("dump_force", dump_force, doc.GetAllocator());
-        input_file.AddMember("dump_vel", dump_vel, doc.GetAllocator());
-        input_file.AddMember("dump_virial", dump_virial, doc.GetAllocator());
-        input_file.AddMember("md_seed", md_seed, doc.GetAllocator());
-        input_file.AddMember("md_tfreq", md_tfreq, doc.GetAllocator());
-        input_file.AddMember("md_tchain", md_tchain, doc.GetAllocator());
-        input_file.AddMember("md_pmode", md_pmode, doc.GetAllocator());
-        input_file.AddMember("md_prec_level", md_prec_level, doc.GetAllocator());
-        input_file.AddMember("ref_cell_factor", ref_cell_factor, doc.GetAllocator());
-        input_file.AddMember("md_pcouple", md_pcouple, doc.GetAllocator());
-        input_file.AddMember("md_pfirst", md_pfirst, doc.GetAllocator());
-        input_file.AddMember("md_plast", md_plast, doc.GetAllocator());
-        input_file.AddMember("md_pfreq", md_pfreq, doc.GetAllocator());
-        input_file.AddMember("md_pchain", md_pchain, doc.GetAllocator());
-        input_file.AddMember("lj_rcut", lj_rcut, doc.GetAllocator());
-        input_file.AddMember("lj_epsilon", lj_epsilon, doc.GetAllocator());
-        input_file.AddMember("lj_sigma", lj_sigma, doc.GetAllocator());
-        input_file.AddMember("pot_file", pot_file, doc.GetAllocator());
-        input_file.AddMember("msst_direction", msst_direction, doc.GetAllocator());
-        input_file.AddMember("msst_vel", msst_vel, doc.GetAllocator());
-        input_file.AddMember("msst_vis", msst_vis, doc.GetAllocator());
-        input_file.AddMember("msst_tscale", msst_tscale, doc.GetAllocator());
-        input_file.AddMember("msst_qmass", msst_qmass, doc.GetAllocator());
-        input_file.AddMember("md_damp", md_damp, doc.GetAllocator());
-        input_file.AddMember("md_tolerance", md_tolerance, doc.GetAllocator());
-        input_file.AddMember("md_nraise", md_nraise, doc.GetAllocator());
-        input_file.AddMember("cal_syns", cal_syns, doc.GetAllocator());
-        input_file.AddMember("dmax", dmax, doc.GetAllocator());
-
-        //add Third stage：parameter in dft_plus_u:
-        input_file.AddMember("orbital_corr", orbital_corr, doc.GetAllocator());
-        input_file.AddMember("hubbard_u", hubbard_u, doc.GetAllocator());
-        input_file.AddMember("yukawa_potential", yukawa_potential, doc.GetAllocator());
-        input_file.AddMember("yukawa_lambda", yukawa_lambda, doc.GetAllocator());
-        input_file.AddMember("omc", omc, doc.GetAllocator());
-
-        //add Third stage：parameter in vdw_correction:
-        input_file.AddMember("vdw_method", vdw_method, doc.GetAllocator());
-        input_file.AddMember("vdw_s6", vdw_s6, doc.GetAllocator());
-        input_file.AddMember("vdw_s8", vdw_s8, doc.GetAllocator());
-        input_file.AddMember("vdw_a1", vdw_a1, doc.GetAllocator());
-        input_file.AddMember("vdw_a2", vdw_a2, doc.GetAllocator());
-        input_file.AddMember("vdw_d", vdw_d, doc.GetAllocator());
-        input_file.AddMember("vdw_abc", vdw_abc, doc.GetAllocator());
-        input_file.AddMember("vdw_C6_file", vdw_C6_file, doc.GetAllocator());
-        input_file.AddMember("vdw_C6_unit", vdw_C6_unit, doc.GetAllocator());
-        input_file.AddMember("vdw_R0_file", vdw_R0_file, doc.GetAllocator());
-        input_file.AddMember("vdw_R0_unit", vdw_R0_unit, doc.GetAllocator());
-        input_file.AddMember("vdw_cutoff_type", vdw_cutoff_type, doc.GetAllocator());
-        input_file.AddMember("vdw_cutoff_radius", vdw_cutoff_radius, doc.GetAllocator());
-        input_file.AddMember("vdw_radius_unit", vdw_radius_unit, doc.GetAllocator());
-        input_file.AddMember("vdw_cutoff_period", vdw_cutoff_period, doc.GetAllocator());
-        input_file.AddMember("vdw_cn_thr", vdw_cn_thr, doc.GetAllocator());
-        input_file.AddMember("vdw_cn_thr_unit", vdw_cn_thr_unit, doc.GetAllocator());
-
-        //add Third stage：parameter in berry_phase_and_wannier90_interface:
-        input_file.AddMember("berry_phase", berry_phase, doc.GetAllocator());
-        input_file.AddMember("gdir", gdir, doc.GetAllocator());
-        input_file.AddMember("towannier90", towannier90, doc.GetAllocator());
-        input_file.AddMember("nnkpfile", nnkpfile, doc.GetAllocator());
-        input_file.AddMember("wannier_spin", wannier_spin, doc.GetAllocator());    
-    
-        //add Third stage：parameter in tddft:
-        input_file.AddMember("td_edm", td_edm, doc.GetAllocator());
-        input_file.AddMember("td_print_eij", td_print_eij, doc.GetAllocator());
-        input_file.AddMember("td_propagator", td_propagator, doc.GetAllocator());
-        input_file.AddMember("td_vext", td_vext, doc.GetAllocator());
-        input_file.AddMember("td_vext_dire", td_vext_dire, doc.GetAllocator());
-        input_file.AddMember("td_stype", td_stype, doc.GetAllocator());
-        input_file.AddMember("td_ttype", td_ttype, doc.GetAllocator());
-        input_file.AddMember("td_tstart", td_tstart, doc.GetAllocator());
-        input_file.AddMember("td_tend", td_tend, doc.GetAllocator());
-        input_file.AddMember("td_lcut1", td_lcut1, doc.GetAllocator());
-        input_file.AddMember("td_lcut2", td_lcut2, doc.GetAllocator());
-        input_file.AddMember("td_gauss_freq", td_gauss_freq, doc.GetAllocator());
-        input_file.AddMember("td_gauss_phase", td_gauss_phase, doc.GetAllocator());
-        input_file.AddMember("td_gauss_sigma", td_gauss_sigma, doc.GetAllocator());
-        input_file.AddMember("td_gauss_t0", td_gauss_t0, doc.GetAllocator());
-        input_file.AddMember("td_gauss_amp", td_gauss_amp, doc.GetAllocator());
-        input_file.AddMember("td_trape_freq", td_trape_freq, doc.GetAllocator());
-        input_file.AddMember("td_trape_phase", td_trape_phase, doc.GetAllocator());
-        input_file.AddMember("td_trape_t1", td_trape_t1, doc.GetAllocator());
-        input_file.AddMember("td_trape_t2", td_trape_t2, doc.GetAllocator());
-        input_file.AddMember("td_trape_t3", td_trape_t3, doc.GetAllocator());
-        input_file.AddMember("td_trape_amp", td_trape_amp, doc.GetAllocator());
-        input_file.AddMember("td_trigo_freq1", td_trigo_freq1, doc.GetAllocator());
-        input_file.AddMember("td_trigo_freq2", td_trigo_freq2, doc.GetAllocator());
-        input_file.AddMember("td_trigo_phase1", td_trigo_phase1, doc.GetAllocator());
-        input_file.AddMember("td_trigo_phase2", td_trigo_phase2, doc.GetAllocator());
-        input_file.AddMember("td_trigo_amp", td_trigo_amp, doc.GetAllocator());
-        input_file.AddMember("td_heavi_t0", td_heavi_t0, doc.GetAllocator());
-        input_file.AddMember("td_heavi_amp", td_heavi_amp, doc.GetAllocator());
-        input_file.AddMember("td_out_dipole", td_out_dipole, doc.GetAllocator());
-        input_file.AddMember("td_out_efield", td_out_efield, doc.GetAllocator());
-        input_file.AddMember("ocp", ocp, doc.GetAllocator());
-        input_file.AddMember("ocp_set", ocp_set, doc.GetAllocator());
-
-        //add Third stage：parameter in debuging_related:
-        input_file.AddMember("t_in_h", t_in_h, doc.GetAllocator());
-        input_file.AddMember("vl_in_h", vl_in_h, doc.GetAllocator());
-        input_file.AddMember("vnl_in_h", vnl_in_h, doc.GetAllocator());
-        input_file.AddMember("vh_in_h", vh_in_h, doc.GetAllocator());
-        input_file.AddMember("vion_in_h", vion_in_h, doc.GetAllocator());
-        input_file.AddMember("test_force", test_force, doc.GetAllocator());
-        input_file.AddMember("test_stress", test_stress, doc.GetAllocator());
-        input_file.AddMember("colour", colour, doc.GetAllocator());
-        input_file.AddMember("test_skip_ewald", test_skip_ewald, doc.GetAllocator());
-
-        //add Third stage：parameter in electronic_conductivities:
-        input_file.AddMember("cal_cond", cal_cond, doc.GetAllocator());
-        input_file.AddMember("cond_nche", cond_nche, doc.GetAllocator());
-        input_file.AddMember("cond_dw", cond_dw, doc.GetAllocator());
-        input_file.AddMember("cond_wcut", cond_wcut, doc.GetAllocator());
-        input_file.AddMember("cond_dt", cond_dt, doc.GetAllocator());
-        input_file.AddMember("cond_dtbatch", cond_dtbatch, doc.GetAllocator());
-        input_file.AddMember("cond_fwhm", cond_fwhm, doc.GetAllocator());
-        input_file.AddMember("cond_nonlocal", cond_nonlocal, doc.GetAllocator());
-
-        //add Third stage：parameter in implicit_solvation_model:
-        input_file.AddMember("imp_sol", imp_sol, doc.GetAllocator());
-        input_file.AddMember("eb_k", eb_k, doc.GetAllocator());
-        input_file.AddMember("tau", tau, doc.GetAllocator());
-        input_file.AddMember("sigma_k", sigma_k, doc.GetAllocator());
-        input_file.AddMember("nc_k", nc_k, doc.GetAllocator());
-
-
-        RemoveNullValues(input_file);
-
-
-        // after add child_node's node in readin_info, add child node
-        // add parameters in readin_info:
-        readin_info.AddMember("input_file", input_file, doc.GetAllocator());
-
-    }
-
-
-    void Finish_json_tree(){
-        // Converts a json object to a string
-        rapidjson::StringBuffer buffer;
-        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
-        doc.Accept(writer);
-
-        // Output the json string to a file
-        std::string json_path;
-        json_path.append("abacus.json");
-
-        std::ofstream ofs(json_path);
-        ofs << buffer.GetString() << std::endl;
-        ofs.close();
-    }
-
-
-
-
-}
-
-
-#endif
\ No newline at end of file
diff --git a/source/module_base/para_json.h b/source/module_base/para_json.h
deleted file mode 100644
index 0e829dc80e..0000000000
--- a/source/module_base/para_json.h
+++ /dev/null
@@ -1,560 +0,0 @@
-
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-#include <vector>
-
-#ifdef __RAPIDJSON
-
-#include <rapidjson/document.h>
-#include <rapidjson/writer.h>
-#include <rapidjson/stringbuffer.h>
-
-
-/**
- * @brief   This namespace is used to store the json object of the 
- *          abacus parameter and its handlers. Used to read the parameters 
- *          at run time and finally organize them into json format files
- * 
-*/
-namespace Para_Json
-{
-
-    extern int test;
-    // @param doc: the output json file
-    extern rapidjson::Document doc;
-    extern rapidjson::Value abacus;
-
-    // @param general_info ：
-    extern rapidjson::Value general_info;
-    extern rapidjson::Value version;
-    extern rapidjson::Value commit;
-    extern rapidjson::Value begin_time;
-    extern rapidjson::Value begin_date;
-    extern rapidjson::Value device_g;
-    // @param general_info -- parallel：
-    extern rapidjson::Value parallel;
-    extern rapidjson::Value drank;
-    extern rapidjson::Value dsize;
-    extern rapidjson::Value dcolor ;
-    // @param general_info -- path
-    extern rapidjson::Value path;
-    extern rapidjson::Value global_out_dir;
-    extern rapidjson::Value global_in_card;
-    extern rapidjson::Value pseudo_dir_path ;
-    extern rapidjson::Value orbital_dir_path;
-
-    
-    // @param reading_information：
-    extern rapidjson::Value readin_info;
-    // @param reading_information -- input_para：
-    
-    // @param reading_information -- input_para -- system_variables：
-    extern rapidjson::Value system_variables;
-
-    extern rapidjson::Value input_file;
-    extern rapidjson::Value input_suffix;
-    extern rapidjson::Value ntype;
-    extern rapidjson::Value calculation;
-    extern rapidjson::Value esolver_type;
-    extern rapidjson::Value symmetry;
-    extern rapidjson::Value symmetry_precfield;
-    extern rapidjson::Value symmetry_autoclose;
-    extern rapidjson::Value kpar;
-    extern rapidjson::Value bndpar;
-    extern rapidjson::Value latname;
-    extern rapidjson::Value init_wfc;
-    extern rapidjson::Value init_chg;
-    extern rapidjson::Value init_vel;
-    extern rapidjson::Value nelec;
-    extern rapidjson::Value nupdown;
-    extern rapidjson::Value dft_functional;
-    extern rapidjson::Value xc_temperature;
-    extern rapidjson::Value pseudo_rcut;
-    extern rapidjson::Value pseudo_mesh;
-    extern rapidjson::Value mem_saver;
-    extern rapidjson::Value diago_proc;
-    extern rapidjson::Value nbspline;
-    extern rapidjson::Value kspacing;
-    extern rapidjson::Value min_dist_coef;
-    extern rapidjson::Value device;
-    // @param reading_information -- input_para -- files_related
-
-    extern rapidjson::Value stru_file;
-    extern rapidjson::Value kpoint_file;
-    extern rapidjson::Value pseudo_dir;
-    extern rapidjson::Value orbital_dir;
-    extern rapidjson::Value read_file_dir;
-    extern rapidjson::Value wannier_card;
-    // @param reading_information -- input_para -- planewave_related
-
-    extern rapidjson::Value ecutwfc;
-    extern rapidjson::Value nx;
-    extern rapidjson::Value ny;
-    extern rapidjson::Value nz;
-    extern rapidjson::Value pw_seed;
-    extern rapidjson::Value pw_diag_thr;
-    extern rapidjson::Value pw_diag_nmax;
-    extern rapidjson::Value pw_diag_ndim;
-    // @param reading_information -- input_para -- numerical_atomic_orbitals_related
-    
-    extern rapidjson::Value nb2d;
-    extern rapidjson::Value lmaxmax;
-    extern rapidjson::Value lcao_ecut;
-    extern rapidjson::Value lcao_dk;
-    extern rapidjson::Value lcao_dr;
-    extern rapidjson::Value lcao_rmax;
-    extern rapidjson::Value search_radius;
-    extern rapidjson::Value search_pbc;
-    extern rapidjson::Value bx;
-    extern rapidjson::Value by;
-    extern rapidjson::Value bz;
-    // @param reading_information -- input_para -- electronic_structure
-    
-    extern rapidjson::Value basis_type;
-    extern rapidjson::Value ks_solver;
-    extern rapidjson::Value nbands;
-    extern rapidjson::Value nbands_istate;
-    extern rapidjson::Value nspin;
-    extern rapidjson::Value smearing_method;
-    extern rapidjson::Value smearing_sigma;
-    extern rapidjson::Value smearing_sigma_temp;
-    extern rapidjson::Value mixing_type;
-    extern rapidjson::Value mixing_beta;
-    extern rapidjson::Value mixing_ndim;
-    extern rapidjson::Value mixing_gg0;
-    extern rapidjson::Value mixing_tau;
-    extern rapidjson::Value mixing_dftu;
-    extern rapidjson::Value gamma_only;
-    extern rapidjson::Value printe;
-    extern rapidjson::Value scf_nmax;
-    extern rapidjson::Value scf_thr;
-    extern rapidjson::Value scf_thr_type;
-    extern rapidjson::Value chg_extrap;
-    extern rapidjson::Value lspinorb;
-    extern rapidjson::Value noncolin;
-    extern rapidjson::Value soc_lambda;
-    // @param reading_information -- input_para -- electronic_structure_SDFT
-
-    extern rapidjson::Value method_sto;
-    extern rapidjson::Value nbands_sto;
-    extern rapidjson::Value nche_sto;
-    extern rapidjson::Value emin_sto;
-    extern rapidjson::Value emax_sto;
-    extern rapidjson::Value seed_sto;
-    extern rapidjson::Value initsto_freq;
-    extern rapidjson::Value npart_sto;
-    // @param reading_information -- input_para -- geometry_relaxation
-
-    extern rapidjson::Value relax_method;
-    extern rapidjson::Value relax_new;
-    extern rapidjson::Value relax_scale_force;
-    extern rapidjson::Value relax_nmax;
-    extern rapidjson::Value relax_cg_thr;
-    extern rapidjson::Value cal_force;
-    extern rapidjson::Value force_thr;
-    extern rapidjson::Value force_thr_ev;
-    extern rapidjson::Value force_thr_ev2;
-    extern rapidjson::Value relax_bfgs_w1;
-    extern rapidjson::Value relax_bfgs_w2;
-    extern rapidjson::Value relax_bfgs_rmax;
-    extern rapidjson::Value relax_bfgs_rmin;
-    extern rapidjson::Value relax_bfgs_init;
-    extern rapidjson::Value cal_stress;
-    extern rapidjson::Value stress_thr;
-    extern rapidjson::Value press1;
-    extern rapidjson::Value press2;
-    extern rapidjson::Value press3;
-    extern rapidjson::Value fixed_axes;
-    extern rapidjson::Value fixed_ibrav;
-    extern rapidjson::Value fixed_atoms;
-    extern rapidjson::Value cell_factor;
-
-    // @param reading_information -- input_para -- output_information_related
-
-    extern rapidjson::Value out_mul;
-    extern rapidjson::Value out_freq_elec;
-    extern rapidjson::Value out_freq_ion;
-    extern rapidjson::Value out_chg;
-    extern rapidjson::Value out_pot;
-    extern rapidjson::Value out_dm;
-    extern rapidjson::Value out_dm1;
-    extern rapidjson::Value out_wfc_pw;
-    extern rapidjson::Value out_wfc_r;
-    extern rapidjson::Value out_wfc_lcao;
-    extern rapidjson::Value out_dos;
-    extern rapidjson::Value out_band;
-    extern rapidjson::Value out_proj_band;
-    extern rapidjson::Value out_stru;
-    extern rapidjson::Value out_bandgap;
-    extern rapidjson::Value out_level;
-    extern rapidjson::Value out_alllog;
-    extern rapidjson::Value out_mat_hs;
-    extern rapidjson::Value out_mat_r;
-    extern rapidjson::Value out_mat_hs2;
-    extern rapidjson::Value out_mat_t;
-    extern rapidjson::Value out_mat_dh;
-    extern rapidjson::Value out_app_flag;
-    extern rapidjson::Value out_interval;
-    extern rapidjson::Value out_element_info;
-    extern rapidjson::Value restart_save;
-    extern rapidjson::Value restart_load;
-    extern rapidjson::Value rpa;
-
-    // @param reading_information -- input_para -- density_of_states
-
-    extern rapidjson::Value dos_edelta_ev;
-    extern rapidjson::Value dos_sigma;
-    extern rapidjson::Value dos_scale;
-    extern rapidjson::Value dos_emin_ev;
-    extern rapidjson::Value dos_emax_ev;
-    extern rapidjson::Value dos_nche;
-    // @param reading_information -- input_para -- naos
-    extern rapidjson::Value bessel_nao_ecut;
-    extern rapidjson::Value bessel_nao_tolerence;
-    extern rapidjson::Value bessel_nao_rcut;
-    extern rapidjson::Value bessel_nao_smooth;
-    extern rapidjson::Value bessel_nao_sigma;
-    // @param reading_information -- input_para -- deepks
-
-    extern rapidjson::Value deepks_out_labels;
-    extern rapidjson::Value deepks_scf;
-    extern rapidjson::Value deepks_model;
-    extern rapidjson::Value bessel_descriptor_lmax;
-    extern rapidjson::Value bessel_descriptor_ecut;
-    extern rapidjson::Value bessel_descriptor_tolerence;
-    extern rapidjson::Value bessel_descriptor_rcut;
-    extern rapidjson::Value bessel_descriptor_smooth;
-    extern rapidjson::Value bessel_descriptor_sigma;
-    extern rapidjson::Value deepks_bandgap;
-    extern rapidjson::Value deepks_out_unittest;
-    // @param reading_information -- input_para -- ofdft
-    extern rapidjson::Value of_kinetic;
-    extern rapidjson::Value of_method;
-    extern rapidjson::Value of_conv;
-    extern rapidjson::Value of_tole;
-    extern rapidjson::Value of_tolp;
-    extern rapidjson::Value of_tf_weight;
-    extern rapidjson::Value of_vw_weight;
-    extern rapidjson::Value of_wt_alpha;
-    extern rapidjson::Value of_wt_beta;
-    extern rapidjson::Value of_wt_rho0;
-    extern rapidjson::Value of_hold_rho0;
-    extern rapidjson::Value of_lkt_a;
-    extern rapidjson::Value of_read_kernel;
-    extern rapidjson::Value of_kernel_file;
-    extern rapidjson::Value of_full_pw;
-    extern rapidjson::Value of_full_pw_dim;
-
-    // @param reading_information -- input_para -- electric_field_and_dipole_correction
-    
-    extern rapidjson::Value efield_flag;
-    extern rapidjson::Value dip_cor_flag;
-    extern rapidjson::Value efield_dir;
-    extern rapidjson::Value efield_pos_max;
-    extern rapidjson::Value efield_pos_dec;
-    extern rapidjson::Value efield_amp;
-    // @param reading_information -- input_para -- gate_field 
-    
-    extern rapidjson::Value gate_flag;
-    extern rapidjson::Value zgate;
-    extern rapidjson::Value block;
-    extern rapidjson::Value block_down;
-    extern rapidjson::Value block_up;
-    extern rapidjson::Value block_height;
-    // @param reading_information -- input_para -- exact_exchange
-    extern rapidjson::Value exx_hybrid_alpha;
-    extern rapidjson::Value exx_hse_omega;
-    extern rapidjson::Value exx_separate_loop;
-    extern rapidjson::Value exx_hybrid_step;
-    extern rapidjson::Value exx_mixing_beta;
-    extern rapidjson::Value exx_lambda;
-    extern rapidjson::Value exx_pca_threshold;
-    extern rapidjson::Value exx_c_threshold;
-    extern rapidjson::Value exx_v_threshold;
-    extern rapidjson::Value exx_dm_threshold;
-    extern rapidjson::Value exx_c_grad_threshold;
-    extern rapidjson::Value exx_v_grad_threshold;
-    extern rapidjson::Value exx_schwarz_threshold;
-    extern rapidjson::Value exx_cauchy_threshold;
-    extern rapidjson::Value exx_cauchy_force_threshold;
-    extern rapidjson::Value exx_cauchy_stress_threshold;
-    extern rapidjson::Value exx_ccp_threshold;
-    extern rapidjson::Value exx_ccp_rmesh_times;
-    extern rapidjson::Value exx_distribute_type;
-    extern rapidjson::Value exx_opt_orb_lmax;
-    extern rapidjson::Value exx_opt_orb_ecut;
-    extern rapidjson::Value exx_opt_orb_tolerence;
-    extern rapidjson::Value exx_real_number;
-
-    // @param reading_information -- input_para -- molecular_dynamics
-    extern rapidjson::Value md_type;
-    extern rapidjson::Value md_nstep;
-    extern rapidjson::Value md_dt;
-    extern rapidjson::Value md_thermostat;
-    extern rapidjson::Value md_tlast;
-    extern rapidjson::Value md_tfirst;
-    extern rapidjson::Value md_restart;
-    extern rapidjson::Value md_restartfreq;
-    extern rapidjson::Value md_dumpfreq;
-    extern rapidjson::Value dump_force;
-    extern rapidjson::Value dump_vel;
-    extern rapidjson::Value dump_virial;
-    extern rapidjson::Value md_seed;
-    extern rapidjson::Value md_tfreq;
-    extern rapidjson::Value md_tchain;
-    extern rapidjson::Value md_pmode;
-    extern rapidjson::Value md_prec_level;
-    extern rapidjson::Value ref_cell_factor;
-    extern rapidjson::Value md_pcouple;
-    extern rapidjson::Value md_pfirst;
-    extern rapidjson::Value md_plast;
-    extern rapidjson::Value md_pfreq;
-    extern rapidjson::Value md_pchain;
-    extern rapidjson::Value lj_rcut;
-    extern rapidjson::Value lj_epsilon;
-    extern rapidjson::Value lj_sigma;
-    extern rapidjson::Value pot_file;
-    extern rapidjson::Value msst_direction;
-    extern rapidjson::Value msst_vel;
-    extern rapidjson::Value msst_vis;
-    extern rapidjson::Value msst_tscale;
-    extern rapidjson::Value msst_qmass;
-    extern rapidjson::Value md_damp;
-    extern rapidjson::Value md_tolerance;
-    extern rapidjson::Value md_nraise;
-    extern rapidjson::Value cal_syns;
-    extern rapidjson::Value dmax;
-
-    // @param reading_information -- input_para -- dft_plus_u
-    extern rapidjson::Value orbital_corr;
-    extern rapidjson::Value hubbard_u;
-    extern rapidjson::Value yukawa_potential;
-    extern rapidjson::Value yukawa_lambda;
-    extern rapidjson::Value omc;
-
-    // @param reading_information -- input_para -- vdw_correction
-    extern rapidjson::Value vdw_method;
-    extern rapidjson::Value vdw_s6;
-    extern rapidjson::Value vdw_s8;
-    extern rapidjson::Value vdw_a1;
-    extern rapidjson::Value vdw_a2;
-    extern rapidjson::Value vdw_d;
-    extern rapidjson::Value vdw_abc;
-    extern rapidjson::Value vdw_C6_file;
-    extern rapidjson::Value vdw_C6_unit;
-    extern rapidjson::Value vdw_R0_file;
-    extern rapidjson::Value vdw_R0_unit;
-    extern rapidjson::Value vdw_cutoff_type;
-    extern rapidjson::Value vdw_cutoff_radius;
-    extern rapidjson::Value vdw_radius_unit;
-    extern rapidjson::Value vdw_cutoff_period;
-    extern rapidjson::Value vdw_cn_thr;
-    extern rapidjson::Value vdw_cn_thr_unit;
-
-    // @param reading_information -- input_para -- berry_phase_and_wannier90_interface
-    extern rapidjson::Value berry_phase;
-    extern rapidjson::Value gdir;
-    extern rapidjson::Value towannier90;
-    extern rapidjson::Value nnkpfile;
-    extern rapidjson::Value wannier_spin;
-
-    // @param reading_information -- input_para -- tddft
-    extern rapidjson::Value td_edm;
-    extern rapidjson::Value td_print_eij;
-    extern rapidjson::Value td_propagator;
-    extern rapidjson::Value td_vext;
-    extern rapidjson::Value td_vext_dire;
-    extern rapidjson::Value td_stype;
-    extern rapidjson::Value td_ttype;
-    extern rapidjson::Value td_tstart;
-    extern rapidjson::Value td_tend;
-    extern rapidjson::Value td_lcut1;
-    extern rapidjson::Value td_lcut2;
-    extern rapidjson::Value td_gauss_freq;
-    extern rapidjson::Value td_gauss_phase;
-    extern rapidjson::Value td_gauss_sigma;
-    extern rapidjson::Value td_gauss_t0;
-    extern rapidjson::Value td_gauss_amp;
-    extern rapidjson::Value td_trape_freq;
-    extern rapidjson::Value td_trape_phase;
-    extern rapidjson::Value td_trape_t1;
-    extern rapidjson::Value td_trape_t2;
-    extern rapidjson::Value td_trape_t3;
-    extern rapidjson::Value td_trape_amp;
-    extern rapidjson::Value td_trigo_freq1;
-    extern rapidjson::Value td_trigo_freq2;
-    extern rapidjson::Value td_trigo_phase1;
-    extern rapidjson::Value td_trigo_phase2;
-    extern rapidjson::Value td_trigo_amp;
-    extern rapidjson::Value td_heavi_t0;
-    extern rapidjson::Value td_heavi_amp;
-    extern rapidjson::Value td_out_dipole;
-    extern rapidjson::Value td_out_efield;
-    extern rapidjson::Value ocp;
-    extern rapidjson::Value ocp_set;
-
-    // @param reading_information -- input_para -- debuging_related
-    extern rapidjson::Value t_in_h;
-    extern rapidjson::Value vl_in_h;
-    extern rapidjson::Value vnl_in_h;
-    extern rapidjson::Value vh_in_h;
-    extern rapidjson::Value vion_in_h;
-    extern rapidjson::Value test_force;
-    extern rapidjson::Value test_stress;
-    extern rapidjson::Value colour;
-    extern rapidjson::Value test_skip_ewald;
-
-    // @param reading_information -- input_para -- electronic_conductivities
-    extern rapidjson::Value cal_cond;
-    extern rapidjson::Value cond_nche;
-    extern rapidjson::Value cond_dw;
-    extern rapidjson::Value cond_wcut;
-    extern rapidjson::Value cond_dt;
-    extern rapidjson::Value cond_dtbatch;
-    extern rapidjson::Value cond_fwhm;
-    extern rapidjson::Value cond_nonlocal;
-
-    // @param reading_information -- input_para -- implicit_solvation_model
-    extern rapidjson::Value imp_sol;
-    extern rapidjson::Value eb_k;
-    extern rapidjson::Value tau;
-    extern rapidjson::Value sigma_k;
-    extern rapidjson::Value nc_k;
-
-    // @param reading_information -- stru_infos：
-    extern rapidjson::Value stru_infos;
-    // extern rapidjson::Value ATOMIC_SPECIES;
-    // extern rapidjson::Value NUMERICAL_ORBITAL;
-    // extern rapidjson::Value LATTICE_CONSTANT;
-    // extern rapidjson::Value ATOMIC_POSITIONS;
-
-    // @param reading_information -- KPT_infos
-    extern rapidjson::Value KPT_infos;
-    // extern rapidjson::Value total_number;
-    // extern rapidjson::Value mode;
-    // extern rapidjson::Value vectors;
-
-    // @param reading_information -- orb_infos
-    extern rapidjson::Value orb_infos;
-
-    // @param reading_information -- pp
-    extern rapidjson::Value pp;
-
-    // @param init
-    extern rapidjson::Value init;
-    // @param init -- general
-    // extern rapidjson::Value calculation;
-    // extern rapidjson::Value esolver_type;
-    // extern rapidjson::Value basis_type;
-    // extern rapidjson::Value gamma_only;
-    // extern rapidjson::Value ks_solver;
-    // extern rapidjson::Value ntype;
-    // extern rapidjson::Value nspin;
-    // extern rapidjson::Value ecutwfc;
-    // extern rapidjson::Value scf_thr;
-    // extern rapidjson::Value scf_nmax;
-
-    // @param init -- symmetry
-    // extern rapidjson::Value symmetry;
-    // extern rapidjson::Value BRAVAIS_TYPE;
-    // extern rapidjson::Value BRAVAIS_LATTICE_NAME;
-    // extern rapidjson::Value IBRAV;
-    // extern rapidjson::Value LATTICE_CONSTANT_A;
-    // extern rapidjson::Value right_hand_lattice;
-
-    // @param init -- Kpoints
-    extern rapidjson::Value kpoints;
-    extern rapidjson::Value nkstot;
-    extern rapidjson::Value nkstot_ibz;
-    extern rapidjson::Value coordinates;
-    extern rapidjson::Value weight;
-
-    // @param init -- grid
-    extern rapidjson::Value grid;
-    extern rapidjson::Value energy_cutoff_for_wavefunc;
-    extern rapidjson::Value fft_grid_for_wave_functions;
-    extern rapidjson::Value number_of_plane_waves;
-    extern rapidjson::Value number_of_sticks;
-
-    // @param init -- Smearing
-    // extern rapidjson::Value smearing_method;
-    // extern rapidjson::Value smearing_sigma;
-
-    // @param init -- mixing
-    extern rapidjson::Value mixing;
-
-
-    // @param output
-    extern rapidjson::Value output;
-
-
-
-    // @param final_stru
-    extern rapidjson::Value final_stru;
-    extern rapidjson::Value cell;
-    extern rapidjson::Value coordinate;
-
-
-
-
-    /**
-     *  The functions below initialize the json output parameter 
-     *  tree to connect the nodes of the module
-    */
-
-    /**
-     * @brief   add Top stage：parameter in Abacus:
-     */
-    void Init_json_abacus();
-
-
-    /**
-     * @brief   add Second stage：parameter in Abacus - general_info:
-     */
-    void Init_json_abacus_generalInfo();
-
-
-    /**
-     * @brief   add Second stage：parameter in Abacus - readin_info:
-     */
-    void Init_json_abacus_readinInfo();
-
-
-    /**
-     * @brief   finish json tree build
-     */
-    void Finish_json_tree();
-
-
-
-    /**
-     * @brief   This function is used to populate the template type parameter 
-     *          values into rapidjson's Value object
-     */
-    template <typename T> 
-    void set_json_value(rapidjson::Value &json_v,T *para){
-        if(std::is_same<T,int>::value)
-        {
-            json_v.SetInt(*reinterpret_cast<int*>(para)); 
-        }
-        else if(std::is_same<T,double>::value)
-        {
-            json_v.SetDouble(*reinterpret_cast<double*>(para));
-        }
-        else if(std::is_same<T,bool>::value)
-        {
-            json_v.SetBool(*reinterpret_cast<bool*>(para));
-        }
-        else if(std::is_same<T,std::string>::value)
-        {
-            // json_v.SetString(rapidjson::StringRef((*reinterpret_cast<std::string*>(para)).c_str()));
-
-            json_v.SetString((*reinterpret_cast<std::string*>(para)).c_str(), std::strlen((*reinterpret_cast<std::string*>(para)).c_str()), doc.GetAllocator());
-            //printf("exx_real_number = %s\n",(*reinterpret_cast<std::string*>(para)).c_str());
-        }
-    }
-}
-
-#endif
\ No newline at end of file
diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt
index 008df422e5..666152b476 100644
--- a/source/module_base/test/CMakeLists.txt
+++ b/source/module_base/test/CMakeLists.txt
@@ -217,17 +217,3 @@ AddTest(
   SOURCES assoc_laguerre_test.cpp ../assoc_laguerre.cpp ../tool_quit.cpp ../global_variable.cpp ../global_file.cpp ../global_function.cpp ../memory.cpp ../timer.cpp
   LIBS ${math_libs} formatter
 )
-if(ENABLE_GOOGLEBENCH)
-  AddTest(
-    TARGET perf_sphbes
-    LIBS formatter
-    SOURCES perf_sphbes_test.cpp ../math_sphbes.cpp ../timer.cpp 
-  )
-endif()
-
-if(ENABLE_RAPIDJSON)
-  AddTest(
-    TARGET base_para_json_test
-    SOURCES para_json_test.cpp ../para_json.cpp
-  )
-endif()
diff --git a/source/module_base/test/complexmatrix_test.cpp b/source/module_base/test/complexmatrix_test.cpp
index 0adc52363a..026aeb40de 100644
--- a/source/module_base/test/complexmatrix_test.cpp
+++ b/source/module_base/test/complexmatrix_test.cpp
@@ -23,8 +23,8 @@
  *  - set_as_identity_matrix()
  *  - print():Output the elements of this complex matrix greater than threshold.
  *  - checkreal()
- *
- * Tested relative functions
+ * 
+ * Tested relative functions 
  *  - operator "+" "-" "*" between two ComplexMatrix
  *  - operator "*" between a ComplexMatrix and double or complex, and reverse.
  *  - trace()
@@ -35,13 +35,13 @@
  *  - conj()
  *  - scale_accumulate():
  *  - scaled_sum():
- *
+ * 
  */
 
 //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37.
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 inline void EXPECT_COMPLEX_EQ(const std::complex<double>& a,const std::complex<double>& b)
@@ -104,8 +104,8 @@ TEST_F(ComplexMatrixTest,ConstructorCM)
 
 TEST_F(ComplexMatrixTest,ConstructorCMrvalue)
 {
-    ModuleBase::ComplexMatrix cm2(cm22);
-    ModuleBase::ComplexMatrix cm1(std::move(cm22));
+    ModuleBase::ComplexMatrix cm2(cm22);        
+    ModuleBase::ComplexMatrix cm1(std::move(cm22)); 
     EXPECT_EQ(cm1.nr,cm2.nr);
     EXPECT_EQ(cm1.nc,cm2.nc);
     EXPECT_EQ(cm1.size,cm2.size);
@@ -338,15 +338,15 @@ TEST_F(ComplexMatrixTest,OperatorMultMatrix)
     EXPECT_EQ(cm33.nr,3);
     EXPECT_EQ(cm33.nc,3);
     EXPECT_EQ(cm33.size,9);
-    EXPECT_COMPLEX_EQ(cm33(0,0),std::complex<double>{-46.0,72.0  });
+    EXPECT_COMPLEX_EQ(cm33(0,0),std::complex<double>{-46.0,72.0  }); 
     EXPECT_COMPLEX_EQ(cm33(0,1),std::complex<double>{-46.0,118.0 });
     EXPECT_COMPLEX_EQ(cm33(0,2),std::complex<double>{-46.0,164.0 });
     EXPECT_COMPLEX_EQ(cm33(1,0),std::complex<double>{-54.0,84.0  });
     EXPECT_COMPLEX_EQ(cm33(1,1),std::complex<double>{-54.0,138.0 });
-    EXPECT_COMPLEX_EQ(cm33(1,2),std::complex<double>{-54.0,192.0 });
+    EXPECT_COMPLEX_EQ(cm33(1,2),std::complex<double>{-54.0,192.0 }); 
     EXPECT_COMPLEX_EQ(cm33(2,0),std::complex<double>{-62.0,96.0  });
     EXPECT_COMPLEX_EQ(cm33(2,1),std::complex<double>{-62.0,158.0 });
-    EXPECT_COMPLEX_EQ(cm33(2,2),std::complex<double>{-62.0,220.0 });
+    EXPECT_COMPLEX_EQ(cm33(2,2),std::complex<double>{-62.0,220.0 }); 
 
     EXPECT_DEATH(cm22 * cm32,"");
 }
@@ -525,7 +525,7 @@ TEST_F(ComplexMatrixTest,ScaleSumArray)
     cmout = new ModuleBase::ComplexMatrix*[2];
     cmin1 = new ModuleBase::ComplexMatrix*[2];
     cmin2 = new ModuleBase::ComplexMatrix*[2];
-
+   
     cmin1[0] = &cm1;
     cmin1[1] = &cm2;
     cmin2[0] = &cm3;
@@ -563,7 +563,7 @@ TEST_F(ComplexMatrixTest,print)
    EXPECT_THAT(output,testing::HasSubstr("(3,4)\t(4,5)\t"));
    ifs.close();
    remove("printtest1.log");
-// The condition of  std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag
+// The condition of  std::abs(data)>threshold_abs && std::imag(data)) <= threshold_imag 
    ofs.open("printtest2.log");
    cm22.print(ofs,1e-10,2);
    ofs.close();
diff --git a/source/module_base/test/inverse_matrix_test.cpp b/source/module_base/test/inverse_matrix_test.cpp
index a871f906cd..df68f58a56 100644
--- a/source/module_base/test/inverse_matrix_test.cpp
+++ b/source/module_base/test/inverse_matrix_test.cpp
@@ -19,7 +19,7 @@
 //a mock function of WARNING_QUIT, to avoid the uncorrected call by matrix.cpp at line 37.
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 TEST(InverseMatrixComplexTest, InverseMatrixComplex)
diff --git a/source/module_base/test/math_sphbes_test.cpp b/source/module_base/test/math_sphbes_test.cpp
index e72c6e289c..521d4dc2f4 100644
--- a/source/module_base/test/math_sphbes_test.cpp
+++ b/source/module_base/test/math_sphbes_test.cpp
@@ -352,27 +352,15 @@ TEST_F(Sphbes, Zeros)
 
     int lmax = 20;
     int nzeros = 500;
-    double* zeros = new double[nzeros*(lmax+1)];
+    double* zeros = new double[nzeros];
     for (int l = 0; l <= lmax; ++l)
     {
-        ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros, false);
+        ModuleBase::Sphbes::sphbes_zeros(l, nzeros, zeros);
         for (int i = 0; i < nzeros; ++i)
         {
             EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-14);
         }
     }
-
-
-    ModuleBase::Sphbes::sphbes_zeros(lmax, nzeros, zeros, true);
-    for (int l = 0; l <= lmax; ++l)
-    {
-        for (int i = 0; i < nzeros; ++i)
-        {
-            EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[l*nzeros+i])), 1e-14);
-        }
-    }
-
-    delete[] zeros;
 }
 
 TEST_F(Sphbes, ZerosOld)
diff --git a/source/module_base/test/math_ylmreal_test.cpp b/source/module_base/test/math_ylmreal_test.cpp
index 13d0bd2b69..d5e7a504ed 100644
--- a/source/module_base/test/math_ylmreal_test.cpp
+++ b/source/module_base/test/math_ylmreal_test.cpp
@@ -13,16 +13,16 @@
 ***********************************************/
 
 /**
- * For lmax <5 cases, the reference values are calculated by the formula from
+ * For lmax <5 cases, the reference values are calculated by the formula from 
  * https://formulasearchengine.com/wiki/Table_of_spherical_harmonics. Note, these
- * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item
+ * formula lack of the Condon–Shortley phase (-1)^m, and in this unit test, item 
  * (-1)^m is multiplied.
  * For lmax >=5, the reference values are calculated by YlmReal::Ylm_Real.
  *
  * - Tested functions of class YlmReal
  *      - Ylm_Real
  *      - Ylm_Real2
- *      - rlylm
+ *      - rlylm 
  *      - YlmRealTemplate (double and float)
  *
  * - Tested functions of class Ylm
@@ -30,9 +30,9 @@
  *      - sph_harm
  *      - rl_sph_harm
  *      - grad_rl_sph_harm
- *      - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and  get_ylm_real (Cartesian input)
+ *      - equality_value_test: test the eqaulity of Ylm function between rl_sph_harm (spherical input) and  get_ylm_real (Cartesian input) 
  *      - equality_gradient_test:test the eqaulity of Ylm gradient function between grad_rl_sph_harm(spherical input) and  rlylm (Cartesian input)
- *
+ * 
  */
 
 
@@ -40,7 +40,7 @@
 //mock functions of WARNING_QUIT and WARNING
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
     void WARNING(const std::string &file,const std::string &description) {return ;}
 }
 
@@ -58,7 +58,7 @@ class YlmRealTest : public testing::Test
     ModuleBase::Vector3<double> *g; //vectors of the 4 points
     double *ref;        //reference of Ylm
     double *rly;        //Ylm
-    double (*rlgy)[3];  //the gradient of Ylm
+    double (*rlgy)[3];  //the gradient of Ylm  
     std::vector<double> rlyvector; //Ylm
     std::vector<std::vector<double>> rlgyvector; //the gradient of Ylm
 
@@ -91,101 +91,101 @@ class YlmRealTest : public testing::Test
     double y4m4(const double &x, const double &y, const double &z) {double r=norm(x,y,z); return 3./4.*sqrt(35./M_PI) * x*y*(x*x - y*y) / (r*r*r*r);}
 
     //the reference values are calculated by ModuleBase::Ylm::grad_rl_sph_harm
-    //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz
+    //1st dimension: example, 2nd dimension: Ylm, 3rd dimension: dx/dy/dz 
     double rlgyref[4][64][3] = {
-        {   { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, {-6.30783e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -1.09255e+00},
-            { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  1.09255e+00, -0.00000e+00},
-            {-0.00000e+00,  0.00000e+00, -1.11953e+00}, { 1.37114e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  4.57046e-01,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  1.44531e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.77013e+00,  0.00000e+00, -0.00000e+00},
-            { 0.00000e+00, -1.77013e+00,  0.00000e+00}, { 1.26943e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.00714e+00},
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.89235e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -9.46175e-01,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 2.50334e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  2.50334e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-2.26473e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -4.52947e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00,  0.00000e+00},
-            { 2.44619e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.46771e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00},
-            {-1.90708e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00},
-            { 2.76362e+00,  0.00000e+00, -0.00000e+00}, {-0.00000e+00,  9.21205e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.76362e+00},
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.02739e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -2.01826e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 4.09910e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  4.09910e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.38995e+00}, { 3.16161e+00,  0.00000e+00, -0.00000e+00},
-            { 0.00000e+00,  4.51658e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.31900e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-3.28564e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.40813e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -3.11349e+00},
-            {-0.00000e+00, -0.00000e+00,  0.00000e+00}, { 3.63241e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  2.59458e+00,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-4.95014e+00,  0.00000e+00, -0.00000e+00},
+        {   { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, {-6.30783e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -1.09255e+00}, 
+            { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  1.09255e+00, -0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00, -1.11953e+00}, { 1.37114e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  4.57046e-01,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  1.44531e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.77013e+00,  0.00000e+00, -0.00000e+00}, 
+            { 0.00000e+00, -1.77013e+00,  0.00000e+00}, { 1.26943e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.00714e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.89235e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -9.46175e-01,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00, -1.77013e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 2.50334e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  2.50334e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-2.26473e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -4.52947e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.39677e+00}, {-0.00000e+00, -0.00000e+00,  0.00000e+00}, 
+            { 2.44619e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.46771e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00}, 
+            {-1.90708e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.91311e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, 
+            { 2.76362e+00,  0.00000e+00, -0.00000e+00}, {-0.00000e+00,  9.21205e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.76362e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.02739e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00, -2.01826e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00, -2.36662e+00}, { 0.00000e+00, -0.00000e+00,  0.00000e+00}, { 4.09910e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  4.09910e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -2.38995e+00}, { 3.16161e+00,  0.00000e+00, -0.00000e+00}, 
+            { 0.00000e+00,  4.51658e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.31900e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-3.28564e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.40813e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00, -3.11349e+00}, 
+            {-0.00000e+00, -0.00000e+00,  0.00000e+00}, { 3.63241e+00,  0.00000e+00, -0.00000e+00}, { 0.00000e+00,  2.59458e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-4.95014e+00,  0.00000e+00, -0.00000e+00}, 
             { 0.00000e+00, -4.95014e+00,  0.00000e+00}
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00, -6.30783e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00, -0.00000e+00},
-            { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.37114e+00, -0.00000e+00},
-            { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.77013e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  1.77013e+00,  0.00000e+00}, { 0.00000e+00,  1.26943e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  2.00714e+00}, { 0.00000e+00,  1.89235e+00, -0.00000e+00}, {-9.46175e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.77013e+00}, { 0.00000e+00,  2.50334e+00, -0.00000e+00},
-            {-2.50334e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-4.52947e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -2.26473e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.39677e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-1.46771e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.44619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00},
-            { 0.00000e+00, -1.90708e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00},
-            { 0.00000e+00, -2.76362e+00,  0.00000e+00}, { 9.21205e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00,  0.00000e+00}, { 2.01826e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00,  0.00000e+00},
-            { 4.09910e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            { 1.40813e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00},
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 2.59458e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.63241e+00, -0.00000e+00},
-            { 0.00000e+00,  0.00000e+00, -2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.95014e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  4.95014e+00, -0.00000e+00}
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00, -6.30783e-01,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -0.00000e+00, -1.09255e+00}, { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 1.09255e+00,  0.00000e+00, -0.00000e+00}, 
+            { 0.00000e+00, -0.00000e+00, -1.11953e+00}, { 4.57046e-01,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.37114e+00, -0.00000e+00}, 
+            { 0.00000e+00, -0.00000e+00, -1.44531e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.77013e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  1.77013e+00,  0.00000e+00}, { 0.00000e+00,  1.26943e+00, -0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  2.00714e+00}, { 0.00000e+00,  1.89235e+00, -0.00000e+00}, {-9.46175e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.77013e+00}, { 0.00000e+00,  2.50334e+00, -0.00000e+00}, 
+            {-2.50334e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.75425e+00}, {-4.52947e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -2.26473e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.39677e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-1.46771e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.44619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.07566e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-3.28191e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -3.28191e+00,  0.00000e+00}, 
+            { 0.00000e+00, -1.90708e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.91311e+00}, 
+            { 0.00000e+00, -2.76362e+00,  0.00000e+00}, { 9.21205e-01,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -0.00000e+00, -2.76362e+00}, { 0.00000e+00, -3.02739e+00,  0.00000e+00}, { 2.01826e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.36662e+00}, { 0.00000e+00, -4.09910e+00,  0.00000e+00}, 
+            { 4.09910e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -0.00000e+00, -2.38995e+00}, { 4.51658e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  3.16161e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.31900e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            { 1.40813e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.28564e+00, -0.00000e+00}, { 0.00000e+00, -0.00000e+00, -3.11349e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 2.59458e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.63241e+00, -0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00, -2.64596e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.95014e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  4.95014e+00, -0.00000e+00}           
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.26157e+00}, {-1.09255e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.22045e-16}, {-0.00000e+00,  0.00000e+00, -0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  2.23906e+00}, {-1.82818e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.82818e+00,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  8.81212e-16}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.84324e-16,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  5.55112e-17,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.38514e+00}, {-2.67619e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -2.67619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.30756e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-5.52973e-16,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.66533e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.67801e+00}, {-3.62357e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -3.62357e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.87108e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-1.22267e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.68219e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.93038e-32,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -6.16298e-33,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  6.10264e+00}, {-4.66097e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -4.66097e+00,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  8.98664e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-2.30221e-15,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00,  6.93334e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            { 1.77767e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.22209e-32,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  7.64784e+00}, {-5.78122e+00,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -5.78122e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.51096e-14}, {-0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-3.91011e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.17757e-15,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00},
-            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.67737e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -5.84671e-32,  0.00000e+00},
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.13319e-47,  0.00000e+00,  0.00000e+00},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.26157e+00}, {-1.09255e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -1.09255e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.22045e-16}, {-0.00000e+00,  0.00000e+00, -0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  2.23906e+00}, {-1.82818e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -1.82818e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  8.81212e-16}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-1.84324e-16,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  5.55112e-17,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  3.38514e+00}, {-2.67619e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -2.67619e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  2.30756e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-5.52973e-16,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.66533e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.67801e+00}, {-3.62357e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -3.62357e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.87108e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-1.22267e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  3.68219e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.93038e-32,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -6.16298e-33,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  6.10264e+00}, {-4.66097e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -4.66097e+00,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  8.98664e-15}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, {-2.30221e-15,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00,  6.93334e-16,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            { 1.77767e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -2.22209e-32,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  7.64784e+00}, {-5.78122e+00,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -5.78122e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  1.51096e-14}, {-0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-3.91011e-15,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  1.17757e-15,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, 
+            {-0.00000e+00,  0.00000e+00,  0.00000e+00}, { 4.67737e-31,  0.00000e+00,  0.00000e+00}, { 0.00000e+00, -5.84671e-32,  0.00000e+00}, 
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 1.13319e-47,  0.00000e+00,  0.00000e+00}, 
             { 0.00000e+00, -1.41649e-48,  0.00000e+00}
         },
         {
-            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00},
-            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 3.64183e-01,  3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00,  6.30783e-01},
-            {-0.00000e+00,  6.30783e-01,  6.30783e-01}, {-6.30783e-01,  6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01,  0.00000e+00},
-            {-7.46353e-01, -7.46353e-01,  0.00000e+00}, { 0.00000e+00,  3.04697e-01, -1.21879e+00}, { 3.04697e-01,  0.00000e+00, -1.21879e+00},
-            { 9.63537e-01, -9.63537e-01,  4.01253e-16}, { 9.63537e-01,  9.63537e-01,  9.63537e-01}, {-4.44089e-16,  1.18009e+00, -2.22045e-16},
-            {-1.18009e+00, -1.11022e-16,  0.00000e+00}, { 4.88603e-01,  4.88603e-01,  1.30294e+00}, {-1.03006e+00, -7.72548e-01,  7.72548e-01},
-            {-7.72548e-01, -1.03006e+00,  7.72548e-01}, {-7.28366e-01,  7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00},
-            { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00,  1.92296e-16,  6.81324e-01}, { 9.63537e-01,  9.63537e-01, -1.44756e-16},
-            {-9.63537e-01,  9.63537e-01, -5.55112e-17}, { 5.19779e-01,  5.19779e-01, -1.81923e+00}, { 1.40917e+00,  8.05238e-01,  8.05238e-01},
-            { 8.05238e-01,  1.40917e+00,  8.05238e-01}, { 0.00000e+00, -4.44089e-16,  3.24739e-16}, {-1.06523e+00, -1.06523e+00,  2.13046e+00},
-            {-2.17439e-01,  1.73951e+00,  1.73951e+00}, {-1.73951e+00,  2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01},
-            { 1.84503e+00, -1.84503e+00,  6.58625e-16}, { 1.45863e+00,  1.11022e-15,  0.00000e+00}, {-8.88178e-16,  1.45863e+00,  0.00000e+00},
-            {-1.46807e+00, -1.46807e+00,  5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00},
-            { 7.09144e-01, -7.09144e-01,  1.87222e-16}, { 2.12743e+00,  2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00},
-            { 1.02201e-15, -7.09144e-01,  2.12743e+00}, { 1.81260e+00,  1.81260e+00,  2.58943e+00}, {-2.07154e+00,  2.07154e+00, -1.66969e-15},
-            {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00,  5.77778e-17},
-            { 1.05183e+00,  1.05183e+00,  4.03986e-17}, { 1.27464e+00,  1.27464e+00,  1.69952e+00}, {-1.28472e+00, -1.20442e+00,  1.92707e+00},
-            {-1.20442e+00, -1.28472e+00,  1.92707e+00}, {-8.52285e-01,  8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00},
-            {-1.11260e+00, -2.08612e+00,  9.27164e-01}, { 2.08612e+00,  1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00},
-            { 1.23002e+00, -1.23002e+00,  2.28018e-15}, { 3.69007e+00, -1.53753e-01,  1.84503e+00}, {-1.53753e-01,  3.69007e+00,  1.84503e+00},
-            {-2.35197e+00,  2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00,  9.77875e-17},
+            { 0.00000e+00,  0.00000e+00,  0.00000e+00}, { 0.00000e+00,  0.00000e+00,  4.88603e-01}, {-4.88603e-01,  0.00000e+00,  0.00000e+00}, 
+            { 0.00000e+00, -4.88603e-01,  0.00000e+00}, { 3.64183e-01,  3.64183e-01, -7.28366e-01}, { 6.30783e-01, -0.00000e+00,  6.30783e-01}, 
+            {-0.00000e+00,  6.30783e-01,  6.30783e-01}, {-6.30783e-01,  6.30783e-01, -1.66533e-16}, {-6.30783e-01, -6.30783e-01,  0.00000e+00}, 
+            {-7.46353e-01, -7.46353e-01,  0.00000e+00}, { 0.00000e+00,  3.04697e-01, -1.21879e+00}, { 3.04697e-01,  0.00000e+00, -1.21879e+00}, 
+            { 9.63537e-01, -9.63537e-01,  4.01253e-16}, { 9.63537e-01,  9.63537e-01,  9.63537e-01}, {-4.44089e-16,  1.18009e+00, -2.22045e-16}, 
+            {-1.18009e+00, -1.11022e-16,  0.00000e+00}, { 4.88603e-01,  4.88603e-01,  1.30294e+00}, {-1.03006e+00, -7.72548e-01,  7.72548e-01}, 
+            {-7.72548e-01, -1.03006e+00,  7.72548e-01}, {-7.28366e-01,  7.28366e-01, -5.25363e-16}, {-3.64183e-01, -3.64183e-01, -2.18510e+00}, 
+            { 7.69185e-16, -2.04397e+00, -6.81324e-01}, { 2.04397e+00,  1.92296e-16,  6.81324e-01}, { 9.63537e-01,  9.63537e-01, -1.44756e-16}, 
+            {-9.63537e-01,  9.63537e-01, -5.55112e-17}, { 5.19779e-01,  5.19779e-01, -1.81923e+00}, { 1.40917e+00,  8.05238e-01,  8.05238e-01}, 
+            { 8.05238e-01,  1.40917e+00,  8.05238e-01}, { 0.00000e+00, -4.44089e-16,  3.24739e-16}, {-1.06523e+00, -1.06523e+00,  2.13046e+00}, 
+            {-2.17439e-01,  1.73951e+00,  1.73951e+00}, {-1.73951e+00,  2.17439e-01, -1.73951e+00}, {-1.84503e+00, -1.84503e+00, -9.22517e-01}, 
+            { 1.84503e+00, -1.84503e+00,  6.58625e-16}, { 1.45863e+00,  1.11022e-15,  0.00000e+00}, {-8.88178e-16,  1.45863e+00,  0.00000e+00}, 
+            {-1.46807e+00, -1.46807e+00,  5.87227e-01}, {-4.48502e-01, -3.36617e-16, -2.24251e+00}, {-3.36617e-16, -4.48502e-01, -2.24251e+00}, 
+            { 7.09144e-01, -7.09144e-01,  1.87222e-16}, { 2.12743e+00,  2.12743e+00, -9.38779e-16}, { 7.09144e-01, -5.11006e-16, -2.12743e+00}, 
+            { 1.02201e-15, -7.09144e-01,  2.12743e+00}, { 1.81260e+00,  1.81260e+00,  2.58943e+00}, {-2.07154e+00,  2.07154e+00, -1.66969e-15}, 
+            {-3.03637e+00, -2.31111e-15, -6.07275e-01}, { 1.84889e-15, -3.03637e+00, -6.07275e-01}, { 1.05183e+00, -1.05183e+00,  5.77778e-17}, 
+            { 1.05183e+00,  1.05183e+00,  4.03986e-17}, { 1.27464e+00,  1.27464e+00,  1.69952e+00}, {-1.28472e+00, -1.20442e+00,  1.92707e+00}, 
+            {-1.20442e+00, -1.28472e+00,  1.92707e+00}, {-8.52285e-01,  8.52285e-01, -6.74704e-16}, {-1.50789e+00, -1.50789e+00, -2.95022e+00}, 
+            {-1.11260e+00, -2.08612e+00,  9.27164e-01}, { 2.08612e+00,  1.11260e+00, -9.27164e-01}, {-3.07506e-01, -3.07506e-01, -3.69007e+00}, 
+            { 1.23002e+00, -1.23002e+00,  2.28018e-15}, { 3.69007e+00, -1.53753e-01,  1.84503e+00}, {-1.53753e-01,  3.69007e+00,  1.84503e+00}, 
+            {-2.35197e+00,  2.35197e+00, -8.00513e-16}, {-2.35197e+00, -2.35197e+00, -7.83988e-01}, { 1.37903e-15, -1.46671e+00,  9.77875e-17}, 
             { 1.46671e+00,  1.14919e-15,  1.34475e-16}
         }
     };
@@ -206,71 +206,71 @@ class YlmRealTest : public testing::Test
         rlgy = new double[nylm][3];
         rlgyvector.resize(nylm,std::vector<double>(3));
         ref = new double[64*4]{
-            y00(g[0].x, g[0].y, g[0].z),  y00(g[1].x, g[1].y, g[1].z),  y00(g[2].x, g[2].y, g[2].z),  y00(g[3].x, g[3].y, g[3].z),
-            y10(g[0].x, g[0].y, g[0].z),  y10(g[1].x, g[1].y, g[1].z),  y10(g[2].x, g[2].y, g[2].z),  y10(g[3].x, g[3].y, g[3].z),
-            y11(g[0].x, g[0].y, g[0].z),  y11(g[1].x, g[1].y, g[1].z),  y11(g[2].x, g[2].y, g[2].z),  y11(g[3].x, g[3].y, g[3].z),
-            y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z),
-            y20(g[0].x, g[0].y, g[0].z),  y20(g[1].x, g[1].y, g[1].z),  y20(g[2].x, g[2].y, g[2].z),  y20(g[3].x, g[3].y, g[3].z),
-            y21(g[0].x, g[0].y, g[0].z),  y21(g[1].x, g[1].y, g[1].z),  y21(g[2].x, g[2].y, g[2].z),  y21(g[3].x, g[3].y, g[3].z),
-            y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z),
-            y22(g[0].x, g[0].y, g[0].z),  y22(g[1].x, g[1].y, g[1].z),  y22(g[2].x, g[2].y, g[2].z),  y22(g[3].x, g[3].y, g[3].z),
-            y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z),
-            y30(g[0].x, g[0].y, g[0].z),  y30(g[1].x, g[1].y, g[1].z),  y30(g[2].x, g[2].y, g[2].z),  y30(g[3].x, g[3].y, g[3].z),
-            y31(g[0].x, g[0].y, g[0].z),  y31(g[1].x, g[1].y, g[1].z),  y31(g[2].x, g[2].y, g[2].z),  y31(g[3].x, g[3].y, g[3].z),
-            y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z),
-            y32(g[0].x, g[0].y, g[0].z),  y32(g[1].x, g[1].y, g[1].z),  y32(g[2].x, g[2].y, g[2].z),  y32(g[3].x, g[3].y, g[3].z),
-            y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z),
-            y33(g[0].x, g[0].y, g[0].z),  y33(g[1].x, g[1].y, g[1].z),  y33(g[2].x, g[2].y, g[2].z),  y33(g[3].x, g[3].y, g[3].z),
-            y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z),
-            y40(g[0].x, g[0].y, g[0].z),  y40(g[1].x, g[1].y, g[1].z),  y40(g[2].x, g[2].y, g[2].z),  y40(g[3].x, g[3].y, g[3].z),
-            y41(g[0].x, g[0].y, g[0].z),  y41(g[1].x, g[1].y, g[1].z),  y41(g[2].x, g[2].y, g[2].z),  y41(g[3].x, g[3].y, g[3].z),
-            y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z),
-            y42(g[0].x, g[0].y, g[0].z),  y42(g[1].x, g[1].y, g[1].z),  y42(g[2].x, g[2].y, g[2].z),  y42(g[3].x, g[3].y, g[3].z),
-            y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z),
-            y43(g[0].x, g[0].y, g[0].z),  y43(g[1].x, g[1].y, g[1].z),  y43(g[2].x, g[2].y, g[2].z),  y43(g[3].x, g[3].y, g[3].z),
-            y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z),
-            y44(g[0].x, g[0].y, g[0].z),  y44(g[1].x, g[1].y, g[1].z),  y44(g[2].x, g[2].y, g[2].z),  y44(g[3].x, g[3].y, g[3].z),
-            y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z),
-              0.000000000000000,    0.000000000000000,    0.935602579627389,    0.090028400200397,
-             -0.452946651195697,   -0.000000000000000,   -0.000000000000000,   -0.348678494661834,
-             -0.000000000000000,   -0.452946651195697,   -0.000000000000000,   -0.348678494661834,
-             -0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
-             -0.000000000000000,   -0.000000000000000,    0.000000000000000,   -0.000000000000000,
-              0.489238299435250,    0.000000000000000,   -0.000000000000000,   -0.376615818502422,
-              0.000000000000000,   -0.489238299435250,   -0.000000000000000,    0.376615818502422,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.532615198330370,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
-             -0.656382056840170,   -0.000000000000000,   -0.000000000000000,   -0.168427714314628,
-             -0.000000000000000,   -0.656382056840170,   -0.000000000000000,   -0.168427714314628,
-             -0.317846011338142,   -0.317846011338142,    1.017107236282055,    0.226023830284901,
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103,
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103,
-              0.460602629757462,   -0.460602629757462,    0.000000000000000,   -0.000000000000000,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.409424559784410,
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.136474853261470,
-             -0.000000000000000,    0.000000000000000,   -0.000000000000000,   -0.136474853261470,
-             -0.504564900728724,   -0.504564900728724,    0.000000000000000,   -0.598002845308118,
-             -0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556,
-             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556,
-              0.683184105191914,   -0.683184105191914,    0.000000000000000,   -0.000000000000000,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.202424920056864,
-              0.000000000000000,    0.000000000000000,    1.092548430592079,   -0.350435072502801,
-              0.451658037912587,    0.000000000000000,   -0.000000000000000,    0.046358202625865,
-              0.000000000000000,    0.451658037912587,   -0.000000000000000,    0.046358202625865,
-              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.492067081245654,
-             -0.469376801586882,   -0.000000000000000,   -0.000000000000000,    0.187354445356332,
-             -0.000000000000000,    0.469376801586882,   -0.000000000000000,   -0.187354445356332,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.355076798886913,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000,
-              0.518915578720260,    0.000000000000000,   -0.000000000000000,   -0.443845998608641,
-              0.000000000000000,    0.518915578720260,   -0.000000000000000,   -0.443845998608641,
-              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000,
-              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.452635881587108,
-             -0.707162732524596,    0.000000000000000,   -0.000000000000000,    0.120972027847095,
-             -0.000000000000000,    0.707162732524596,   -0.000000000000000,   -0.120972027847095
-         } ;
+            y00(g[0].x, g[0].y, g[0].z),  y00(g[1].x, g[1].y, g[1].z),  y00(g[2].x, g[2].y, g[2].z),  y00(g[3].x, g[3].y, g[3].z),  
+            y10(g[0].x, g[0].y, g[0].z),  y10(g[1].x, g[1].y, g[1].z),  y10(g[2].x, g[2].y, g[2].z),  y10(g[3].x, g[3].y, g[3].z),  
+            y11(g[0].x, g[0].y, g[0].z),  y11(g[1].x, g[1].y, g[1].z),  y11(g[2].x, g[2].y, g[2].z),  y11(g[3].x, g[3].y, g[3].z),  
+            y1m1(g[0].x, g[0].y, g[0].z), y1m1(g[1].x, g[1].y, g[1].z), y1m1(g[2].x, g[2].y, g[2].z), y1m1(g[3].x, g[3].y, g[3].z), 
+            y20(g[0].x, g[0].y, g[0].z),  y20(g[1].x, g[1].y, g[1].z),  y20(g[2].x, g[2].y, g[2].z),  y20(g[3].x, g[3].y, g[3].z),  
+            y21(g[0].x, g[0].y, g[0].z),  y21(g[1].x, g[1].y, g[1].z),  y21(g[2].x, g[2].y, g[2].z),  y21(g[3].x, g[3].y, g[3].z),  
+            y2m1(g[0].x, g[0].y, g[0].z), y2m1(g[1].x, g[1].y, g[1].z), y2m1(g[2].x, g[2].y, g[2].z), y2m1(g[3].x, g[3].y, g[3].z),                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
+            y22(g[0].x, g[0].y, g[0].z),  y22(g[1].x, g[1].y, g[1].z),  y22(g[2].x, g[2].y, g[2].z),  y22(g[3].x, g[3].y, g[3].z),  
+            y2m2(g[0].x, g[0].y, g[0].z), y2m2(g[1].x, g[1].y, g[1].z), y2m2(g[2].x, g[2].y, g[2].z), y2m2(g[3].x, g[3].y, g[3].z), 
+            y30(g[0].x, g[0].y, g[0].z),  y30(g[1].x, g[1].y, g[1].z),  y30(g[2].x, g[2].y, g[2].z),  y30(g[3].x, g[3].y, g[3].z),  
+            y31(g[0].x, g[0].y, g[0].z),  y31(g[1].x, g[1].y, g[1].z),  y31(g[2].x, g[2].y, g[2].z),  y31(g[3].x, g[3].y, g[3].z),  
+            y3m1(g[0].x, g[0].y, g[0].z), y3m1(g[1].x, g[1].y, g[1].z), y3m1(g[2].x, g[2].y, g[2].z), y3m1(g[3].x, g[3].y, g[3].z), 
+            y32(g[0].x, g[0].y, g[0].z),  y32(g[1].x, g[1].y, g[1].z),  y32(g[2].x, g[2].y, g[2].z),  y32(g[3].x, g[3].y, g[3].z),  
+            y3m2(g[0].x, g[0].y, g[0].z), y3m2(g[1].x, g[1].y, g[1].z), y3m2(g[2].x, g[2].y, g[2].z), y3m2(g[3].x, g[3].y, g[3].z), 
+            y33(g[0].x, g[0].y, g[0].z),  y33(g[1].x, g[1].y, g[1].z),  y33(g[2].x, g[2].y, g[2].z),  y33(g[3].x, g[3].y, g[3].z),  
+            y3m3(g[0].x, g[0].y, g[0].z), y3m3(g[1].x, g[1].y, g[1].z), y3m3(g[2].x, g[2].y, g[2].z), y3m3(g[3].x, g[3].y, g[3].z), 
+            y40(g[0].x, g[0].y, g[0].z),  y40(g[1].x, g[1].y, g[1].z),  y40(g[2].x, g[2].y, g[2].z),  y40(g[3].x, g[3].y, g[3].z),  
+            y41(g[0].x, g[0].y, g[0].z),  y41(g[1].x, g[1].y, g[1].z),  y41(g[2].x, g[2].y, g[2].z),  y41(g[3].x, g[3].y, g[3].z),  
+            y4m1(g[0].x, g[0].y, g[0].z), y4m1(g[1].x, g[1].y, g[1].z), y4m1(g[2].x, g[2].y, g[2].z), y4m1(g[3].x, g[3].y, g[3].z), 
+            y42(g[0].x, g[0].y, g[0].z),  y42(g[1].x, g[1].y, g[1].z),  y42(g[2].x, g[2].y, g[2].z),  y42(g[3].x, g[3].y, g[3].z),  
+            y4m2(g[0].x, g[0].y, g[0].z), y4m2(g[1].x, g[1].y, g[1].z), y4m2(g[2].x, g[2].y, g[2].z), y4m2(g[3].x, g[3].y, g[3].z), 
+            y43(g[0].x, g[0].y, g[0].z),  y43(g[1].x, g[1].y, g[1].z),  y43(g[2].x, g[2].y, g[2].z),  y43(g[3].x, g[3].y, g[3].z),  
+            y4m3(g[0].x, g[0].y, g[0].z), y4m3(g[1].x, g[1].y, g[1].z), y4m3(g[2].x, g[2].y, g[2].z), y4m3(g[3].x, g[3].y, g[3].z), 
+            y44(g[0].x, g[0].y, g[0].z),  y44(g[1].x, g[1].y, g[1].z),  y44(g[2].x, g[2].y, g[2].z),  y44(g[3].x, g[3].y, g[3].z),  
+            y4m4(g[0].x, g[0].y, g[0].z), y4m4(g[1].x, g[1].y, g[1].z), y4m4(g[2].x, g[2].y, g[2].z), y4m4(g[3].x, g[3].y, g[3].z), 
+              0.000000000000000,    0.000000000000000,    0.935602579627389,    0.090028400200397, 
+             -0.452946651195697,   -0.000000000000000,   -0.000000000000000,   -0.348678494661834, 
+             -0.000000000000000,   -0.452946651195697,   -0.000000000000000,   -0.348678494661834, 
+             -0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
+             -0.000000000000000,   -0.000000000000000,    0.000000000000000,   -0.000000000000000, 
+              0.489238299435250,    0.000000000000000,   -0.000000000000000,   -0.376615818502422, 
+              0.000000000000000,   -0.489238299435250,   -0.000000000000000,    0.376615818502422, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.532615198330370, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
+             -0.656382056840170,   -0.000000000000000,   -0.000000000000000,   -0.168427714314628, 
+             -0.000000000000000,   -0.656382056840170,   -0.000000000000000,   -0.168427714314628, 
+             -0.317846011338142,   -0.317846011338142,    1.017107236282055,    0.226023830284901, 
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103, 
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.258942827786103, 
+              0.460602629757462,   -0.460602629757462,    0.000000000000000,   -0.000000000000000, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.409424559784410, 
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.136474853261470, 
+             -0.000000000000000,    0.000000000000000,   -0.000000000000000,   -0.136474853261470, 
+             -0.504564900728724,   -0.504564900728724,    0.000000000000000,   -0.598002845308118, 
+             -0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556, 
+             -0.000000000000000,   -0.000000000000000,   -0.000000000000000,    0.350610246256556, 
+              0.683184105191914,   -0.683184105191914,    0.000000000000000,   -0.000000000000000, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.202424920056864, 
+              0.000000000000000,    0.000000000000000,    1.092548430592079,   -0.350435072502801, 
+              0.451658037912587,    0.000000000000000,   -0.000000000000000,    0.046358202625865, 
+              0.000000000000000,    0.451658037912587,   -0.000000000000000,    0.046358202625865, 
+              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.492067081245654, 
+             -0.469376801586882,   -0.000000000000000,   -0.000000000000000,    0.187354445356332, 
+             -0.000000000000000,    0.469376801586882,   -0.000000000000000,   -0.187354445356332, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.355076798886913, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,   -0.000000000000000, 
+              0.518915578720260,    0.000000000000000,   -0.000000000000000,   -0.443845998608641, 
+              0.000000000000000,    0.518915578720260,   -0.000000000000000,   -0.443845998608641, 
+              0.000000000000000,   -0.000000000000000,    0.000000000000000,    0.000000000000000, 
+              0.000000000000000,    0.000000000000000,    0.000000000000000,    0.452635881587108, 
+             -0.707162732524596,    0.000000000000000,   -0.000000000000000,    0.120972027847095, 
+             -0.000000000000000,    0.707162732524596,   -0.000000000000000,   -0.120972027847095  
+         } ; 
     }
 
     void TearDown()
@@ -293,11 +293,11 @@ TEST_F(YlmRealTest,YlmReal)
     ModuleBase::YlmReal::Ylm_Real(nylm,ng,g,ylm);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j)
+        for(int j=0;j<ng;++j) 
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
         }
-    }
+    } 
 }
 
 TEST_F(YlmRealTest,YlmRealTemplate)
@@ -318,7 +318,7 @@ TEST_F(YlmRealTest,gradYlmReal)
     ModuleBase::YlmReal::grad_Ylm_Real(nylm,ng,g,ylm,dylm[0],dylm[1],dylm[2]);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j)
+        for(int j=0;j<ng;++j) 
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
         }
@@ -328,7 +328,7 @@ TEST_F(YlmRealTest,gradYlmReal)
     double step = 1e-7;
     for(int id = 0 ; id < 3 ; ++id)
     {
-        for(int j=0;j<ng;++j)
+        for(int j=0;j<ng;++j) 
         {
             ModuleBase::Vector3<double> gplus = g[j];
             ModuleBase::Vector3<double> gminus = g[j];
@@ -352,16 +352,16 @@ TEST_F(YlmRealTest,YlmReal2)
     ModuleBase::YlmReal::Ylm_Real2(nylm,ng,g,ylm);
     for(int i=0;i<nylm;++i)
     {
-        for(int j=0;j<ng;++j)
+        for(int j=0;j<ng;++j) 
         {
             EXPECT_NEAR(ylm(i,j),ref[i*ng+j],doublethreshold) << "Ylm[" << i << "], example " << j << " not pass";
         }
-    }
+    } 
 }
 
 
 TEST_F(YlmRealTest,YlmRealRlylm)
-{
+{    
     for(int j=0;j<ng;++j)
     {
         ModuleBase::YlmReal::rlylm(lmax,g[j].x,g[j].y,g[j].z,rly);
@@ -374,7 +374,7 @@ TEST_F(YlmRealTest,YlmRealRlylm)
 
 
 TEST_F(YlmRealTest,YlmGetYlmReal)
-{
+{    
     for(int j=0;j<ng;++j)
     {
         ModuleBase::Ylm::get_ylm_real(lmax+1,g[j],rly);
@@ -386,7 +386,7 @@ TEST_F(YlmRealTest,YlmGetYlmReal)
 }
 
 TEST_F(YlmRealTest,YlmSphHarm)
-{
+{    
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -395,13 +395,13 @@ TEST_F(YlmRealTest,YlmSphHarm)
         for(int i=0;i<nylm;++i)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
-
+            
         }
     }
 }
 
 TEST_F(YlmRealTest,YlmRlSphHarm)
-{
+{    
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -410,13 +410,13 @@ TEST_F(YlmRealTest,YlmRlSphHarm)
         for(int i=0;i<nylm;++i)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
-
+            
         }
     }
 }
 //used to be test1 in ylm.h
 TEST_F(YlmRealTest,YlmGradRlSphHarm)
-{
+{    
     ModuleBase::Ylm::set_coefficients ();
     for(int j=0;j<ng;++j)
     {
@@ -426,7 +426,7 @@ TEST_F(YlmRealTest,YlmGradRlSphHarm)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
             for(int k=0;k<3;++k) {EXPECT_NEAR(rlgyvector[i][k],rlgyref[j][i][k],1e-5);}
-
+            
         }
     }
 }
@@ -435,7 +435,7 @@ TEST_F(YlmRealTest,YlmGradRlSphHarm)
 TEST_F(YlmRealTest, equality_value_test)
 {
 
-
+    
     ModuleBase::Vector3<double> R (20.0, 0.0, 0.0);
 	const double xdr = R.x/R.norm();
 	const double ydr = R.y/R.norm();
@@ -444,17 +444,17 @@ TEST_F(YlmRealTest, equality_value_test)
 	const double rl = std::pow( R.norm(), L);
 	//std::cout << " rl=" << rl << std::endl;
 	ModuleBase::Ylm::set_coefficients();
-
+	
 	int nu = 100;
-
+	
 	// Peize Lin change rlya 2016-08-26
 	std::vector<double> rlya;
 	double rlyb[400];
 	ModuleBase::Ylm::ZEROS( rlyb, 400);
-
+	
 	ModuleBase::Ylm::rl_sph_harm(L, xdr, ydr, zdr, rlya);
 	ModuleBase::Ylm::get_ylm_real(L+1, R, rlyb);
-
+	
 	for (int i=0; i < nu; i++)
 	{
 		double diff = fabs(rlya[i]-rlyb[i]);
@@ -467,21 +467,21 @@ TEST_F(YlmRealTest, equality_value_test)
 TEST_F(YlmRealTest, equality_gradient_test)
 {
 
-
+    
     ModuleBase::Vector3<double> R (0.1,-0.2,0.5);
 	ModuleBase::Ylm::set_coefficients();
-
+	
 	//int nu = 100;
 
 	std::vector<double> rlya;
 	double rlyb[400];
-
+	
 	std::vector<std::vector<double>> grlya;
 	double grlyb[400][3];
-
+	
 	ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya);
 	ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb);
-
+	
 	for (int i = 0; i < 100; i++)
 	{
 		double diffx = fabs(grlya[i][2]-grlyb[i][2]);
diff --git a/source/module_base/test/para_json_test.cpp b/source/module_base/test/para_json_test.cpp
deleted file mode 100644
index 3ce6ecce49..0000000000
--- a/source/module_base/test/para_json_test.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-#include "module_base/para_json.h"
-#include "gtest/gtest.h"
-#ifdef __MPI
-#include "mpi.h"
-#endif
-
-#include <stdlib.h>
-#include "rapidjson/document.h"
-/************************************************
- *  unit test of Input::ParaJson
- ***********************************************/
-
-/**
- * - Tested Functions:
- *   - Init()
- *     - init json tree from input::Init and check if the json string is valid
- */
-
-class ParaJsonTest : public ::testing::Test
-{
-  protected:
-    std::string testString;
-};
-
-// check if a string is a valid JSON string
-bool isValidJSON(const std::string& jsonString)
-{
-    rapidjson::Document document;
-    document.Parse(jsonString.c_str());
-
-    return !document.HasParseError();
-}
-
-TEST_F(ParaJsonTest, Init)
-{
-    //std::string input_file = "./support/INPUT";
-    //Input input_tmp;
-    //EXPECT_NO_THROW(input_tmp.Init(input_file));
-
-    // int status = system("rm -r ./OUT.autotest/");
-    // EXPECT_EQ(status,0);
-    // Para_Json::Init_json_abacus_readinInfo();
-    Para_Json::Init_json_abacus_generalInfo();
-    Para_Json::Init_json_abacus();
-    Para_Json::Finish_json_tree();
-    rapidjson::StringBuffer buffer;
-    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
-    Para_Json::doc.Accept(writer);
-    std::string json = buffer.GetString();
-    EXPECT_EQ(isValidJSON(json), true);
-}
-
-int main(int argc, char** argv)
-{
-#ifdef __MPI
-    MPI_Init(&argc, &argv);
-    MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC);
-    MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK);
-#endif
-    testing::InitGoogleTest(&argc, argv);
-    int result;
-    result = RUN_ALL_TESTS();
-#ifdef __MPI
-    MPI_Finalize();
-#endif
-    return result;
-}
-
diff --git a/source/module_base/test/perf_sphbes_test.cpp b/source/module_base/test/perf_sphbes_test.cpp
deleted file mode 100644
index 4c574baa8e..0000000000
--- a/source/module_base/test/perf_sphbes_test.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-#include"../math_sphbes.h"
-#include<fstream>
-#include <benchmark/benchmark.h>
-#include <iostream>
-#include <cstring>
-#include <cmath>
-
-/************************************************
-*  performace test of class Sphbes
-***********************************************/
-
-/**
- * Tested function: 
- *      - sphbesj
- *      - Spherical_Bessel
- */
-
-class PerfSphbes : public benchmark::Fixture {
-public:
-    const double q = 1;
-    const int n = 1000;
-    double stop = 1000.0;
-    double dr = 0.0;
-    double* rc, *rinf, *jc, *jinf;
-    void SetUp(const benchmark::State& state){
-        const double rcut = state.range(0) + 0.5;
-        rc = new double[n + 10]; 
-        rinf = new double[n + 10];
-        jc = new double[n + 10];
-        jinf = new double[n + 10];
-
-        // generate data points in (0, rcut] in log scale
-        double rmin = 0.0001;
-        double log_rmin = std::log(rmin);
-        double log_rcut = std::log(rcut);
-        dr = (log_rcut - log_rmin) / (n-1);
-        memset(rc, 0, (n+10) * sizeof(double));
-        for (int i = 0; i < n; i++)
-            rc[i] = std::exp(log_rmin + i * dr);
-        
-        // generate data points in [rcut, stop] in linear scale
-        memset(rinf, 0, (n+10) * sizeof(double));
-        rinf[0] = rcut;
-        dr = (stop - rcut) / (n-1);
-        for (int i = 1; i < n; i++)
-            rinf[i] += rinf[i-1] + dr;
-    }
-    void TearDown(const benchmark::State& state){
-        delete[] rc;
-        delete[] rinf;
-        delete[] jc;
-        delete[]  jinf;
-    }
-};    
-
-BENCHMARK_DEFINE_F(PerfSphbes, BM_Spherical_Bessel)(benchmark::State& state) {
-    for (auto _ : state) {
-        ModuleBase::Sphbes::Spherical_Bessel(n, rc, q, state.range(0), jc);
-        ModuleBase::Sphbes::Spherical_Bessel(n, rinf, q, state.range(0), jinf);
-    }
-}
-
-BENCHMARK_DEFINE_F(PerfSphbes, BM_sphbesj)(benchmark::State& state) {
-    for (auto _ : state) {
-        ModuleBase::Sphbes::sphbesj(n, rc, q, state.range(0), jc);
-        ModuleBase::Sphbes::sphbesj(n, rinf, q, state.range(0), jinf);
-    }
-}
-
-BENCHMARK_REGISTER_F(PerfSphbes, BM_sphbesj)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond);
-BENCHMARK_REGISTER_F(PerfSphbes, BM_Spherical_Bessel)->DenseRange(0, 11, 1)->Unit(benchmark::kMicrosecond);
-BENCHMARK_MAIN(); 
\ No newline at end of file
diff --git a/source/module_base/tool_quit.h b/source/module_base/tool_quit.h
index f944696d5a..eafaf673cc 100644
--- a/source/module_base/tool_quit.h
+++ b/source/module_base/tool_quit.h
@@ -33,13 +33,13 @@ void WARNING(const std::string &file, const std::string &description);
  * @brief Close .log files and exit
  *
  */
-[[noreturn]] void QUIT(void);
+void QUIT(void);
 
 /**
  * @brief Close .log files and exit
  *
  */
-[[noreturn]] void QUIT(int ret);
+void QUIT(int ret);
 
 /**
  * @brief Combine the functions of WARNING and QUIT
@@ -47,7 +47,7 @@ void WARNING(const std::string &file, const std::string &description);
  * @param file The file where warning happens
  * @param description The warning information
  */
-[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description);
+void WARNING_QUIT(const std::string &file, const std::string &description);
 
 /**
  * @brief Combine the functions of WARNING and QUIT
@@ -55,7 +55,7 @@ void WARNING(const std::string &file, const std::string &description);
  * @param file The file where warning happens
  * @param description The warning information
  */
-[[noreturn]] void WARNING_QUIT(const std::string& file, const std::string& description, int ret);
+void WARNING_QUIT(const std::string &file, const std::string &description, int ret);
 
 /**
  * @brief Check, if true, WARNING_QUIT
diff --git a/source/module_cell/klist.cpp b/source/module_cell/klist.cpp
index e11ab183f5..52bc42440d 100644
--- a/source/module_cell/klist.cpp
+++ b/source/module_cell/klist.cpp
@@ -362,10 +362,6 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 
 			//recalculate nkstot.
 			nkstot = 0;
-            /* ISSUE#3482: to distinguish different kline segments */
-            std::vector<int> kpt_segids;
-            kl_segids.clear(); kl_segids.shrink_to_fit();
-            int kpt_segid = 0;
 			for(int iks=0; iks<nks_special; iks++)
 			{
 				ifk >> ksx[iks];
@@ -375,9 +371,6 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 				//std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl;
 				assert(nkl[iks] >= 0);
 				nkstot += nkl[iks];
-                /* ISSUE#3482: to distinguish different kline segments */
-                if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++;
-                kpt_segids.push_back(kpt_segid);
 			}
 			assert( nkl[nks_special-1] == 1);
 
@@ -396,7 +389,6 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 					kvec_c[count].x = ksx[iks-1] + is*dx;
 					kvec_c[count].y = ksy[iks-1] + is*dy;
 					kvec_c[count].z = ksz[iks-1] + is*dz;
-                    kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */
 					++count;
 				}
 			}
@@ -405,14 +397,15 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 			kvec_c[count].x = ksx[nks_special-1];
 			kvec_c[count].y = ksy[nks_special-1];
 			kvec_c[count].z = ksz[nks_special-1];
-            kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */
 			++count;
 
 			//std::cout << " count = " << count << std::endl;
-			assert(count == nkstot);
-            assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */
-			
-            std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;});
+			assert (count == nkstot );
+
+			for(int ik=0; ik<nkstot; ik++)
+			{
+				wk[ik] = 1.0;
+			}
 
             this->kc_done = true;
 
@@ -446,22 +439,15 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 
 			//recalculate nkstot.
 			nkstot = 0;
-            /* ISSUE#3482: to distinguish different kline segments */
-            std::vector<int> kpt_segids;
-            kl_segids.clear(); kl_segids.shrink_to_fit();
-            int kpt_segid = 0;
 			for(int iks=0; iks<nks_special; iks++)
 			{
 				ifk >> ksx[iks];
 				ifk >> ksy[iks];
 				ifk >> ksz[iks];
-				ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] ); /* so ifk is ifstream for kpoint, then nkl is number of kpoints on line */
+				ModuleBase::GlobalFunc::READ_VALUE( ifk, nkl[iks] );
 				//std::cout << " nkl[" << iks << "]=" << nkl[iks] << std::endl;
 				assert(nkl[iks] >= 0);
 				nkstot += nkl[iks];
-                /* ISSUE#3482: to distinguish different kline segments */
-                if((nkl[iks] == 1)&&(iks!=(nks_special-1))) kpt_segid++;
-                kpt_segids.push_back(kpt_segid);
 			}
 			assert( nkl[nks_special-1] == 1);
 
@@ -480,7 +466,6 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 					kvec_d[count].x = ksx[iks-1] + is*dx;
 					kvec_d[count].y = ksy[iks-1] + is*dy;
 					kvec_d[count].z = ksz[iks-1] + is*dz;
-                    kl_segids.push_back(kpt_segids[iks-1]); /* ISSUE#3482: to distinguish different kline segments */
 					++count;
 				}
 			}
@@ -489,16 +474,18 @@ bool K_Vectors::read_kpoints(const std::string &fn)
 			kvec_d[count].x = ksx[nks_special-1];
 			kvec_d[count].y = ksy[nks_special-1];
 			kvec_d[count].z = ksz[nks_special-1];
-            kl_segids.push_back(kpt_segids[nks_special-1]); /* ISSUE#3482: to distinguish different kline segments */
 			++count;
 
 			//std::cout << " count = " << count << std::endl;
-			assert(count == nkstot );
-            assert(kl_segids.size() == nkstot); /* ISSUE#3482: to distinguish different kline segments */
+			assert (count == nkstot );
 
-			std::for_each(wk.begin(), wk.end(), [](double& d){d = 1.0;});
+			for(int ik=0; ik<nkstot; ik++)
+			{
+				wk[ik] = 1.0;
+			}
 
             this->kd_done = true;
+
 		}
 
         else
@@ -1135,9 +1122,6 @@ void K_Vectors::mpi_k(void)
 
     Parallel_Common::bcast_int(nmp, 3);
 
-    kl_segids.resize(nkstot);
-    Parallel_Common::bcast_int(kl_segids.data(), nkstot);
-
     Parallel_Common::bcast_double(koffset, 3);
 
     this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
@@ -1368,8 +1352,6 @@ void K_Vectors::mpi_k_after_vc(void)
     Parallel_Common::bcast_int(nspin);
     Parallel_Common::bcast_int(nkstot);
     Parallel_Common::bcast_int(nmp, 3);
-    kl_segids.resize(nkstot);
-    Parallel_Common::bcast_int(kl_segids.data(), nkstot);
     Parallel_Common::bcast_double(koffset, 3);
 
     this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
diff --git a/source/module_cell/klist.h b/source/module_cell/klist.h
index aa92cf29fd..a9e06f8614 100644
--- a/source/module_cell/klist.h
+++ b/source/module_cell/klist.h
@@ -29,7 +29,6 @@ class K_Vectors
     int nkstot_full;    /// number of k points in full k mesh
 
     int nmp[3];						// Number of Monhorst-Pack
-    std::vector<int> kl_segids;	// index of kline segment
 
     K_Vectors();
     ~K_Vectors();
diff --git a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
index bb447bca4c..617674256a 100644
--- a/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
+++ b/source/module_cell/module_neighbor/test/sltk_atom_input_test.cpp
@@ -223,7 +223,7 @@ TEST_F(SltkAtomInputTest, ConstructorNoExpand)
     GlobalV::test_grid = 1;
     // this is a bug if radius is too small
     // because the expand_flag will be false!
-    radius = 0;
+    radius = 1e-1000;
     Atom_input Atom_inp(ofs, *ucell, ucell->nat, ucell->ntype, pbc, radius, test_atom_in);
     EXPECT_FALSE(Atom_inp.getExpandFlag());
     // call set_FAtom and Load_atom
diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp
index 4c6bf9c0eb..dc517bccd7 100644
--- a/source/module_cell/read_atoms.cpp
+++ b/source/module_cell/read_atoms.cpp
@@ -535,101 +535,100 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn
 				ModuleBase::GlobalFunc::ZEROS(atoms[it].mag,na);
 				for (int ia = 0;ia < na; ia++)
 				{
- 				// modify the reading of frozen ions and velocities  -- Yuanbo Li 2021/8/20
-					ifpos >> v.x >> v.y >> v.z;
-					mv.x = true ;
-					mv.y = true ;
-					mv.z = true ;
-					atoms[it].vel[ia].set(0,0,0);
-					atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2
-					atoms[it].angle1[ia]=0;
-					atoms[it].angle2[ia]=0;
-					atoms[it].m_loc_[ia].set(0,0,0);
-
-					std::string tmpid;
-					tmpid = ifpos.get();
-
-					if( (int)tmpid[0] < 0 )
-					{
-						std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl;
-						exit(1); 
-					}
-
-					bool input_vec_mag=false;
-					bool input_angle_mag=false;
-					// read if catch goodbit before "\n" and "#"
-					while ( (tmpid != "\n") && (ifpos.good()) && (tmpid !="#") )
-					{
-						tmpid = ifpos.get() ;
-						// old method of reading frozen ions
-						char tmp = (char)tmpid[0];
-						if ( tmp >= 48 && tmp <= 57 )
-						{
-								mv.x = std::stoi(tmpid);
-								ifpos >> mv.y >> mv.z ;
-						}
-						// new method of reading frozen ions and velocities
-						if ( tmp >= 'a' && tmp <='z')
-						{
-							ifpos.putback(tmp);
-							ifpos >> tmpid;
-						}
-						if ( tmpid == "m" )
-						{
-								ifpos >> mv.x >> mv.y >> mv.z ;
-						}
-						else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" )
-						{
-								ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z;
-						}
-						else if ( tmpid == "mag" || tmpid == "magmom")
-						{
-							set_element_mag_zero = true;
-							double tmpamg=0;
-							ifpos >> tmpamg;
-							tmp=ifpos.get();
-							while (tmp==' ')
-							{
-								tmp=ifpos.get();
-							}
-							
-							if((tmp >= 48 && tmp <= 57) or tmp=='-')
-							{
-								ifpos.putback(tmp);
-								ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z;
-								atoms[it].m_loc_[ia].x=tmpamg;
-								atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2));
-								input_vec_mag=true;
-								
-							}
-							else
-							{
-								ifpos.putback(tmp);
-								atoms[it].mag[ia]=tmpamg;
-							}
-							
-							// atoms[it].mag[ia];
-						}
-						else if ( tmpid == "angle1")
-						{
-								ifpos >> atoms[it].angle1[ia];
-								atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI;
-								input_angle_mag=true;
-								set_element_mag_zero = true;
-						}
-						else if ( tmpid == "angle2")
-						{
-								ifpos >> atoms[it].angle2[ia];
-								atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI;
-								input_angle_mag=true;
-								set_element_mag_zero = true;
-						}	
-					}
-					// move to next line
-					while ( (tmpid != "\n") && (ifpos.good()) )
-					{
-							tmpid = ifpos.get();
-					}
+ // modify the reading of frozen ions and velocities  -- Yuanbo Li 2021/8/20
+                                        ifpos >> v.x >> v.y >> v.z;
+                                        mv.x = true ;
+                                        mv.y = true ;
+                                        mv.z = true ;
+                                        atoms[it].vel[ia].set(0,0,0);
+										atoms[it].mag[ia]=magnet.start_magnetization[it];//if this line is used, default startmag_type would be 2
+										atoms[it].angle1[ia]=0;
+										atoms[it].angle2[ia]=0;
+										atoms[it].m_loc_[ia].set(0,0,0);
+
+                                        std::string tmpid;
+                                        tmpid = ifpos.get();
+
+										if( (int)tmpid[0] < 0 )
+										{
+											std::cout << "read_atom_positions, mismatch in atom number for atom type: " << atoms[it].label << std::endl;
+											exit(1); 
+										}
+
+										bool input_vec_mag=false;
+										bool input_angle_mag=false;
+                                        while ( (tmpid != "\n") && (ifpos.eof()==false) && (tmpid !="#") )
+                                        {
+                                                tmpid = ifpos.get() ;
+                                                // old method of reading frozen ions
+                                                char tmp = (char)tmpid[0];
+                                                if ( tmp >= 48 && tmp <= 57 )
+                                                {
+                                                        mv.x = std::stoi(tmpid);
+                                                        ifpos >> mv.y >> mv.z ;
+                                                }
+                                                // new method of reading frozen ions and velocities
+												if ( tmp >= 'a' && tmp <='z')
+												{
+													ifpos.putback(tmp);
+													ifpos >> tmpid;
+												}
+                                                if ( tmpid == "m" )
+                                                {
+                                                        ifpos >> mv.x >> mv.y >> mv.z ;
+                                                }
+                                                else if ( tmpid == "v" ||tmpid == "vel" || tmpid == "velocity" )
+                                                {
+                                                        ifpos >> atoms[it].vel[ia].x >> atoms[it].vel[ia].y >> atoms[it].vel[ia].z;
+                                                }
+												else if ( tmpid == "mag" || tmpid == "magmom")
+												{
+													set_element_mag_zero = true;
+													double tmpamg=0;
+													ifpos >> tmpamg;
+													tmp=ifpos.get();
+													while (tmp==' ')
+													{
+														tmp=ifpos.get();
+													}
+													
+													if((tmp >= 48 && tmp <= 57) or tmp=='-')
+													{
+														ifpos.putback(tmp);
+														ifpos >> atoms[it].m_loc_[ia].y>>atoms[it].m_loc_[ia].z;
+														atoms[it].m_loc_[ia].x=tmpamg;
+														atoms[it].mag[ia]=sqrt(pow(atoms[it].m_loc_[ia].x,2)+pow(atoms[it].m_loc_[ia].y,2)+pow(atoms[it].m_loc_[ia].z,2));
+														input_vec_mag=true;
+														
+													}
+													else
+													{
+														ifpos.putback(tmp);
+														atoms[it].mag[ia]=tmpamg;
+													}
+													
+													// atoms[it].mag[ia];
+												}
+												else if ( tmpid == "angle1")
+												{
+													 ifpos >> atoms[it].angle1[ia];
+													 atoms[it].angle1[ia]=atoms[it].angle1[ia]/180 *ModuleBase::PI;
+													 input_angle_mag=true;
+													 set_element_mag_zero = true;
+												}
+												else if ( tmpid == "angle2")
+												{
+													 ifpos >> atoms[it].angle2[ia];
+													 atoms[it].angle2[ia]=atoms[it].angle2[ia]/180 *ModuleBase::PI;
+													 input_angle_mag=true;
+													 set_element_mag_zero = true;
+												}
+												
+                                        }
+					while ( (tmpid != "\n") && (ifpos.eof()==false) )
+                                        {
+                                                tmpid = ifpos.get();
+                                        }
 					std::string mags;
 					//cout<<"mag"<<atoms[it].mag[ia]<<"angle1"<<atoms[it].angle1[ia]<<"angle2"<<atoms[it].angle2[ia]<<'\n';
 
diff --git a/source/module_elecstate/occupy.cpp b/source/module_elecstate/occupy.cpp
index e896aae4e8..80918dd3f1 100644
--- a/source/module_elecstate/occupy.cpp
+++ b/source/module_elecstate/occupy.cpp
@@ -79,12 +79,6 @@ void Occupy::decision(const std::string &name, const std::string &smearing_metho
         {
             gaussian_type = 2; // 2nd Methfessel-Paxton method.
         }
-        else if (smearing_method == "mp3")
-        {
-            // acually any order Methfessel-Paxton method can be supported in Occupy::w1gauss()
-            // however the parameter is string instead of int
-            ModuleBase::WARNING_QUIT("occupy", "Some refactor of smearing shoule be done before supporting any order of Methfessel-Paxton method!");
-        }
 
         else if (smearing_method == "marzari-vanderbilt" || smearing_method == "cold" || smearing_method == "mv")
         {
@@ -603,3 +597,411 @@ double Occupy::w1gauss(const double &x, const int n)
 
     return w1;
 } // end function w1gauss
+
+/*
+void Occupy::tweights(const int nks,const int nspin,const int nband,const double &nelec,
+                      const int ntetra,const ModuleBase::matrix &tetra, double **ekb, double &ef, ModuleBase::matrix
+&wg)
+{
+    //===================================================================
+    // calculates weights with the tetrahedron method (Bloechl version)
+    // integer :: nks, nspin, GlobalV::NBANDS, ntetra, tetra (4, ntetra)
+    //===================================================================
+
+    double e1, e2, e3, e4, c1, c2, c3, c4, dosef;
+    int ik, ibnd, nt, nk, ns, i, kp1, kp2, kp3, kp4;
+
+    double etetra[4];
+    int itetra[4];
+
+    // Calculate the Fermi energy ef
+    efermit(ekb, GlobalV::NBANDS, nks, nelec, nspin, ntetra, tetra, ef);
+
+    for (ik = 0;ik < nks;ik++)
+    {
+        for (ibnd = 0;ibnd < nband;ibnd++)
+        {
+            wg(ik, ibnd) = 0.0;
+        } // enddo
+    } // enddo
+
+    for (ns = 0;ns < nspin;ns++)
+    {
+        //==================================================================
+        // nk is used to select k-points with up (ns=1) or down (ns=2) spin
+        //==================================================================
+        if (ns == 1)
+        {
+            nk = 0;
+        }
+        else
+        {
+            nk = nks / 2;
+        }
+
+        for (nt = 0;nt < ntetra;nt++)
+        {
+            for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++)
+            {
+                //======================================================
+                // etetra are the energies at the vertexes of the nt-th
+                // tetrahedron
+                //======================================================
+                for (i = 0;i < 4;i++)
+                {
+                    etetra [i] = ekb[static_cast<int>( tetra(nt,i) ) + nk][ibnd];
+                }
+
+                itetra[0] = 0;
+
+                ModuleBase::hpsort(4, etetra, itetra);
+
+                //===============================================
+                // ...sort in ascending order: e1 < e2 < e3 < e4
+                //===============================================
+                e1 = etetra [0];
+                e2 = etetra [1];
+                e3 = etetra [2];
+                e4 = etetra [3];
+
+                //==============================================================
+                // kp1-kp4 are the irreducible k-points corresponding to e1-e4
+                //==============================================================
+
+                kp1 = static_cast<int>( tetra(nt,itetra[0]) )+ nk;
+                kp2 = static_cast<int>( tetra(nt,itetra[1]) )+ nk;
+                kp3 = static_cast<int>( tetra(nt,itetra[2]) )+ nk;
+                kp4 = static_cast<int>( tetra(nt,itetra[3]) )+ nk;
+
+                //======================
+                // calculate weights wg
+                //======================
+                if (ef >= e4)
+                {
+                    wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra;
+                    wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra;
+                    wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra;
+                    wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra;
+                }
+                else if (ef < e4 && ef >= e3)
+                {
+                    c4 = 0.250 / ntetra * pow(e4 - ef, 3) / (e4 - e1) / (e4 - e2)
+                         / (e4 - e3);
+                    dosef = 3.0 / ntetra * (e4 - ef) * (e4 - ef) / (e4 - e1) / (e4 - e2)
+                            / (e4 - e3);
+                    wg(kp1, ibnd) = wg(kp1, ibnd) + 0.250 / ntetra - c4 *
+                                    (e4 - ef) / (e4 - e1) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
+                    wg(kp2, ibnd) = wg(kp2, ibnd) + 0.250 / ntetra - c4 *
+                                    (e4 - ef) / (e4 - e2) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
+                    wg(kp3, ibnd) = wg(kp3, ibnd) + 0.250 / ntetra - c4 *
+                                    (e4 - ef) / (e4 - e3) + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
+                    wg(kp4, ibnd) = wg(kp4, ibnd) + 0.250 / ntetra - c4 *
+                                    (4.0 - (e4 - ef) * (1.0 / (e4 - e1) + 1.0 / (e4 - e2)
+                                                        + 1.0 / (e4 - e3))) + dosef * (e1 + e2 + e3 + e4 - 4.0 *
+                                                                                       ekb[kp4][ibnd]) / 40.0;
+                }
+
+                else if (ef < e3 && ef >= e2)
+                {
+                    c1 = 0.250 / ntetra * (ef - e1) * (ef - e1) / (e4 - e1) / (e3 - e1);
+                    c2 = 0.250 / ntetra * (ef - e1) * (ef - e2) * (e3 - ef)
+                         / (e4 - e1) / (e3 - e2) / (e3 - e1);
+                    c3 = 0.250 / ntetra * (ef - e2) * (ef - e2) * (e4 - ef) / (e4 - e2)
+                         / (e3 - e2) / (e4 - e1);
+                    dosef = 1.0 / ntetra / (e3 - e1) / (e4 - e1) * (3.0 *
+                            (e2 - e1) + 6.0 * (ef - e2) - 3.0 * (e3 - e1 + e4 - e2)
+                            * (ef - e2) * (ef - e2) / (e3 - e2) / (e4 - e2));
+                    wg(kp1, ibnd) = wg(kp1, ibnd) + c1 + (c1 + c2) * (e3 - ef)
+                                    / (e3 - e1) + (c1 + c2 + c3) * (e4 - ef) / (e4 - e1) + dosef *
+                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
+                    wg(kp2, ibnd) = wg(kp2, ibnd) + c1 + c2 + c3 + (c2 + c3)
+                                    * (e3 - ef) / (e3 - e2) + c3 * (e4 - ef) / (e4 - e2) + dosef *
+                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
+                    wg(kp3, ibnd) = wg(kp3, ibnd) + (c1 + c2) * (ef - e1)
+                                    / (e3 - e1) + (c2 + c3) * (ef - e2) / (e3 - e2) + dosef *
+                                    (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
+                    wg(kp4, ibnd) = wg(kp4, ibnd) + (c1 + c2 + c3) * (ef - e1)
+                                    / (e4 - e1) + c3 * (ef - e2) / (e4 - e2) + dosef * (e1 + e2 +
+                                            e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0;
+                }
+                else if (ef < e2 && ef >= e1)
+                {
+                    c4 = 0.250 / ntetra * (ef - e1) * (ef - e1) * (ef - e1) / (e2 - e1) /
+                         (e3 - e1) / (e4 - e1);
+                    dosef = 3.0 / ntetra * (ef - e1) * (ef - e1) / (e2 - e1) / (e3 - e1)
+                            / (e4 - e1);
+                    wg(kp1, ibnd) = wg(kp1, ibnd) + c4 * (4.0 - (ef - e1)
+                                                          * (1.0 / (e2 - e1) + 1.0 / (e3 - e1) + 1.0 / (e4 - e1)))
+                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp1][ibnd]) / 40.0;
+                    wg(kp2, ibnd) = wg(kp2, ibnd) + c4 * (ef - e1) / (e2 - e1)
+                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp2][ibnd]) / 40.0;
+                    wg(kp3, ibnd) = wg(kp3, ibnd) + c4 * (ef - e1) / (e3 - e1)
+                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp3][ibnd]) / 40.0;
+                    wg(kp4, ibnd) = wg(kp4, ibnd) + c4 * (ef - e1) / (e4 - e1)
+                                    + dosef * (e1 + e2 + e3 + e4 - 4.0 * ekb[kp4][ibnd]) / 40.0;
+                } // endif
+            } // enddo
+        } // enddo
+    } // enddo
+
+    //=====================================================================
+    // add correct spin normalization : 2 for LDA, 1 for LSDA calculations
+    //=====================================================================
+    for (ik = 0;ik < nks;ik++)
+    {
+        for (ibnd = 0;ibnd < GlobalV::NBANDS;ibnd++)
+        {
+            wg(ik, ibnd) = wg(ik, ibnd) * 2.0 / nspin;
+        }
+    }
+    return;
+} // end subroutine tweights
+*/
+
+/*
+double Occupy::wsweight(const ModuleBase::Vector3<double> &r, ModuleBase::Vector3<double> *rws,const int nrws)
+{
+    //============================================================
+    // integer ir, nreq, nrws
+    // real(kind=dp) r(3), rrt, ck, eps, rws(0:3,nrws), wsweight
+    // parameter (eps=1.0e-6)
+    //============================================================
+    const double eps = 1.0e-6;
+
+    int nreq = 1;
+
+    for (int ir = 0;ir < nrws;ir++)
+    {
+        const double rrt = r * rws[ir];
+        const double ck = rrt - rws[ir].x;
+        //	rrt = r[1]*rws(1,ir) + r[2]*rws(2,ir) + r[3]*rws(3,ir);
+        //	ck = rrt-rws(0,ir);
+
+        if (ck > eps)
+        {
+            break;
+        }
+
+        if (std::abs(ck) < eps)
+        {
+            nreq++;
+        }
+    } // end do
+
+    const double wswe = 1.0 / nreq;
+
+    return wswe;
+} // end function wsweight
+*/
+
+/*
+void Occupy::efermit(double** ekb,const int nband,const int nks,const double &nelec,const int nspin,
+                     const int ntetra,const ModuleBase::matrix &tetra, double &ef)
+{
+    //=======================================================
+    // Finds the Fermi energy - tetrahedron method (Bloechl)
+    // the transformation Ry to eV
+    //=======================================================
+
+    // parameter :
+    const int maxiter = 300;
+    const double eps = 1.0e-10;
+
+    double efbetter;
+
+    //===================================
+    // nlw : the minimum energy band
+    // elw : the lower limit of the fermi ener
+    // eup : the upper limit of the fermi ener
+    // external sumkt
+    // find bounds for the Fermi energy.
+    //===================================
+    const int nlw = max(  1, static_cast<int>( (nelec / 2.0 - 5.0) )  );
+    double elw = ekb[nlw][0];
+    double eup = ekb[0][GlobalV::NBANDS-1];
+
+    for (int ik = 1;ik < nks;ik++)// do ik = 2, nks
+    {
+        elw = min(elw, ekb[ik][nlw]);
+        eup = max(eup, ekb[ik][GlobalV::NBANDS-1]);
+    }
+    for (int ik = 1;ik < nks;ik++)// do ik = 2, nks
+    {
+        elw = min(elw, ekb[ik][nlw]);
+        eup = max(eup, ekb[ik][GlobalV::NBANDS-1]);
+    }
+
+    //===============================
+    // Bisection method
+    // the number of states with eup
+    // the number of states with elw
+    //===============================
+    const double sumkup = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, eup);
+    const double sumklw = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, elw);
+
+    GlobalV::ofs_running << "\n sumkup = " << sumkup;
+    GlobalV::ofs_running << "\n sumklw = " << sumklw << std::endl;
+
+    if ((sumkup - nelec) < - eps || (sumklw - nelec) > eps)
+    {
+        ModuleBase::WARNING("efermit","unexpected error.");
+    }
+
+    double better = 1.0e+10;
+
+    bool converge = false;
+
+    double sumkmid = 0.0;
+    for (int iter = 0;iter < maxiter;iter++)
+    {
+        // the number of states with ef
+        ef = (eup + elw) / 2.0;
+        sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef);
+
+        if (std::abs(sumkmid - nelec) < better)
+        {
+            better = std::abs(sumkmid - nelec);
+            efbetter = ef;
+        }
+
+        // converged
+        if (std::abs(sumkmid - nelec) < eps)
+        {
+            converge = true;
+            break;
+        }
+        else if ((sumkmid - nelec) < - eps)
+        {
+            elw = ef;
+        }
+        else
+        {
+            eup = ef;
+        }
+    }
+    if (!converge)
+    {
+        // unconverged exit:
+        // the best available ef is used . Needed in some difficult cases
+        ef = efbetter;
+        sumkmid = sumkt(ekb, GlobalV::NBANDS, nks, nspin, ntetra, tetra, ef);
+    }
+
+    //==============================================================
+    // Check if Fermi level is above any of the highest eigenvalues
+    //==============================================================
+    for (int ik = 0;ik < nks;ik++)
+    {
+        if (ef > ekb[ik][GlobalV::NBANDS-1] + 1.e-4)
+        {
+            std::cout << "\n ef = " << ef;
+        }
+    }
+    return;
+} // end subroutine efermit
+*/
+
+/*
+double Occupy::sumkt(double** ekb,const int nband,const int nks,const int nspin,const int ntetra,
+                     const ModuleBase::matrix &tetra,const double &e)
+{
+    double etetra[4];
+    double sum = 0.0;
+
+    int nk = 0 ;
+    for (int ns = 0; ns < nspin;ns++)
+    {
+        //==================================================================
+        // nk is used to select k-points with up (ns=1) or down (ns=2) spin
+        //==================================================================
+        if (ns == 1)
+        {
+            nk = 0;
+        }
+        else
+        {
+            nk = nks / 2;
+        }
+
+        for (int nt = 0; nt < ntetra; nt++)
+        {
+            for (int ibnd = 0; ibnd < GlobalV::NBANDS; ibnd++)
+            {
+                //======================================================
+                // etetra are the energies at the vertexes of the nt-th
+                // tetrahedron
+                //======================================================
+                for (int i = 0; i < 4; i++)
+                {
+                    etetra [i] = ekb[  static_cast<int>( (tetra(i, nt) + nk) )][ ibnd  ];
+                }
+
+                piksort(4, etetra);
+                //===========================================
+                //sort in ascending order: e1 < e2 < e3 < e4
+                //===========================================
+                const double e1 = etetra [0];
+                const double e2 = etetra [1];
+                const double e3 = etetra [2];
+                const double e4 = etetra [3];
+
+                //===============================================
+                // calculate sum over k of the integrated charge
+                //===============================================
+                if (e >= e4)
+                {
+                    sum += 1.0 / ntetra;
+                }
+                else if (e < e4 && e >= e3)
+                {
+                    sum += 1.0 / ntetra * (1.0 - pow((e4 - e), 3) / (e4 - e1)
+                                           / (e4 - e2) / (e4 - e3));
+                }
+                else if (e < e3 && e >= e2)
+                {
+                    sum += 1.0 / ntetra / (e3 - e1) / (e4 - e1) *
+                           ((e2 - e1) * (e2 - e1) + 3.0 * (e2 - e1) * (e - e2) +
+                            3.0 * (e - e2) * (e - e2) - (e3 - e1 + e4 - e2) /
+                            (e3 - e2) / (e4 - e2) * pow((e - e2), 3));
+                }
+                else if (e < e2 && e >= e1)
+                {
+                    sum += 1.0 / ntetra * pow((e - e1), 3) /
+                           (e2 - e1) / (e3 - e1) / (e4 - e1);
+                }
+            }//ibnd
+        }//nt
+    }//ns
+
+// add correct spin normalization : 2 for LDA, 1 for LSDA calculations
+    sum *= 2.0 / nspin;
+    return sum;
+} // end function sumkt
+*/
+
+/*
+void Occupy::piksort(const int n, double *a)
+{
+    int i;
+    bool b = true;
+    for (int j = 1;j < n;j++) // do j = 2, n
+    {
+        const double temp = a [j];
+        for (i = j - 1;i >= 0;i--)  // do i = j - 1, 1, - 1
+        {
+            if (a [i] <= temp)
+            {
+                b = false;
+                break;
+            }
+            a [i + 1] = a [i];
+        }
+        if (b)
+        {
+            i = 0;
+        }
+        a [i + 1] = temp;
+    }
+    return;
+} //end subroutine piksort
+*/
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 0b2608e5ea..520dcfd176 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -409,7 +409,7 @@ namespace ModuleESolver
                         }
                     }
 
-                    this->conv_elec = (drho < this->scf_thr && iter!=GlobalV::MIXING_RESTART);
+                    this->conv_elec = (drho < this->scf_thr);
 
                     // If drho < hsolver_error in the first iter or drho < scf_thr, we do not change rho.
                     if (drho < hsolver_error || this->conv_elec)
@@ -435,16 +435,8 @@ namespace ModuleESolver
                         //     }
                         //     p_chgmix->auto_set(bandgap_for_autoset, GlobalC::ucell);
                         // }
-                        // mixing will restart after GlobalV::MIXING_RESTART steps
-                        // So, GlobalV::MIXING_RESTART=1 means mix from scratch
-                        if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1)
-                        {
-                            // do not mix charge density
-                        }
-                        else
-                        {
-                            p_chgmix->mix_rho(pelec->charge); // update chr->rho by mixing
-                        }
+                        
+                        p_chgmix->mix_rho(pelec->charge);
                         if (GlobalV::SCF_THR_TYPE == 2) pelec->charge->renormalize_rho(); // renormalize rho in R-space would induce a error in K-space
                         //----------charge mixing done-----------
                     }
@@ -475,11 +467,6 @@ namespace ModuleESolver
                     bool stop = this->do_after_converge(iter);
                     if(stop) {std::cout << "break\n"; break;}
                 }
-                // notice for restart
-                if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1)
-                {
-                    std::cout<<"SCF restart after this step!"<<std::endl;
-                }
             }
             afterscf(istep);
             ModuleBase::timer::tick(this->classname, "Run");
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 4c4d6c342f..8fdc9217fd 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -309,7 +309,7 @@ namespace ModuleESolver
     GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl;
     GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl;
 
-    if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0 || INPUT.out_proj_band != 0)
+    if (INPUT.out_dos != 0 || INPUT.out_band != 0 || INPUT.out_proj_band != 0)
     {
         GlobalV::ofs_running << "\n\n\n\n";
         GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl;
@@ -331,7 +331,7 @@ namespace ModuleESolver
 
     int nspin0 = (GlobalV::NSPIN == 2) ? 2 : 1;
 
-    if (INPUT.out_band[0]) // pengfei 2014-10-13
+    if (INPUT.out_band) // pengfei 2014-10-13
     {
         int nks = 0;
         if (nspin0 == 1)
@@ -348,15 +348,7 @@ namespace ModuleESolver
             std::stringstream ss2;
             ss2 << GlobalV::global_out_dir << "BANDS_" << is + 1 << ".dat";
             GlobalV::ofs_running << "\n Output bands in file: " << ss2.str() << std::endl;
-            ModuleIO::nscf_band(is, 
-                                ss2.str(), 
-                                nks, 
-                                GlobalV::NBANDS, 
-                                0.0, 
-                                INPUT.out_band[1],
-                                this->pelec->ekb, 
-                                this->kv, 
-                                &(GlobalC::Pkpoints));
+            ModuleIO::nscf_band(is, ss2.str(), nks, GlobalV::NBANDS, 0.0, this->pelec->ekb, this->kv, &(GlobalC::Pkpoints));
         }
     } // out_band
 
@@ -491,19 +483,8 @@ namespace ModuleESolver
     template <typename TK, typename TR>
     void ESolver_KS_LCAO<TK, TR>::eachiterinit(const int istep, const int iter)
 {
-    if (iter == 1 || iter == GlobalV::MIXING_RESTART)
-    {
-        if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart 
-        {
-            this->p_chgmix->set_mixing(GlobalV::MIXING_MODE,
-                                GlobalV::MIXING_BETA,
-                                GlobalV::MIXING_NDIM,
-                                GlobalV::MIXING_GG0,
-                                GlobalV::MIXING_TAU,
-                                GlobalV::MIXING_BETA_MAG);
-        }
+    if (iter == 1)
         this->p_chgmix->mix_reset();
-    }
 
     // mohan update 2012-06-05
     this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband();
diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp
index 07779f62ec..294f1636f7 100644
--- a/source/module_esolver/esolver_ks_pw.cpp
+++ b/source/module_esolver/esolver_ks_pw.cpp
@@ -492,19 +492,9 @@ void ESolver_KS_PW<T, Device>::othercalculation(const int istep)
 template <typename T, typename Device>
 void ESolver_KS_PW<T, Device>::eachiterinit(const int istep, const int iter)
 {
-    if (iter == 1 || iter == GlobalV::MIXING_RESTART)
-    {
-        if (iter == GlobalV::MIXING_RESTART) // delete mixing and re-construct it to restart 
-        {
-            this->p_chgmix->set_mixing(GlobalV::MIXING_MODE,
-                                GlobalV::MIXING_BETA,
-                                GlobalV::MIXING_NDIM,
-                                GlobalV::MIXING_GG0,
-                                GlobalV::MIXING_TAU,
-                                GlobalV::MIXING_BETA_MAG);
-        }
+    if (iter == 1)
         this->p_chgmix->mix_reset();
-    }
+
     // mohan move harris functional to here, 2012-06-05
     // use 'rho(in)' and 'v_h and v_xc'(in)
     this->pelec->f_en.deband_harris = this->pelec->cal_delta_eband();
@@ -904,58 +894,6 @@ void ESolver_KS_PW<T, Device>::afterscf(const int istep)
                             this->kspw_psi[0].get_pointer() - this->kspw_psi[0].get_psi_bias(),
                             this->psi[0].size());
     }
-
-    if(INPUT.band_print_num > 0)
-    {
-        std::complex<double> * wfcr = new std::complex<double>[this->pw_rho->nxyz];
-        double * rho_band = new double [this->pw_rho->nxyz];
-        for(int i = 0; i < this->pw_rho->nxyz; i++)
-        {
-            rho_band[i] = 0.0;
-        }
-
-        for(int i = 0; i < INPUT.band_print_num; i++)
-        {
-            int ib = INPUT.bands_to_print[i];
-            for(int ik = 0; ik < this->kv.nks; ik++)
-            {
-                this->psi->fix_k(ik);
-                this->pw_wfc->recip_to_real(this->ctx,&psi[0](ib,0),wfcr,ik);
-
-                double w1 = static_cast<double>(this->kv.wk[ik] / GlobalC::ucell.omega);
-
-                for(int i = 0; i < this->pw_rho->nxyz; i++)
-                {
-                    rho_band[i] += std::norm(wfcr[i]) * w1;
-                }
-            }
-
-            std::stringstream ssc;
-            ssc << GlobalV::global_out_dir << "band" << ib << ".cube";     
-
-            ModuleIO::write_rho
-            (
-#ifdef __MPI
-                this->pw_big->bz,
-                this->pw_big->nbz,
-                this->pw_big->nplane,
-                this->pw_big->startz_current,
-#endif
-                rho_band,
-                0,
-                GlobalV::NSPIN,
-                0,
-                ssc.str(),
-                this->pw_rho->nx,
-                this->pw_rho->ny,
-                this->pw_rho->nz,
-                0.0,
-                &(GlobalC::ucell),
-                11);
-        }
-        delete[] wfcr;
-        delete[] rho_band;
-    }
 }
 
 template <typename T, typename Device>
@@ -1021,7 +959,7 @@ void ESolver_KS_PW<T, Device>::postprocess()
     GlobalV::ofs_running << " !FINAL_ETOT_IS " << this->pelec->f_en.etot * ModuleBase::Ry_to_eV << " eV" << std::endl;
     GlobalV::ofs_running << " --------------------------------------------\n\n" << std::endl;
 
-    if (INPUT.out_dos != 0 || INPUT.out_band[0] != 0)
+    if (INPUT.out_dos != 0 || INPUT.out_band != 0)
     {
         GlobalV::ofs_running << "\n\n\n\n";
         GlobalV::ofs_running << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl;
@@ -1063,7 +1001,7 @@ void ESolver_KS_PW<T, Device>::postprocess()
         }
     }
 
-    if (INPUT.out_band[0]) // pengfei 2014-10-13
+    if (INPUT.out_band) // pengfei 2014-10-13
     {
         int nks = 0;
         if (nspin0 == 1)
@@ -1084,7 +1022,6 @@ void ESolver_KS_PW<T, Device>::postprocess()
                                 nks,
                                 GlobalV::NBANDS,
                                 0.0,
-                                INPUT.out_band[1],
                                 this->pelec->ekb,
                                 this->kv,
                                 &(GlobalC::Pkpoints));
diff --git a/source/module_hamilt_general/module_xc/test/test_xc.cpp b/source/module_hamilt_general/module_xc/test/test_xc.cpp
index a770a88458..558556b66b 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 namespace GlobalV
@@ -95,7 +95,7 @@ class XCTest_PBEsol : public testing::Test
                 e_gga.push_back(e);
                 v1_gga.push_back(v1);
                 v2_gga.push_back(v2);
-            }
+            }                                           
         }
 };
 
diff --git a/source/module_hamilt_general/module_xc/test/test_xc1.cpp b/source/module_hamilt_general/module_xc/test/test_xc1.cpp
index bc5c439630..8e7a451e71 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc1.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc1.cpp
@@ -12,7 +12,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 namespace GlobalV
diff --git a/source/module_hamilt_general/module_xc/test/test_xc2.cpp b/source/module_hamilt_general/module_xc/test/test_xc2.cpp
index 5bf75a3c68..4b1b7e888e 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc2.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc2.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 namespace GlobalV
@@ -202,7 +202,7 @@ class XCTest_PZ_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }
+            }          
         }
 };
 
@@ -238,7 +238,7 @@ class XCTest_SLATER1_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }
+            }          
         }
 };
 
@@ -273,7 +273,7 @@ class XCTest_SLATER_RXC_SPN : public testing::Test
                 e_lda.push_back(e);
                 v1_lda.push_back(v1);
                 v2_lda.push_back(v2);
-            }
+            }           
         }
 };
 
@@ -310,7 +310,7 @@ class XCTest_P86_SPN : public testing::Test
                 v1_gga.push_back(v1);
                 v2_gga.push_back(v2);
                 v3_gga.push_back(v3);
-            }
+            }         
         }
 };
 
diff --git a/source/module_hamilt_general/module_xc/test/test_xc4.cpp b/source/module_hamilt_general/module_xc/test/test_xc4.cpp
index b4c8b70093..114c817b0f 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc4.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc4.cpp
@@ -11,7 +11,7 @@
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description) {exit(1);}
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
 }
 
 namespace GlobalV
@@ -49,7 +49,7 @@ class XCTest_SCAN : public testing::Test
                 v2_.push_back(v2);
                 v3_.push_back(v3);
             }
-        }
+        }                       
 };
 
 TEST_F(XCTest_SCAN, set_xc_type)
diff --git a/source/module_hamilt_general/module_xc/test/test_xc5.cpp b/source/module_hamilt_general/module_xc/test/test_xc5.cpp
index d9dfed1b20..bd2f87da8a 100644
--- a/source/module_hamilt_general/module_xc/test/test_xc5.cpp
+++ b/source/module_hamilt_general/module_xc/test/test_xc5.cpp
@@ -25,6 +25,9 @@ class XCTest_VXC : public testing::Test
         double et2 = 0, vt2 = 0;
         ModuleBase::matrix v2;
 
+        double et4 = 0, vt4 = 0;
+        ModuleBase::matrix v4;
+
         void SetUp()
         {
             ModulePW::PW_Basis rhopw;
@@ -84,6 +87,13 @@ class XCTest_VXC : public testing::Test
             vt2 = std::get<1>(etxc_vtxc_v);
             v2  = std::get<2>(etxc_vtxc_v);
 
+            GlobalV::NSPIN = 4;
+            GlobalV::DOMAG = true;
+            etxc_vtxc_v
+                = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell);
+            et4 = std::get<0>(etxc_vtxc_v);
+            vt4 = std::get<1>(etxc_vtxc_v);
+            v4  = std::get<2>(etxc_vtxc_v);
         }
 };
 
@@ -111,6 +121,29 @@ TEST_F(XCTest_VXC, set_xc_type)
     EXPECT_NEAR(v2(1,3),-1.97506482,1.0e-8);
     EXPECT_NEAR(v2(1,4),-2.160374198,1.0e-8);
 
+    EXPECT_NEAR(et4,-27.40098253,1.0e-8);
+    EXPECT_NEAR(vt4,-35.81948838,1.0e-8);
+    EXPECT_NEAR(v4(0,0),0,1.0e-8);
+    EXPECT_NEAR(v4(0,1),-1.559604078,1.0e-8);
+    EXPECT_NEAR(v4(0,2),-1.920028447,1.0e-8);
+    EXPECT_NEAR(v4(0,3),-2.168396069,1.0e-8);
+    EXPECT_NEAR(v4(0,4),-2.36419592,1.0e-8);
+    EXPECT_NEAR(v4(1,0),0,1.0e-8);
+    EXPECT_NEAR(v4(1,1),-0.09308179605,1.0e-8);
+    EXPECT_NEAR(v4(1,2),-0.123132664,1.0e-8);
+    EXPECT_NEAR(v4(1,3),-0.144332804,1.0e-8);
+    EXPECT_NEAR(v4(1,4),-0.16127282,1.0e-8);
+    EXPECT_NEAR(v4(2,0),0,1.0e-8);
+    EXPECT_NEAR(v4(2,1),-0.9308179605,1.0e-8);
+    EXPECT_NEAR(v4(2,2),-1.23132664,1.0e-8);
+    EXPECT_NEAR(v4(2,3),-1.44332804,1.0e-8);
+    EXPECT_NEAR(v4(2,4),-1.6127282,1.0e-8);
+    EXPECT_NEAR(v4(3,0),0,1.0e-8);
+    EXPECT_NEAR(v4(3,1),-0.09308179605,1.0e-8);
+    EXPECT_NEAR(v4(3,2),-0.123132664,1.0e-8);
+    EXPECT_NEAR(v4(3,3),-0.144332804,1.0e-8);
+    EXPECT_NEAR(v4(3,4),-0.16127282,1.0e-8);
+
 }
 
 class XCTest_VXC_Libxc : public testing::Test
@@ -123,6 +156,9 @@ class XCTest_VXC_Libxc : public testing::Test
         double et2 = 0, vt2 = 0;
         ModuleBase::matrix v2;
 
+        double et4 = 0, vt4 = 0;
+        ModuleBase::matrix v4;
+
         void SetUp()
         {
             ModulePW::PW_Basis rhopw;
@@ -182,6 +218,13 @@ class XCTest_VXC_Libxc : public testing::Test
             vt2 = std::get<1>(etxc_vtxc_v);
             v2  = std::get<2>(etxc_vtxc_v);
 
+            GlobalV::NSPIN = 4;
+            GlobalV::DOMAG = true;
+            etxc_vtxc_v
+                = XC_Functional::v_xc(rhopw.nrxx,&chr,&ucell);
+            et4 = std::get<0>(etxc_vtxc_v);
+            vt4 = std::get<1>(etxc_vtxc_v);
+            v4  = std::get<2>(etxc_vtxc_v);
         }
 };
 
@@ -209,6 +252,28 @@ TEST_F(XCTest_VXC_Libxc, set_xc_type)
     EXPECT_NEAR(v2(1,3),-1.975058937,1.0e-8);
     EXPECT_NEAR(v2(1,4),-2.160368003,1.0e-8);
 
+    EXPECT_NEAR(et4,-27.28201062,1.0e-8);
+    EXPECT_NEAR(vt4,-35.98253991,1.0e-8);
+    EXPECT_NEAR(v4(0,0),0,1.0e-8);
+    EXPECT_NEAR(v4(0,1),-1.268278149,1.0e-8);
+    EXPECT_NEAR(v4(0,2),-1.598108222,1.0e-8);
+    EXPECT_NEAR(v4(0,3),-1.828079634,1.0e-8);
+    EXPECT_NEAR(v4(0,4),-2.010634115,1.0e-8);
+    EXPECT_NEAR(v4(1,0),0,1.0e-8);
+    EXPECT_NEAR(v4(1,1),-0.1255782493,1.0e-8);
+    EXPECT_NEAR(v4(1,2),-0.1582362929,1.0e-8);
+    EXPECT_NEAR(v4(1,3),-0.1810068558,1.0e-8);
+    EXPECT_NEAR(v4(1,4),-0.1990824429,1.0e-8);
+    EXPECT_NEAR(v4(2,0),0,1.0e-8);
+    EXPECT_NEAR(v4(2,1),-1.255782493,1.0e-8);
+    EXPECT_NEAR(v4(2,2),-1.582362929,1.0e-8);
+    EXPECT_NEAR(v4(2,3),-1.810068558,1.0e-8);
+    EXPECT_NEAR(v4(2,4),-1.990824429,1.0e-8);
+    EXPECT_NEAR(v4(3,0),0,1.0e-8);
+    EXPECT_NEAR(v4(3,1),-0.1255782493,1.0e-8);
+    EXPECT_NEAR(v4(3,2),-0.1582362929,1.0e-8);
+    EXPECT_NEAR(v4(3,3),-0.1810068558,1.0e-8);
+    EXPECT_NEAR(v4(3,4),-0.1990824429,1.0e-8);
 }
 
 class XCTest_VXC_meta : public testing::Test
diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h
index da7f1e6f08..628937adfe 100644
--- a/source/module_hamilt_general/module_xc/test/xc3_mock.h
+++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h
@@ -75,7 +75,7 @@ namespace ModulePW
         return x;
     }
 
-
+    
     template <typename FPTYPE, typename Device>
     void PW_Basis_K::real_to_recip(const Device* ctx,
                        const std::complex<FPTYPE>* in,
@@ -115,7 +115,7 @@ namespace ModulePW
                                                                      const int ik,
                                                                      const bool add,
                                                                      const double factor) const;
-#if __CUDA || __ROCM
+#if __CUDA || __ROCM                                                                     
     template void PW_Basis_K::real_to_recip<double, psi::DEVICE_GPU>(const psi::DEVICE_GPU* ctx,
                                                                      const std::complex<double>* in,
                                                                      std::complex<double>* out,
@@ -129,7 +129,7 @@ namespace ModulePW
                                                                      const int ik,
                                                                      const bool add,
                                                                      const double factor) const;
-#endif
+#endif 
 
     FFT::FFT(){};
     FFT::~FFT(){};
@@ -144,13 +144,9 @@ namespace ModulePW
 
 namespace ModuleBase
 {
-    void WARNING_QUIT(const std::string &file,const std::string &description)
-    {
-        std::cout << " " << file <<"  warning : "<< description<<std::endl;
-        exit(1);
-    }
+    void WARNING_QUIT(const std::string &file,const std::string &description) {return ;}
     void WARNING(const std::string &file,const std::string &description) {};
-
+    
     void Matrix3::Identity(){};
 
     IntArray::IntArray(int,int){};
@@ -232,4 +228,4 @@ namespace Parallel_Reduce
     template void reduce_pool<float>(float& object);
     template void reduce_pool<float>(float* object, const int n);
     template void reduce_pool<double>(double* object, const int n);
-}
+}
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
index 7ee0394d4d..3d85150b22 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
@@ -33,7 +33,7 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::cal_MW_
         const char N_char = 'N';
         const int one_int = 1;
         const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
-        pzgemm_(&N_char,
+        pzgemm_(&T_char,
                 &T_char,
                 &nw,
                 &nw,
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
index bd0ad4ce3a..0c0595a9ce 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw_helper.cpp
@@ -22,7 +22,7 @@ std::vector<std::vector<std::vector<double>>> SpinConstrain<std::complex<double>
                 AorbMulP[is][iat].resize(nw_it, 0.0);
                 for (int iw = 0; iw < nw_it; iw++)
                 {
-                    AorbMulP[is][iat][iw] = std::abs(orbMulP(is, num))< 1e-10 ? 0.0 : orbMulP(is, num);
+                    AorbMulP[is][iat][iw] = orbMulP(is, num);
                     num++;
                 }
             }
@@ -92,10 +92,16 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::calculate_MW(
             }
             else if (this->nspin_ == 4)
             {
-                this->Mi_[iat].x = (std::abs(total_charge_soc[1]) < this->sc_thr_)? 0.0 : total_charge_soc[1];
-                this->Mi_[iat].y = (std::abs(total_charge_soc[2]) < this->sc_thr_)? 0.0 : total_charge_soc[2];
-                this->Mi_[iat].z = (std::abs(total_charge_soc[3]) < this->sc_thr_)? 0.0 : total_charge_soc[3];
+                this->Mi_[iat].x = total_charge_soc[1];
+                this->Mi_[iat].y = total_charge_soc[2];
+                this->Mi_[iat].z = total_charge_soc[3];
             }
+            if (std::abs(this->Mi_[iat].x) < 1e-12)
+                this->Mi_[iat].x = 0.0;
+            if (std::abs(this->Mi_[iat].y) < 1e-12)
+                this->Mi_[iat].y = 0.0;
+            if (std::abs(this->Mi_[iat].z) < 1e-12)
+                this->Mi_[iat].z = 0.0;
         }
     }
 }
diff --git a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
index 845db88062..db64d5490d 100644
--- a/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/lambda_loop.cpp
@@ -45,7 +45,6 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         }
         else
         {
-            where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda);
             add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_);
             this->cal_mw_from_lambda(i_step);
             new_spin = this->Mi_;
@@ -88,7 +87,6 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         add_scalar_multiply_2d(dnu, search, alpha_trial, dnu);
         delta_lambda = dnu;
 
-        where_fill_scalar_else_2d(this->constrain_, 0, zero, delta_lambda, delta_lambda);
         add_scalar_multiply_2d(initial_lambda, delta_lambda, one, this->lambda_);
         this->cal_mw_from_lambda(i_step);
 
@@ -117,4 +115,4 @@ void SpinConstrain<std::complex<double>, psi::DEVICE_CPU>::run_lambda_loop(int o
         }
         alpha_trial = alpha_trial * pow(g, 0.7);
     }
-}
+}
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
index a55ad59681..fedb46a976 100644
--- a/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/tddft_test.cpp
@@ -28,8 +28,7 @@ void MPIInit()
     npcol = 1;
     Cblacs_pinfo(&myrank, &mysize);
     Cblacs_get(-1, 0, &ictxt);
-    char order[] = "Row";
-    Cblacs_gridinit(&ictxt, order, nprow, npcol);
+    Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
     Cblacs_gridinfo(ictxt, &nprow, &npcol, &myprow, &mypcol);
 }
 
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 8e4ee5b15b..fbaf7b1806 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,13 +1,14 @@
 #include <complex>
 #ifdef __PEXSI
-#include "c_pexsi_interface.h"
 #include "diago_pexsi.h"
+
+#include "c_pexsi_interface.h"
 #include "module_base/global_variable.h"
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_pexsi/pexsi_solver.h"
+#include "pexsi/pexsi_solver.h"
 
 typedef hamilt::MatrixBlock<double> matd;
 typedef hamilt::MatrixBlock<std::complex<double>> matcd;
@@ -15,7 +16,7 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 namespace hsolver
 {
 
-template <>
+template<>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
@@ -24,31 +25,30 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
     this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
-                                       this->ParaV->nb,
-                                       this->ParaV->nrow,
-                                       this->ParaV->ncol,
-                                       h_mat.p,
-                                       s_mat.p,
-                                       this->DM,
-                                       this->EDM,
-                                       this->totalEnergyH,
-                                       this->totalEnergyS,
-                                       this->totalFreeEnergy);
+                                this->ParaV->nb,
+                                this->ParaV->nrow,
+                                this->ParaV->ncol,
+                                h_mat.p,
+                                s_mat.p,
+                                this->DM,
+                                this->EDM,
+                                this->totalEnergyH,
+                                this->totalEnergyS,
+                                this->totalFreeEnergy);
     this->ps->solve();
-    this->EDM = this->ps->get_EDM();
-    this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]?
-    this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
-    this->totalEnergyH = this->ps->get_totalEnergyH();
-    this->totalEnergyS = this->ps->get_totalEnergyS();
+    this->EDM = this->ps->EDM;
+    this->DM = this->ps->DM; // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->totalFreeEnergy = this->ps->totalFreeEnergy;
+    this->totalEnergyH = this->ps->totalEnergyH;
+    this->totalEnergyS = this->ps->totalEnergyS;
 }
 
-template <>
-void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in,
-                                            psi::Psi<std::complex<double>>& psi,
-                                            double* eigenvalue_in)
+template<>
+void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>* phm_in, psi::Psi<std::complex<double>>& psi, double* eigenvalue_in)
 {
     ModuleBase::TITLE("DiagoPEXSI", "diag");
     ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
+    
 }
 
 } // namespace hsolver
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index c212d7795a..018397a33d 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -3,7 +3,7 @@
 
 #include "diagh.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_pexsi/pexsi_solver.h"
+#include "pexsi/pexsi_solver.h"
 
 namespace hsolver
 {
diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp
index ae784d2009..16fa5f335b 100644
--- a/source/module_hsolver/hsolver_pw.cpp
+++ b/source/module_hsolver/hsolver_pw.cpp
@@ -624,31 +624,17 @@ void HSolverPW<T, Device>::hamiltSolvePsiK(hamilt::Hamilt<T, Device>* hm, psi::P
         hm->ops->hPsi(info);
         ModuleBase::timer::tick("DiagoCG_New", "hpsi_func");
     };
-    auto spsi_func = [this, hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
+    auto spsi_func = [hm](const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
         ModuleBase::timer::tick("DiagoCG_New", "spsi_func");
         // psi_in should be a 2D tensor: 
         // psi_in.shape() = [nbands, nbasis]
         const auto ndim = psi_in.shape().ndim();
         REQUIRES_OK(ndim <= 2, "dims of psi_in should be less than or equal to 2");
-
-        if (GlobalV::use_uspp)
-        {
-            // Convert a Tensor object to a psi::Psi object
-            hm->sPsi(psi_in.data<T>(), spsi_out.data<T>(), 
+        // Convert a Tensor object to a psi::Psi object
+        hm->sPsi(psi_in.data<T>(), spsi_out.data<T>(), 
             ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), 
             ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1), 
             ndim == 1 ? 1 : psi_in.shape().dim_size(0));
-        } else
-        {
-            psi::memory::synchronize_memory_op<T, Device, Device>()(
-                this->ctx,
-                this->ctx,
-                spsi_out.data<T>(),
-                psi_in.data<T>(),
-                static_cast<size_t>((ndim == 1 ? 1 : psi_in.shape().dim_size(0))
-                                    * (ndim == 1 ? psi_in.NumElements() : psi_in.shape().dim_size(1))));
-        }
-        
         ModuleBase::timer::tick("DiagoCG_New", "spsi_func");
     };
     auto psi_tensor = ct::TensorMap(
@@ -790,4 +776,4 @@ template class HSolverPW<std::complex<float>, psi::DEVICE_GPU>;
 template class HSolverPW<std::complex<double>, psi::DEVICE_GPU>;
 #endif
 
-} // namespace hsolver
+} // namespace hsolver
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt
index 87d16ff557..8faab8b4b4 100644
--- a/source/module_hsolver/module_pexsi/CMakeLists.txt
+++ b/source/module_hsolver/module_pexsi/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp)
+add_library(pexsi OBJECT DistBCDMatrix.cpp DistCCSMatrix.cpp DistMatrixTransformer.cpp pexsi_solver.cpp simplePEXSI.cpp)
 
 if(ENABLE_COVERAGE)
   add_coverage(pexsi)
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
index e498b83a2e..cf815bd4ae 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -1,4 +1,3 @@
-#ifdef __PEXSI
 #include "dist_bcd_matrix.h"
 
 #include <mpi.h>
@@ -111,5 +110,4 @@ int DistBCDMatrix::pnum(const int prow, const int pcol)
 {
     return this->prowpcol2pnum[prow * this->npcols + pcol];
 }
-} // namespace pexsi
-#endif
\ No newline at end of file
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
index 98b8512893..7dbddbad7c 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -2,8 +2,6 @@
 #define DISTBCDMATRIX_H
 
 #include <mpi.h>
-
-#include "module_hsolver/module_pexsi/dist_matrix_transformer.h"
 // a Block Cyclic Data Distribution matrix
 // http://www.netlib.org/utk/papers/factor/node3.html
 // local matrix elements is stored in column major
@@ -29,27 +27,6 @@ class DistBCDMatrix
     int pnum(const int prow, const int pcol);
     //~DistBCDMatrix();
 
-    const MPI_Comm get_comm() const
-    {
-        return comm;
-    };
-    const MPI_Group get_group() const
-    {
-        return group;
-    };
-    const int get_nrow() const
-    {
-        return nrow;
-    };
-    const int get_ncol() const
-    {
-        return ncol;
-    };
-    const char get_LAYOUT() const
-    {
-        return LAYOUT;
-    };
-
   private:
     // MPI communicator
     MPI_Comm comm;
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
index ddd02aaa9a..365622d249 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
@@ -1,4 +1,3 @@
-#ifdef __PEXSI
 #include "dist_ccs_matrix.h"
 
 #include <mpi.h>
@@ -115,5 +114,4 @@ DistCCSMatrix::~DistCCSMatrix()
     delete[] colptrLocal;
     delete[] rowindLocal;
 }
-} // namespace pexsi
-#endif
\ No newline at end of file
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
index a63a0dc16c..aa5e67b6ab 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h
@@ -19,44 +19,6 @@ class DistCCSMatrix
     int globalCol(int localCol);
     int localCol(int globalCol, int& mypcol);
     void setnnz(int nnzLocal);
-
-    const MPI_Comm get_comm() const
-    {
-        return comm;
-    };
-    const MPI_Group get_group() const
-    {
-        return group;
-    };
-    const MPI_Group get_group_data() const
-    {
-        return group_data;
-    };
-    const int get_size() const
-    {
-        return size;
-    };
-    const int get_nnz() const
-    {
-        return nnz;
-    };
-    const int get_nnzlocal() const
-    {
-        return nnzLocal;
-    };
-    const int get_numcol_local() const
-    {
-        return numColLocal;
-    };
-    int* get_colptr_local() const
-    {
-        return colptrLocal;
-    };
-    int* get_rowind_local() const
-    {
-        return rowindLocal;
-    };
-
     ~DistCCSMatrix();
 
   private:
@@ -88,8 +50,6 @@ class DistCCSMatrix
     // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal
     int* colptrLocal;
     int* rowindLocal;
-
-    // friend class DistMatrixTransformer;
 };
 } // namespace pexsi
 #endif // DISTCCSMATRIX_H
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index ef6c6fec72..01b96f42cc 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -1,6 +1,3 @@
-#ifdef __PEXSI
-#include "dist_matrix_transformer.h"
-
 #include <mpi.h>
 
 #include <climits>
@@ -31,11 +28,11 @@ namespace pexsi
 // wether this function is called for the first time for a index array; nprocs: total number of processes size_process:
 // the number of indices in each process displacement_process: the start position in each process index: the array
 // contains the indices
-inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst,
-                                                       const int nprocs,
-                                                       int* size_process,
-                                                       int* displacement_process,
-                                                       const int* index)
+inline int MinimumIndexPosition(const bool isFirst,
+                                const int nprocs,
+                                int* size_process,
+                                int* displacement_process,
+                                const int* index)
 {
     // usually the minimum index is continuous, so it will be a good idea to
     // check the one next to the previous index first.
@@ -107,16 +104,16 @@ inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst,
     }
 }
 
-inline void DistMatrixTransformer::buildCCSParameter(const int size,
-                                                     const int nprocs,
-                                                     std::vector<int> size_process,
-                                                     std::vector<int> displacement_process,
-                                                     const int* position_index,
-                                                     DistCCSMatrix& DST_Matrix,
-                                                     int* buffer2ccsIndex)
+inline void buildCCSParameter(const int size,
+                              const int nprocs,
+                              std::vector<int> size_process,
+                              std::vector<int> displacement_process,
+                              const int* position_index,
+                              DistCCSMatrix& DST_Matrix,
+                              int* buffer2ccsIndex)
 {
     // find the minimum one from left buffer index
-    if (DST_Matrix.get_nnzlocal() <= 0)
+    if (DST_Matrix.nnzLocal <= 0)
         return;
 
     int pre_col = -1;
@@ -126,34 +123,31 @@ inline void DistMatrixTransformer::buildCCSParameter(const int size,
     while (p_mini >= 0)
     {
         int index_mini = position_index[p_mini];
-        int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol;
-        int row_mini = index_mini % DST_Matrix.get_size();
+        int col_mini = index_mini / DST_Matrix.size; //-DST_Matrix.firstCol;
+        int row_mini = index_mini % DST_Matrix.size;
         if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array
         {
             pre_col = col_mini;
-            DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1;
+            DST_Matrix.colptrLocal[col_mini] = nnz_now + 1;
         }
-        DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
+        DST_Matrix.rowindLocal[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based
         // copy data from buffer to M, be careful M is a 0-based array
         buffer2ccsIndex[nnz_now] = p_mini;
         ++nnz_now;
         p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index);
     }
     // The last element of colptrLocal is nnzLocal+1
-    DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1;
+    DST_Matrix.colptrLocal[DST_Matrix.numColLocal] = nnz_now + 1;
 }
 
-inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal,
-                                                   int* buffer2ccsIndex,
-                                                   double* buffer,
-                                                   double* nzvalLocal)
+inline void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal)
 {
     for (int i = 0; i < nnzLocal; ++i)
     {
         nzvalLocal[i] = buffer[buffer2ccsIndex[i]];
     }
 }
-inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map<int, int>& P)
+inline void countMatrixDistribution(int N, double* A, std::map<int, int>& P)
 {
     for (int i = 0; i < N; ++i)
     {
@@ -167,15 +161,15 @@ inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std
 }
 
 // find out the index of non-zero elements
-inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
-                                                  const int nrow,
-                                                  const int ncol,
-                                                  double* H_2d,
-                                                  double* S_2d,
-                                                  const double ZERO_Limit,
-                                                  int& nnz,
-                                                  std::vector<int>& rowidx,
-                                                  std::vector<int>& colidx)
+inline int getNonZeroIndex(char LAYOUT,
+                           const int nrow,
+                           const int ncol,
+                           double* H_2d,
+                           double* S_2d,
+                           const double ZERO_Limit,
+                           int& nnz,
+                           std::vector<int>& rowidx,
+                           std::vector<int>& colidx)
 {
 #ifdef _DEBUG
     char f_log[80];
@@ -281,21 +275,21 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
     return 0;
 }
 
-int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
-                                                   DistCCSMatrix& DST_Matrix,
-                                                   const int NPROC_TRANS,
-                                                   MPI_Group& GROUP_TRANS,
-                                                   MPI_Comm& COMM_TRANS,
-                                                   const int nnz,
-                                                   std::vector<int>& rowidx,
-                                                   std::vector<int>& colidx,
-                                                   int& sender_size,
-                                                   std::vector<int>& sender_size_process,
-                                                   std::vector<int>& sender_displacement_process,
-                                                   int& receiver_size,
-                                                   std::vector<int>& receiver_size_process,
-                                                   std::vector<int>& receiver_displacement_process,
-                                                   std::vector<int>& buffer2ccsIndex)
+int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
+                            DistCCSMatrix& DST_Matrix,
+                            const int NPROC_TRANS,
+                            MPI_Group& GROUP_TRANS,
+                            MPI_Comm& COMM_TRANS,
+                            const int nnz,
+                            std::vector<int>& rowidx,
+                            std::vector<int>& colidx,
+                            int& sender_size,
+                            std::vector<int>& sender_size_process,
+                            std::vector<int>& sender_displacement_process,
+                            int& receiver_size,
+                            std::vector<int>& receiver_size_process,
+                            std::vector<int>& receiver_displacement_process,
+                            std::vector<int>& buffer2ccsIndex)
 {
     // debug
     int myproc;
@@ -328,12 +322,12 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     std::vector<int> proc_map_data_trans;
     if (myproc == 0)
     {
-        MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data);
+        MPI_Group_size(DST_Matrix.group_data, &nproc_data);
         MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS);
         proc_map_data_trans.resize(nproc_data, 0);
         for (int i = 0; i < nproc_data; ++i)
         {
-            MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
+            MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &i, GROUP_TRANS, &proc_map_data_trans[i]);
         }
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
@@ -435,7 +429,7 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         int dst_col = DST_Matrix.localCol(g_col, dst_process);
         int l_row = rowidx[i];
         int dst_row = SRC_Matrix.globalRow(l_row);
-        sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row;
+        sender_index[i] = dst_col * DST_Matrix.size + dst_row;
     }
 // debug
 #ifdef _DEBUG
@@ -484,10 +478,10 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
-                                             DistCCSMatrix& DST_Matrix,
-                                             MPI_Group& GROUP_TRANS,
-                                             MPI_Comm& COMM_TRANS)
+int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
+                      DistCCSMatrix& DST_Matrix,
+                      MPI_Group& GROUP_TRANS,
+                      MPI_Comm& COMM_TRANS)
 {
 // debug
 #ifdef _DEBUG
@@ -505,7 +499,7 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
 #endif
     // build transfortram communicator which contains both processes of BCD processors and
     // CCS processors with nonzero elements
-    MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS);
+    MPI_Group_union(DST_Matrix.group_data, SRC_Matrix.group, &GROUP_TRANS);
     MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
 // debug
 #ifdef _DEBUG
@@ -563,7 +557,7 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
     return 0;
 }
 
-int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
+int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS)
 {
     MPI_Group_free(&GROUP_TRANS);
     if (COMM_TRANS != MPI_COMM_NULL)
@@ -577,13 +571,13 @@ int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm
 // two destination matrices share the same non-zero elements positions
 // if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero,
 // even if one of them is acturely zero All matrices must have same MPI communicator
-int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
-                                             double* H_2d,
-                                             double* S_2d,
-                                             const double ZERO_Limit,
-                                             DistCCSMatrix& DST_Matrix,
-                                             double*& H_ccs,
-                                             double*& S_ccs)
+int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
+                      double* H_2d,
+                      double* S_2d,
+                      const double ZERO_Limit,
+                      DistCCSMatrix& DST_Matrix,
+                      double*& H_ccs,
+                      double*& S_ccs)
 {
 // debug
 #ifdef _DEBUG
@@ -620,9 +614,9 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         {
             log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow
                 << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
-            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol()
+            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.nrow << " ; ncol: " << SRC_Matrix.ncol
                 << std::endl;
-            log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl;
+            log << "layout:" << SRC_Matrix.LAYOUT << std::endl;
             log << "ZERO = " << ZERO_Limit << std::endl;
             log << "DST_Matrix parameters:" << std::endl;
             log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
@@ -639,11 +633,11 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "start counting nnz..." << std::endl;
 #endif
-        if (SRC_Matrix.get_comm() != MPI_COMM_NULL)
+        if (SRC_Matrix.comm != MPI_COMM_NULL)
         {
-            getNonZeroIndex(SRC_Matrix.get_LAYOUT(),
-                            SRC_Matrix.get_nrow(),
-                            SRC_Matrix.get_ncol(),
+            getNonZeroIndex(SRC_Matrix.LAYOUT,
+                            SRC_Matrix.nrow,
+                            SRC_Matrix.ncol,
                             H_2d,
                             S_2d,
                             ZERO_Limit,
@@ -660,11 +654,11 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
             if(SRC_Matrix.comm != MPI_COMM_NULL)
             {
                 log<<"NonZeroIndex :"<<std::endl;
-                if(SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+                if(SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=rowidx[i]*SRC_Matrix.get_ncol()+colidx[i];
+                        int HS_idx=rowidx[i]*SRC_Matrix.ncol+colidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -673,7 +667,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 {
                     for(int i=0; i<nnz; ++i)
                     {
-                        int HS_idx=colidx[i]*SRC_Matrix.get_nrow()+rowidx[i];
+                        int HS_idx=colidx[i]*SRC_Matrix.nrow+rowidx[i];
                         log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
                         log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
                     }
@@ -713,18 +707,18 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
-        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
+                sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
+                sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -755,18 +749,18 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 #endif
 
         // put S to sender buffer
-        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+        if (SRC_Matrix.LAYOUT == 'R' || SRC_Matrix.LAYOUT == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]];
+                sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.ncol + colidx[i]];
             }
         }
         else
         {
             for (int i = 0; i < sender_size; ++i)
             {
-                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
+                sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.nrow + rowidx[i]];
             }
         }
 #ifdef _DEBUG
@@ -810,12 +804,12 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 
 // transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution
 // two source matrices share the same non-zero elements positions
-int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
-                                             double* DMnzvalLocal,
-                                             double* EDMnzvalLocal,
-                                             DistBCDMatrix& DST_Matrix,
-                                             double* DM,
-                                             double* EDM)
+int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
+                      double* DMnzvalLocal,
+                      double* EDMnzvalLocal,
+                      DistBCDMatrix& DST_Matrix,
+                      double* DM,
+                      double* EDM)
 {
 // debug
 #ifdef _DEBUG
@@ -846,7 +840,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
     if (COMM_TRANS != MPI_COMM_NULL)
     {
         // init DM and EDM with 0
-        for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i)
+        for (int i = 0; i < DST_Matrix.nrow * DST_Matrix.ncol; ++i)
         {
             DM[i] = 0;
             EDM[i] = 0;
@@ -883,12 +877,12 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         MPI_Comm_rank(COMM_TRANS, &myproc_trans);
         if (myproc_trans == 0)
         {
-            MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd);
+            MPI_Group_size(DST_Matrix.group, &nproc_bcd);
             MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS);
             proc_map_bcd_trans.resize(nproc_bcd, 0);
             for (int i = 0; i < nproc_bcd; ++i)
             {
-                MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
+                MPI_Group_translate_ranks(DST_Matrix.group, 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]);
             }
             MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
         }
@@ -939,7 +933,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             log << "display all columns and rows of nonzeros values:\n";
         int log_nnz = 0;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
+        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
@@ -948,9 +942,9 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             // log<<g_col<<"\n ";
             // #endif
             // OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
+                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
                 int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
@@ -1026,7 +1020,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
 #endif
 
         // setup up sender index and receiver index
-        int sender_size = SRC_Matrix.get_nnzlocal();
+        int sender_size = SRC_Matrix.nnzLocal;
         int* sender_index;
         double* sender_buffer;
         int* dst_index;
@@ -1125,14 +1119,14 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         if (myproc < 100)
             log << "idx start at " << idx << std::endl;
 #endif
-        for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
+        for (int icol = 0; icol < SRC_Matrix.numColLocal; ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
             int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
-            for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
+            for (int rowidx = SRC_Matrix.colptrLocal[icol] - 1; rowidx < SRC_Matrix.colptrLocal[icol + 1] - 1; ++rowidx)
             {
-                int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
+                int g_row = SRC_Matrix.rowindLocal[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
 #ifdef _DEBUG
@@ -1321,9 +1315,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2]>DST_Matrix.get_nrow())
+    else if(receiver_index[i*2]>DST_Matrix.nrow)
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.get_nrow()<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.nrow<<std::endl;
         log.flush();
     }
     if(receiver_index[i*2+1]<0)
@@ -1331,9 +1325,9 @@ for(int i=0; i<receiver_size; ++i)
         log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
         log.flush();
     }
-    else if(receiver_index[i*2+1]>DST_Matrix.get_ncol())
+    else if(receiver_index[i*2+1]>DST_Matrix.ncol)
     {
-        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.get_ncol()<<std::endl;
+        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.ncol<<std::endl;
         log.flush();
     }
 }
@@ -1382,10 +1376,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2] > DST_Matrix.get_nrow())
+                else if (receiver_index[i * 2] > DST_Matrix.nrow)
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > "
-                        << DST_Matrix.get_nrow() << std::endl;
+                        << DST_Matrix.nrow << std::endl;
                     log.flush();
                 }
                 if (receiver_index[i * 2 + 1] < 0)
@@ -1394,10 +1388,10 @@ MPI_Barrier(COMM_TRANS);
                         << std::endl;
                     log.flush();
                 }
-                else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol())
+                else if (receiver_index[i * 2 + 1] > DST_Matrix.ncol)
                 {
                     log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > "
-                        << DST_Matrix.get_ncol() << std::endl;
+                        << DST_Matrix.ncol << std::endl;
                     log.flush();
                 }
             }
@@ -1434,14 +1428,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
 #endif
         // transform receiver_buffer to DM
-        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
+        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.get_ncol() + iy;
+                int idx = ix * DST_Matrix.ncol + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1450,7 +1444,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
+                            << " ncol = " << DST_Matrix.ncol << std::endl;
                         log.flush();
                     }
                 }
@@ -1460,12 +1454,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.get_nrow() + ix;
+                int idx = iy * DST_Matrix.nrow + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1474,7 +1468,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
+                            << " nrow = " << DST_Matrix.nrow << std::endl;
                         log.flush();
                     }
                 }
@@ -1518,14 +1512,14 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
 #endif
         // transform receiver_buffer to EDM
-        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
+        if (DST_Matrix.LAYOUT == 'R' || DST_Matrix.LAYOUT == 'r')
         {
-            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = ix * DST_Matrix.get_ncol() + iy;
+                int idx = ix * DST_Matrix.ncol + iy;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1534,7 +1528,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
+                            << " ncol = " << DST_Matrix.ncol << std::endl;
                         log.flush();
                     }
                 }
@@ -1544,12 +1538,12 @@ MPI_Barrier(COMM_TRANS);
         }
         else
         {
-            int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
+            int DST_Matrix_elem = DST_Matrix.nrow * DST_Matrix.ncol;
             for (int i = 0; i < receiver_size; ++i)
             {
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
-                int idx = iy * DST_Matrix.get_nrow() + ix;
+                int idx = iy * DST_Matrix.nrow + ix;
 #ifdef _DEBUG
                 if (myproc < 100)
                 {
@@ -1558,7 +1552,7 @@ MPI_Barrier(COMM_TRANS);
                         log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
                             << std::endl;
                         log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
+                            << " nrow = " << DST_Matrix.nrow << std::endl;
                         log.flush();
                     }
                 }
@@ -1602,4 +1596,3 @@ MPI_Barrier(COMM_TRANS);
 }
 
 } // namespace pexsi
-#endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index e3e27e995a..1d28866c96 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -1,9 +1,8 @@
 #ifndef DISTMATRIXTRANSFORMER_H
 #define DISTMATRIXTRANSFORMER_H
 
-#include <mpi.h>
-
-#include <vector>
+#include "dist_bcd_matrix.h"
+#include "dist_ccs_matrix.h"
 // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // they should have same MPI communicator
 // The local matrix of BCD is column-major order
@@ -16,62 +15,6 @@
 // even if one of them is acturely zero All matrices must have same MPI communicator
 namespace pexsi
 {
-class DistBCDMatrix;
-class DistCCSMatrix;
-
-namespace DistMatrixTransformer
-{
-int MinimumIndexPosition(const bool isFirst,
-                         const int nprocs,
-                         int* size_process,
-                         int* displacement_process,
-                         const int* index);
-
-void buildCCSParameter(const int size,
-                       const int nprocs,
-                       std::vector<int> size_process,
-                       std::vector<int> displacement_process,
-                       const int* position_index,
-                       DistCCSMatrix& DST_Matrix,
-                       int* buffer2ccsIndex);
-
-void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal);
-
-void countMatrixDistribution(int N, double* A, std::map<int, int>& P);
-
-int getNonZeroIndex(char LAYOUT,
-                    const int nrow,
-                    const int ncol,
-                    double* H_2d,
-                    double* S_2d,
-                    const double ZERO_Limit,
-                    int& nnz,
-                    std::vector<int>& rowidx,
-                    std::vector<int>& colidx);
-
-int buildTransformParameter(DistBCDMatrix& SRC_Matrix,
-                            DistCCSMatrix& DST_Matrix,
-                            const int NPROC_TRANS,
-                            MPI_Group& GROUP_TRANS,
-                            MPI_Comm& COMM_TRANS,
-                            const int nnz,
-                            std::vector<int>& rowidx,
-                            std::vector<int>& colidx,
-                            int& sender_size,
-                            std::vector<int>& sender_size_process,
-                            std::vector<int>& sender_displacement_process,
-                            int& receiver_size,
-                            std::vector<int>& receiver_size_process,
-                            std::vector<int>& receiver_displacement_process,
-                            std::vector<int>& buffer2ccsIndex);
-
-int newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
-                      DistCCSMatrix& DST_Matrix,
-                      MPI_Group& GROUP_TRANS,
-                      MPI_Comm& COMM_TRANS);
-
-int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS);
-
 int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       double* H_2d,
                       double* S_2d,
@@ -88,7 +31,6 @@ int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                       double* ENDnzvalLocal,
                       DistBCDMatrix& DST_Matrix,
                       double* DM_2d,
-                      double* ED_2d);
-}; // namespace DistMatrixTransformer
+                      double* END_2d);
 } // namespace pexsi
 #endif // DISTMATRIXTRANSFORMER_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 1be66abf59..90d16ae993 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -1,4 +1,3 @@
-#ifdef __PEXSI
 #include "pexsi_solver.h"
 
 #include <mpi.h>
@@ -6,11 +5,6 @@
 #include <cstring>
 
 #include "module_base/global_variable.h"
-#include "simple_pexsi.h"
-
-extern MPI_Comm DIAG_WORLD;
-extern MPI_Comm GRID_WORLD;
-extern MPI_Group GRID_GROUP;
 
 namespace pexsi
 {
@@ -43,7 +37,9 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
 
 int PEXSI_Solver::solve()
 {
-
+    extern MPI_Comm DIAG_WORLD;
+    extern MPI_Comm GRID_WORLD;
+    extern MPI_Group GRID_GROUP;
     simplePEXSI(DIAG_WORLD,
                 GRID_WORLD,
                 GRID_GROUP,
@@ -65,12 +61,12 @@ int PEXSI_Solver::solve()
     return 0;
 }
 
-double* PEXSI_Solver::get_DM() const
+const double* PEXSI_Solver::get_DM() const
 {
     return DM;
 }
 
-double* PEXSI_Solver::get_EDM() const
+const double* PEXSI_Solver::get_EDM() const
 {
     return EDM;
 }
@@ -79,16 +75,4 @@ const double PEXSI_Solver::get_totalFreeEnergy() const
 {
     return totalFreeEnergy;
 }
-
-const double PEXSI_Solver::get_totalEnergyH() const
-{
-    return totalEnergyH;
-}
-
-const double PEXSI_Solver::get_totalEnergyS() const
-{
-    return totalEnergyS;
-}
-
-} // namespace pexsi
-#endif
\ No newline at end of file
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index b3d7aed152..0c3164e5f0 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -18,11 +18,9 @@ class PEXSI_Solver
                  double& totalEnergyS,
                  double& totalFreeEnergy);
     int solve();
-    double* get_DM() const;
-    double* get_EDM() const;
+    const double* get_DM() const;
+    const double* get_EDM() const;
     const double get_totalFreeEnergy() const;
-    const double get_totalEnergyH() const;
-    const double get_totalEnergyS() const;
 
   private:
     int blacs_text;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index df72a061c5..845beef18c 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -2,7 +2,6 @@
 // the H and S matrices are given by 2D block cyclic distribution
 // the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution
 // #include "mpi.h"
-#ifdef __PEXSI
 #include <mpi.h>
 
 #include <cfloat>
@@ -19,7 +18,6 @@
 #include "module_base/lapack_connector.h"
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
-#include "module_base/global_variable.h"
 
 namespace pexsi
 {
@@ -104,252 +102,220 @@ int loadPEXSIOption(MPI_Comm comm,
     // 10: numElectronPEXSITolerance
     // 11: ZERO_Limit
     double double_para[12];
+    int myid;
+    MPI_Comm_rank(comm, &myid);
+    if (myid == 0)
+    {
+        std::ifstream ifs(PexsiOptionFile.c_str());
+        if (!ifs)
+        {
+            return 1;
+        }
+        setDefaultOption(int_para, double_para);
 
-    // read in PEXSI options from GlobalV
-    int_para[0] = GlobalV::pexsi_npole;
-    int_para[1] = GlobalV::pexsi_inertia;
-    int_para[2] = GlobalV::pexsi_nmax;
-    int_para[3] = 0;
-    int_para[4] = 1; // GlobalV::pexsi_symbolic;
-    int_para[5] = GlobalV::pexsi_comm;
-    int_para[6] = 0;
-    int_para[7] = GlobalV::pexsi_storage;
-    int_para[8] = GlobalV::pexsi_ordering;
-    int_para[9] = GlobalV::pexsi_row_ordering;
-    int_para[10] = GlobalV::pexsi_nproc;
-    int_para[11] = GlobalV::pexsi_symm;
-    int_para[12] = GlobalV::pexsi_trans;
-    int_para[13] = GlobalV::pexsi_method;
-    int_para[14] = 2;
-    int_para[15] = 0;
-    int_para[16] = GlobalV::pexsi_nproc_pole;
-
-    double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin;
-    double_para[1] = GlobalV::pexsi_temp;
-    double_para[2] = GlobalV::pexsi_gap;
-    double_para[3] = GlobalV::pexsi_delta_e;
-    double_para[4] = GlobalV::pexsi_mu_lower;
-    double_para[5] = GlobalV::pexsi_mu_upper;
-    double_para[6] = GlobalV::pexsi_mu;
-    double_para[7] = GlobalV::pexsi_mu_thr;
-    double_para[8] = GlobalV::pexsi_mu_expand;
-    double_para[9] = GlobalV::pexsi_mu_guard;
-    double_para[10] = GlobalV::pexsi_elec_thr;
-    double_para[11] = GlobalV::pexsi_zero_thr;
-    // int myid;
-    // MPI_Comm_rank(comm, &myid);
-    // if (myid == 0)
-    // {
-    //     std::ifstream ifs(PexsiOptionFile.c_str());
-    //     if (!ifs)
-    //     {
-    //         return 1;
-    //     }
-    //     setDefaultOption(int_para, double_para);
-
-    //     ifs.clear();
-    //     ifs.seekg(0);
+        ifs.clear();
+        ifs.seekg(0);
 
-    //     char key[128];
-    //     char lowercase_key[128];
-    //     const int LINE_LINGTH = 1024;
-    //     char unused_string[LINE_LINGTH];
+        char key[128];
+        char lowercase_key[128];
+        const int LINE_LINGTH = 1024;
+        char unused_string[LINE_LINGTH];
 
-    //     while (ifs.good())
-    //     {
-    //         ifs >> key;
-    //         //~ cout<<"readin word is: "<<key<<endl;
-    //         strtolower(key, lowercase_key);
-    //         if (strcmp("spin", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.spin;
-    //             ifs >> double_para[0];
-    //             //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
-    //         }
-    //         else if (strcmp("temperature", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.temperature;
-    //             ifs >> double_para[1];
-    //             //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
-    //         }
-    //         else if (strcmp("gap", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.gap;
-    //             ifs >> double_para[2];
-    //             //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
-    //         }
-    //         else if (strcmp("deltae", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.deltaE;
-    //             ifs >> double_para[3];
-    //             //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
-    //         }
-    //         else if (strcmp("numpole", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.numPole;
-    //             ifs >> int_para[0];
-    //             //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
-    //         }
-    //         else if (strcmp("isinertiacount", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isInertiaCount;
-    //             ifs >> int_para[1];
-    //             //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
-    //         }
-    //         else if (strcmp("maxpexsiiter", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.maxPEXSIIter;
-    //             ifs >> int_para[2];
-    //             //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
-    //         }
-    //         else if (strcmp("mumin0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muMin0;
-    //             ifs >> double_para[4];
-    //             //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
-    //         }
-    //         else if (strcmp("mumax0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muMax0;
-    //             ifs >> double_para[5];
-    //             //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
-    //         }
-    //         else if (strcmp("mu0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.mu0;
-    //             ifs >> double_para[6];
-    //             //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
-    //         }
-    //         else if (strcmp("muinertiatolerance", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muInertiaTolerance;
-    //             ifs >> double_para[7];
-    //             //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
-    //         }
-    //         else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muInertiaExpansion;
-    //             ifs >> double_para[8];
-    //             //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
-    //         }
-    //         else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muPEXSISafeGuard;
-    //             ifs >> double_para[9];
-    //             //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
-    //         }
-    //         else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.numElectronPEXSITolerance;
-    //             ifs >> double_para[10];
-    //             //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
-    //         }
-    //         else if (strcmp("zero_limit", lowercase_key) == 0)
-    //         {
-    //             ifs >> double_para[11];
-    //         }
-    //         else if (strcmp("matrixtype", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.matrixType;
-    //             ifs >> int_para[3];
-    //             //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
-    //         }
-    //         else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isSymbolicFactorize;
-    //             ifs >> int_para[4];
-    //             //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
-    //         }
-    //         else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isConstructCommPattern;
-    //             ifs >> int_para[5];
-    //             //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
-    //         }
-    //         else if (strcmp("solver", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.solver;
-    //             ifs >> int_para[6];
-    //             //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
-    //         }
-    //         else if (strcmp("symmetricstorage", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.symmetricStorage;
-    //             ifs >> int_para[7];
-    //             //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
-    //         }
-    //         else if (strcmp("ordering", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.ordering;
-    //             ifs >> int_para[8];
-    //             //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
-    //         }
-    //         else if (strcmp("rowordering", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.rowOrdering;
-    //             ifs >> int_para[9];
-    //             //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
-    //         }
-    //         else if (strcmp("npsymbfact", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.npSymbFact;
-    //             ifs >> int_para[10];
-    //             //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
-    //         }
-    //         else if (strcmp("symmetric", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.symmetric;
-    //             ifs >> int_para[11];
-    //             //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
-    //         }
-    //         else if (strcmp("transpose", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.transpose;
-    //             ifs >> int_para[12];
-    //             //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
-    //         }
-    //         else if (strcmp("method", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.method;
-    //             ifs >> int_para[13];
-    //             //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
-    //         }
-    //         else if (strcmp("npoints", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.nPoints;
-    //             ifs >> int_para[14];
-    //             //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
-    //         }
-    //         else if (strcmp("verbosity", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.verbosity;
-    //             ifs >> int_para[15];
-    //             //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
-    //         }
-    //         else if (strcmp("numprocessperpole", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.verbosity;
-    //             ifs >> int_para[16];
-    //             //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
-    //         }
-    //         else
-    //         {
-    //             if (key[0] == '#' || key[0] == '/')
-    //             {
-    //                 ifs.getline(unused_string, LINE_LINGTH);
-    //             }
-    //             else
-    //             {
-    //                 std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
-    //                 return 1;
-    //             }
-    //         }
-    //     }
-    // }
+        while (ifs.good())
+        {
+            ifs >> key;
+            //~ cout<<"readin word is: "<<key<<endl;
+            strtolower(key, lowercase_key);
+            if (strcmp("spin", lowercase_key) == 0)
+            {
+                //~ ifs>>options.spin;
+                ifs >> double_para[0];
+                //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
+            }
+            else if (strcmp("temperature", lowercase_key) == 0)
+            {
+                //~ ifs>>options.temperature;
+                ifs >> double_para[1];
+                //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
+            }
+            else if (strcmp("gap", lowercase_key) == 0)
+            {
+                //~ ifs>>options.gap;
+                ifs >> double_para[2];
+                //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
+            }
+            else if (strcmp("deltae", lowercase_key) == 0)
+            {
+                //~ ifs>>options.deltaE;
+                ifs >> double_para[3];
+                //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
+            }
+            else if (strcmp("numpole", lowercase_key) == 0)
+            {
+                //~ ifs>>options.numPole;
+                ifs >> int_para[0];
+                //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
+            }
+            else if (strcmp("isinertiacount", lowercase_key) == 0)
+            {
+                //~ ifs>>options.isInertiaCount;
+                ifs >> int_para[1];
+                //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
+            }
+            else if (strcmp("maxpexsiiter", lowercase_key) == 0)
+            {
+                //~ ifs>>options.maxPEXSIIter;
+                ifs >> int_para[2];
+                //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
+            }
+            else if (strcmp("mumin0", lowercase_key) == 0)
+            {
+                //~ ifs>>options.muMin0;
+                ifs >> double_para[4];
+                //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
+            }
+            else if (strcmp("mumax0", lowercase_key) == 0)
+            {
+                //~ ifs>>options.muMax0;
+                ifs >> double_para[5];
+                //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
+            }
+            else if (strcmp("mu0", lowercase_key) == 0)
+            {
+                //~ ifs>>options.mu0;
+                ifs >> double_para[6];
+                //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
+            }
+            else if (strcmp("muinertiatolerance", lowercase_key) == 0)
+            {
+                //~ ifs>>options.muInertiaTolerance;
+                ifs >> double_para[7];
+                //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
+            }
+            else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
+            {
+                //~ ifs>>options.muInertiaExpansion;
+                ifs >> double_para[8];
+                //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
+            }
+            else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
+            {
+                //~ ifs>>options.muPEXSISafeGuard;
+                ifs >> double_para[9];
+                //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
+            }
+            else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
+            {
+                //~ ifs>>options.numElectronPEXSITolerance;
+                ifs >> double_para[10];
+                //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
+            }
+            else if (strcmp("zero_limit", lowercase_key) == 0)
+            {
+                ifs >> double_para[11];
+            }
+            else if (strcmp("matrixtype", lowercase_key) == 0)
+            {
+                //~ ifs>>options.matrixType;
+                ifs >> int_para[3];
+                //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
+            }
+            else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
+            {
+                //~ ifs>>options.isSymbolicFactorize;
+                ifs >> int_para[4];
+                //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
+            }
+            else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
+            {
+                //~ ifs>>options.isConstructCommPattern;
+                ifs >> int_para[5];
+                //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
+            }
+            else if (strcmp("solver", lowercase_key) == 0)
+            {
+                //~ ifs>>options.solver;
+                ifs >> int_para[6];
+                //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
+            }
+            else if (strcmp("symmetricstorage", lowercase_key) == 0)
+            {
+                //~ ifs>>options.symmetricStorage;
+                ifs >> int_para[7];
+                //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
+            }
+            else if (strcmp("ordering", lowercase_key) == 0)
+            {
+                //~ ifs>>options.ordering;
+                ifs >> int_para[8];
+                //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
+            }
+            else if (strcmp("rowordering", lowercase_key) == 0)
+            {
+                //~ ifs>>options.rowOrdering;
+                ifs >> int_para[9];
+                //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
+            }
+            else if (strcmp("npsymbfact", lowercase_key) == 0)
+            {
+                //~ ifs>>options.npSymbFact;
+                ifs >> int_para[10];
+                //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
+            }
+            else if (strcmp("symmetric", lowercase_key) == 0)
+            {
+                //~ ifs>>options.symmetric;
+                ifs >> int_para[11];
+                //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
+            }
+            else if (strcmp("transpose", lowercase_key) == 0)
+            {
+                //~ ifs>>options.transpose;
+                ifs >> int_para[12];
+                //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
+            }
+            else if (strcmp("method", lowercase_key) == 0)
+            {
+                //~ ifs>>options.method;
+                ifs >> int_para[13];
+                //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
+            }
+            else if (strcmp("npoints", lowercase_key) == 0)
+            {
+                //~ ifs>>options.nPoints;
+                ifs >> int_para[14];
+                //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
+            }
+            else if (strcmp("verbosity", lowercase_key) == 0)
+            {
+                //~ ifs>>options.verbosity;
+                ifs >> int_para[15];
+                //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
+            }
+            else if (strcmp("numprocessperpole", lowercase_key) == 0)
+            {
+                //~ ifs>>options.verbosity;
+                ifs >> int_para[16];
+                //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
+            }
+            else
+            {
+                if (key[0] == '#' || key[0] == '/')
+                {
+                    ifs.getline(unused_string, LINE_LINGTH);
+                }
+                else
+                {
+                    std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
+                    return 1;
+                }
+            }
+        }
+    }
 
-    // // broadcast all options
-    // MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
-    // MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
+    // broadcast all options
+    MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
+    MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
 
     // setup PEXSI options from int_para and double_para
     options.numPole = int_para[0];
@@ -492,7 +458,14 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
 //}
 #endif
-    outputFileIndex = -1;
+    if (myid % (pexsi_prow * pexsi_pcol) == 0)
+    {
+        outputFileIndex = myid / (pexsi_prow * pexsi_pcol);
+    }
+    else
+    {
+        outputFileIndex = -1;
+    }
     // OUT(ofs_running, "checkpoint04");
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
     if (comm_PEXSI != MPI_COMM_NULL)
@@ -550,7 +523,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint12");
-    DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
+    transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
     // MPI_Barrier(MPI_COMM_WORLD);
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // OUT(ofs_running, "checkpoint13");
@@ -569,11 +542,11 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         PPEXSILoadRealHSMatrix(plan,
                                options,
                                size,
-                               DST_Matrix.get_nnz(),
-                               DST_Matrix.get_nnzlocal(),
-                               DST_Matrix.get_numcol_local(),
-                               DST_Matrix.get_colptr_local(),
-                               DST_Matrix.get_rowind_local(),
+                               DST_Matrix.nnz,
+                               DST_Matrix.nnzLocal,
+                               DST_Matrix.numColLocal,
+                               DST_Matrix.colptrLocal,
+                               DST_Matrix.rowindLocal,
                                HnzvalLocal,
                                isSIdentity,
                                SnzvalLocal,
@@ -627,9 +600,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
             delete[] EDMnzvalLocal;
         if (FDMnzvalLocal != nullptr)
             delete[] FDMnzvalLocal;
-        DMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
-        EDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
-        FDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()];
+        DMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        EDMnzvalLocal = new double[DST_Matrix.nnzLocal];
+        FDMnzvalLocal = new double[DST_Matrix.nnzLocal];
         if (myid < numProcessPerPole)
         {
             PPEXSIRetrieveRealDFTMatrix(plan,
@@ -660,8 +633,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     {
         delete[] DM;
         delete[] EDM;
-        DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
-        EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
+        DM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
+        EDM = new double[SRC_Matrix.nrow * SRC_Matrix.ncol];
     }
 #ifdef _DEBUG
     // OUT(ofs_running, "checkpoint19");
@@ -671,7 +644,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
 #endif
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
-    DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
+    transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
 
@@ -729,5 +702,4 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }
-} // namespace pexsi
-#endif
\ No newline at end of file
+} // namespace pexsi
\ No newline at end of file
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index b74ac6b104..cc079cf208 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -22,7 +22,6 @@
 #include "module_base/global_variable.h"
 #include "module_base/parallel_common.h"
 #include "module_base/timer.h"
-#include "module_base/tool_quit.h"
 #include "version.h"
 Input INPUT;
 
@@ -304,7 +303,6 @@ void Input::Default(void)
     mixing_mode = "broyden";
     mixing_beta = -10;
     mixing_ndim = 8;
-    mixing_restart = 0;
     mixing_gg0 = 1.00;       // use Kerker defaultly
     mixing_beta_mag = -10.0; // only set when nspin == 2 || nspin == 4
     mixing_gg0_mag = 0.0;    // defaultly exclude Kerker from mixing magnetic density
@@ -329,8 +327,6 @@ void Input::Default(void)
 
     out_bandgap = 0; // QO added for bandgap printing
 
-    band_print_num = 0;
-
     deepks_out_labels = 0; // caoyu added 2020-11-24, mohan added 2021-01-03
     deepks_scf = 0;
     deepks_bandgap = 0;
@@ -340,7 +336,7 @@ void Input::Default(void)
     out_wfc_pw = 0;
     out_wfc_r = 0;
     out_dos = 0;
-    out_band = {0, 8};
+    out_band = 0;
     out_proj_band = 0;
     out_mat_hs = {0, 8};
     out_mat_xc = 0;
@@ -639,34 +635,6 @@ void Input::Default(void)
     qo_thr = 1e-6;
     qo_screening_coeff = {};
 
-    //==========================================================
-    // variables for PEXSI
-    //==========================================================
-    pexsi_npole = 54;
-    pexsi_inertia = 1;
-    pexsi_nmax = 80;
-    // pexsi_symbolic = 1;
-    pexsi_comm = 1;
-    pexsi_storage = 1;
-    pexsi_ordering = 0;
-    pexsi_row_ordering = 1;
-    pexsi_nproc = 1;
-    pexsi_symm = 1;
-    pexsi_trans = 0;
-    pexsi_method = 1;
-    pexsi_nproc_pole = 1;
-    // pexsi_spin = 2;
-    pexsi_temp = 0.0001;
-    pexsi_gap = 0;
-    pexsi_delta_e = 20.0;
-    pexsi_mu_lower = -10;
-    pexsi_mu_upper = 10;
-    pexsi_mu = 0.0;
-    pexsi_mu_thr = 0.05;
-    pexsi_mu_expand = 0.3;
-    pexsi_mu_guard = 0.2;
-    pexsi_elec_thr = 0.001;
-    pexsi_zero_thr = 1e-10;
     return;
 }
 
@@ -1288,10 +1256,6 @@ bool Input::Read(const std::string& fn)
         {
             read_value(ifs, mixing_ndim);
         }
-        else if (strcmp("mixing_restart", word) == 0)
-        {
-            read_value(ifs, mixing_restart);
-        }
         else if (strcmp("mixing_gg0", word) == 0) // mohan add 2014-09-27
         {
             read_value(ifs, mixing_gg0);
@@ -1363,14 +1327,6 @@ bool Input::Read(const std::string& fn)
         {
             read_bool(ifs, out_chg);
         }
-        else if (strcmp("band_print_num", word) == 0)
-        {
-            read_value(ifs, band_print_num);
-        }
-        else if (strcmp("bands_to_print", word) == 0)
-        {
-            ifs.ignore(150, '\n');
-        }
         else if (strcmp("out_dm", word) == 0)
         {
             read_bool(ifs, out_dm);
@@ -1422,13 +1378,13 @@ bool Input::Read(const std::string& fn)
         }
         else if (strcmp("out_band", word) == 0)
         {
-            read_value2stdvector(ifs, out_band);
-            if(out_band.size() == 1) out_band.push_back(8);
+            read_bool(ifs, out_band);
         }
         else if (strcmp("out_proj_band", word) == 0)
         {
             read_bool(ifs, out_proj_band);
         }
+
         else if (strcmp("out_mat_hs", word) == 0)
         {
             read_value2stdvector(ifs, out_mat_hs);
@@ -2334,9 +2290,6 @@ bool Input::Read(const std::string& fn)
         {
             read_value(ifs, sc_file);
         }
-        //----------------------------------------------------------------------------------
-        //    Quasiatomic orbital
-        //----------------------------------------------------------------------------------
         else if (strcmp("qo_switch", word) == 0){
             read_bool(ifs, qo_switch);
         }
@@ -2352,106 +2305,6 @@ bool Input::Read(const std::string& fn)
         else if (strcmp("qo_screening_coeff", word) == 0){
             read_value2stdvector(ifs, qo_screening_coeff);
         }
-        //----------------------------------------------------------------------------------
-        //    PEXSI
-        //----------------------------------------------------------------------------------
-        else if (strcmp("pexsi_npole", word) == 0){
-            read_value(ifs, pexsi_npole);
-        }
-        else if (strcmp("pexsi_inertia", word) == 0){
-            read_value(ifs, pexsi_inertia);
-        }
-        else if (strcmp("pexsi_nmax", word) == 0) {
-            read_value(ifs, pexsi_nmax);
-        }
-        // else if (strcmp("pexsi_symbolic", word) == 0)
-        // {
-        //     read_value(ifs, pexsi_symbolic);
-        // }
-        else if (strcmp("pexsi_comm", word) == 0)
-        {
-            read_value(ifs, pexsi_comm);
-        }
-        else if (strcmp("pexsi_storage", word) == 0)
-        {
-            read_value(ifs, pexsi_storage);
-        }
-        else if (strcmp("pexsi_ordering", word) == 0)
-        {
-            read_value(ifs, pexsi_ordering);
-        }
-        else if (strcmp("pexsi_row_ordering", word) == 0)
-        {
-            read_value(ifs, pexsi_row_ordering);
-        }
-        else if (strcmp("pexsi_nproc", word) == 0)
-        {
-            read_value(ifs, pexsi_nproc);
-        }
-        else if (strcmp("pexsi_symm", word) == 0)
-        {
-            read_value(ifs, pexsi_symm);
-        }
-        else if (strcmp("pexsi_trans", word) == 0)
-        {
-            read_value(ifs, pexsi_trans);
-        }
-        else if (strcmp("pexsi_method", word) == 0)
-        {
-            read_value(ifs, pexsi_method);
-        }
-        else if (strcmp("pexsi_nproc_pole", word) == 0)
-        {
-            read_value(ifs, pexsi_nproc_pole);
-        }
-        // else if (strcmp("pexsi_spin", word) == 0)
-        // {
-        //     read_value(ifs, pexsi_spin);
-        // }
-        else if (strcmp("pexsi_temp", word) == 0)
-        {
-            read_value(ifs, pexsi_temp);
-        }
-        else if (strcmp("pexsi_gap", word) == 0)
-        {
-            read_value(ifs, pexsi_gap);
-        }
-        else if (strcmp("pexsi_delta_e", word) == 0)
-        {
-            read_value(ifs, pexsi_delta_e);
-        }
-        else if (strcmp("pexsi_mu_lower", word) == 0)
-        {
-            read_value(ifs, pexsi_mu_lower);
-        }
-        else if (strcmp("pexsi_mu_upper", word) == 0)
-        {
-            read_value(ifs, pexsi_mu_upper);
-        }
-        else if (strcmp("pexsi_mu", word) == 0)
-        {
-            read_value(ifs, pexsi_mu);
-        }
-        else if (strcmp("pexsi_mu_thr", word) == 0)
-        {
-            read_value(ifs, pexsi_mu_thr);
-        }
-        else if (strcmp("pexsi_mu_expand", word) == 0)
-        {
-            read_value(ifs, pexsi_mu_expand);
-        }
-        else if (strcmp("pexsi_mu_guard", word) == 0)
-        {
-            read_value(ifs, pexsi_mu_guard);
-        }
-        else if (strcmp("pexsi_elec_thr", word) == 0)
-        {
-            read_value(ifs, pexsi_elec_thr);
-        }
-        else if (strcmp("pexsi_zero_thr", word) == 0)
-        {
-            read_value(ifs, pexsi_zero_thr);
-        }
         else
         {
             // xiaohui add 2015-09-15
@@ -2516,29 +2369,6 @@ bool Input::Read(const std::string& fn)
         ModuleBase::WARNING_QUIT("Input", "The ntype in INPUT is not equal to the ntype counted in STRU, check it.");
     }
 
-    if(band_print_num > 0)
-    {
-        bands_to_print.resize(band_print_num);
-        ifs.clear();
-        ifs.seekg(0); // move to the beginning of the file
-        ifs.rdstate();
-        while (ifs.good())
-        {
-            ifs >> word1;
-            if (ifs.eof() != 0)
-                break;
-            strtolower(word1, word); // convert uppercase std::string to lower case; word1 --> word
-
-            if (strcmp("bands_to_print", word) == 0)
-            {
-                for(int i = 0; i < band_print_num; i ++)
-                {
-                    ifs >> bands_to_print[i];
-                }
-            }
-        }
-    }
-
     //----------------------------------------------------------
     //       DFT+U    Xin Qu  added on 2020-10-29
     //----------------------------------------------------------
@@ -2996,7 +2826,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04
         this->relax_nmax = 1;
         out_stru = 0;
         out_dos = 0;
-        out_band[0] = 0;
+        out_band = 0;
         out_proj_band = 0;
         cal_force = 0;
         init_wfc = "file";
@@ -3013,7 +2843,7 @@ void Input::Default_2(void) // jiyy add 2019-08-04
         this->relax_nmax = 1;
         out_stru = 0;
         out_dos = 0;
-        out_band[0] = 0;
+        out_band = 0;
         out_proj_band = 0;
         cal_force = 0;
         init_wfc = "file";
@@ -3462,7 +3292,6 @@ void Input::Bcast()
     Parallel_Common::bcast_string(mixing_mode);
     Parallel_Common::bcast_double(mixing_beta);
     Parallel_Common::bcast_int(mixing_ndim);
-    Parallel_Common::bcast_int(mixing_restart);
     Parallel_Common::bcast_double(mixing_gg0); // mohan add 2014-09-27
     Parallel_Common::bcast_double(mixing_beta_mag);
     Parallel_Common::bcast_double(mixing_gg0_mag);
@@ -3496,8 +3325,7 @@ void Input::Bcast()
     Parallel_Common::bcast_int(out_wfc_pw);
     Parallel_Common::bcast_bool(out_wfc_r);
     Parallel_Common::bcast_int(out_dos);
-    if(GlobalV::MY_RANK != 0) out_band.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */
-    Parallel_Common::bcast_int(out_band.data(), 2);
+    Parallel_Common::bcast_bool(out_band);
     Parallel_Common::bcast_bool(out_proj_band);
     if(GlobalV::MY_RANK != 0) out_mat_hs.resize(2); /* If this line is absent, will cause segmentation fault in io_input_test_para */
     Parallel_Common::bcast_int(out_mat_hs.data(), 2);
@@ -3695,17 +3523,6 @@ void Input::Bcast()
     Parallel_Common::bcast_bool(restart_save);  // Peize Lin add 2020.04.04
     Parallel_Common::bcast_bool(restart_load);  // Peize Lin add 2020.04.04
 
-    Parallel_Common::bcast_int(band_print_num);
-    if(GlobalV::MY_RANK != 0)
-    {
-        bands_to_print.resize(band_print_num);
-    }
-
-    for(int i = 0; i < band_print_num; i++)
-    {
-        Parallel_Common::bcast_int(bands_to_print[i]);
-    }
-
     //-----------------------------------------------------------------------------------
     // DFT+U (added by Quxin 2020-10-29)
     //-----------------------------------------------------------------------------------
@@ -3808,34 +3625,6 @@ void Input::Bcast()
     Parallel_Common::bcast_bool(qo_switch);
     Parallel_Common::bcast_string(qo_basis);
     Parallel_Common::bcast_double(qo_thr);
-    //==========================================================
-    // PEXSI
-    //==========================================================
-    Parallel_Common::bcast_int(pexsi_npole);
-    Parallel_Common::bcast_int(pexsi_inertia);
-    Parallel_Common::bcast_int(pexsi_nmax);
-    // Parallel_Common::bcast_int(pexsi_symbolic);
-    Parallel_Common::bcast_int(pexsi_comm);
-    Parallel_Common::bcast_int(pexsi_storage);
-    Parallel_Common::bcast_int(pexsi_ordering);
-    Parallel_Common::bcast_int(pexsi_row_ordering);
-    Parallel_Common::bcast_int(pexsi_nproc);
-    Parallel_Common::bcast_int(pexsi_symm);
-    Parallel_Common::bcast_int(pexsi_trans);
-    Parallel_Common::bcast_int(pexsi_method);
-    Parallel_Common::bcast_int(pexsi_nproc_pole);
-    // Parallel_Common::bcast_double(pexsi_spin);
-    Parallel_Common::bcast_double(pexsi_temp);
-    Parallel_Common::bcast_double(pexsi_gap);
-    Parallel_Common::bcast_double(pexsi_delta_e);
-    Parallel_Common::bcast_double(pexsi_mu_lower);
-    Parallel_Common::bcast_double(pexsi_mu_upper);
-    Parallel_Common::bcast_double(pexsi_mu);
-    Parallel_Common::bcast_double(pexsi_mu_thr);
-    Parallel_Common::bcast_double(pexsi_mu_expand);
-    Parallel_Common::bcast_double(pexsi_mu_guard);
-    Parallel_Common::bcast_double(pexsi_elec_thr);
-    Parallel_Common::bcast_double(pexsi_zero_thr);
     /* broadcasting std::vector is sometime a annorying task... */
     if (ntype != 0) /* ntype has been broadcasted before */
     {
@@ -4133,11 +3922,10 @@ void Input::Check(void)
         }
         else if (ks_solver == "pexsi")
         {
-#ifdef __PEXSI
-            GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
+#ifndef __MPI
+            ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version.");
 #else
-            ModuleBase::WARNING_QUIT("Input",
-                "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx.");
+            GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl;
 #endif
 
 
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 1d29c6311a..b4e983abad 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -232,7 +232,6 @@ class Input
     std::string mixing_mode; // "plain","broyden",...
     double mixing_beta; // 0 : no_mixing
     int mixing_ndim; // used in Broyden method
-    int mixing_restart;
     double mixing_gg0; // used in kerker method. mohan add 2014-09-27
     double mixing_beta_mag;
     double mixing_gg0_mag;
@@ -260,13 +259,11 @@ class Input
     bool out_chg; // output charge density. 0: no; 1: yes
     bool out_dm; // output density matrix.
     bool out_dm1;
-    int band_print_num;
-    std::vector<int> bands_to_print;
     int out_pot; // yes or no
     int out_wfc_pw; // 0: no; 1: txt; 2: dat
     bool out_wfc_r; // 0: no; 1: yes
     int out_dos; // dos calculation. mohan add 20090909
-    std::vector<int> out_band; // band calculation pengfei 2014-10-13
+    bool out_band; // band calculation pengfei 2014-10-13
     bool out_proj_band; // projected band structure calculation jiyy add 2022-05-11
     std::vector<int> out_mat_hs; // output H matrix and S matrix in local basis.
     bool out_mat_xc; // output exchange-correlation matrix in KS-orbital representation.
@@ -602,34 +599,6 @@ class Input
     double qo_thr = 1e-6;
     std::vector<std::string> qo_strategy = {};
     std::vector<double> qo_screening_coeff = {};
-    //==========================================================
-    // variables for PEXSI
-    //==========================================================
-    int pexsi_npole = 54;
-    int pexsi_inertia = 1;
-    int pexsi_nmax = 80;
-    // int pexsi_symbolic = 1;
-    int pexsi_comm = 1;
-    int pexsi_storage = 1;
-    int pexsi_ordering = 0;
-    int pexsi_row_ordering = 1;
-    int pexsi_nproc = 1;
-    int pexsi_symm = 1;
-    int pexsi_trans = 0;
-    int pexsi_method = 1;
-    int pexsi_nproc_pole = 1;
-    // double pexsi_spin = 2;
-    double pexsi_temp = 0.0001;
-    double pexsi_gap = 0;
-    double pexsi_delta_e = 20.0;
-    double pexsi_mu_lower = -10;
-    double pexsi_mu_upper = 10;
-    double pexsi_mu = 0.0;
-    double pexsi_mu_thr = 0.05;
-    double pexsi_mu_expand = 0.3;
-    double pexsi_mu_guard = 0.2;
-    double pexsi_elec_thr = 0.001;
-    double pexsi_zero_thr = 1e-10;
     
   private:
     //==========================================================
@@ -698,15 +667,7 @@ class Input
     template <typename T>
     typename std::enable_if<std::is_same<T, double>::value, T>::type cast_string(const std::string& str) { return std::stod(str); }
     template <typename T>
-    typename std::enable_if<std::is_same<T, int>::value, T>::type cast_string(const std::string& str)
-    {
-        if (str == "true" || str == "1")
-            return 1;
-        else if (str == "false" || str == "0")
-            return 0;
-        else
-            return std::stoi(str);
-    }
+    typename std::enable_if<std::is_same<T, int>::value, T>::type cast_string(const std::string& str) { return std::stoi(str); }
     template <typename T>
     typename std::enable_if<std::is_same<T, bool>::value, T>::type cast_string(const std::string& str) { return (str == "true" || str == "1"); }
     template <typename T>
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index d6e3371111..a52245d05c 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -750,7 +750,6 @@ void Input_Conv::Convert(void)
     GlobalV::MIXING_MODE = INPUT.mixing_mode;
     GlobalV::MIXING_BETA = INPUT.mixing_beta;
     GlobalV::MIXING_NDIM = INPUT.mixing_ndim;
-    GlobalV::MIXING_RESTART = INPUT.mixing_restart;
     GlobalV::MIXING_GG0 = INPUT.mixing_gg0;
     GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag;
     GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag;
@@ -766,35 +765,6 @@ void Input_Conv::Convert(void)
     GlobalV::qo_strategy = INPUT.qo_strategy;
     GlobalV::qo_thr = INPUT.qo_thr;
     GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff;
-
-    //-----------------------------------------------
-    // PEXSI related parameters
-    //-----------------------------------------------
-    GlobalV::pexsi_npole = INPUT.pexsi_npole;
-    GlobalV::pexsi_inertia = INPUT.pexsi_inertia;
-    GlobalV::pexsi_nmax = INPUT.pexsi_nmax;
-    // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic;
-    GlobalV::pexsi_comm = INPUT.pexsi_comm;
-    GlobalV::pexsi_storage = INPUT.pexsi_storage;
-    GlobalV::pexsi_ordering = INPUT.pexsi_ordering;
-    GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering;
-    GlobalV::pexsi_nproc = INPUT.pexsi_nproc;
-    GlobalV::pexsi_symm = INPUT.pexsi_symm;
-    GlobalV::pexsi_trans = INPUT.pexsi_trans;
-    GlobalV::pexsi_method = INPUT.pexsi_method;
-    GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
-    // GlobalV::pexsi_spin = INPUT.pexsi_spin;
-    GlobalV::pexsi_temp = INPUT.pexsi_temp;
-    GlobalV::pexsi_gap = INPUT.pexsi_gap;
-    GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e;
-    GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower;
-    GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper;
-    GlobalV::pexsi_mu = INPUT.pexsi_mu;
-    GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr;
-    GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand;
-    GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard;
-    GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr;
-    GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr;
     ModuleBase::timer::tick("Input_Conv", "Convert");
     return;
 }
diff --git a/source/module_io/mulliken_charge.cpp b/source/module_io/mulliken_charge.cpp
index bdcdb5a035..393da5fda4 100644
--- a/source/module_io/mulliken_charge.cpp
+++ b/source/module_io/mulliken_charge.cpp
@@ -44,7 +44,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector<std::vector<double>>
         const char N_char = 'N';
         const int one_int = 1;
         const double one_float = 1.0, zero_float = 0.0;        
-        pdgemm_(&N_char,
+        pdgemm_(&T_char,
                 &T_char,
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
@@ -156,7 +156,7 @@ ModuleBase::matrix ModuleIO::cal_mulliken(const std::vector<std::vector<std::com
         const char N_char = 'N';
         const int one_int = 1;
         const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
-        pzgemm_(&N_char,
+        pzgemm_(&T_char,
                 &T_char,
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
diff --git a/source/module_io/nscf_band.cpp b/source/module_io/nscf_band.cpp
index 290dc58bd3..d8b7b05ca6 100644
--- a/source/module_io/nscf_band.cpp
+++ b/source/module_io/nscf_band.cpp
@@ -3,7 +3,6 @@
 #include "module_base/global_variable.h"
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_base/formatter_physfmt.h"
 
 void ModuleIO::nscf_band(
 	const int &is,
@@ -11,7 +10,6 @@ void ModuleIO::nscf_band(
 	const int &nks, 
 	const int &nband,
 	const double &fermie,
-	const int &precision,
 	const ModuleBase::matrix& ekb,
 	const K_Vectors& kv,
 	const Parallel_Kpoints* Pkpoints)
@@ -35,28 +33,23 @@ void ModuleIO::nscf_band(
 		if (ik>0)
 		{
 			auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1];
-			klength[ik] = klength[ik-1];
-			klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? delta.norm() : 0.0;
+			klength[ik] = klength[ik-1] + delta.norm();
 		}
-		/* first find if present kpoint in present pool */
 		if ( GlobalV::MY_POOL == Pkpoints->whichpool[ik] )
 		{
-			/* then get the local kpoint index, which starts definitly from 0 */
 			const int ik_now = ik - Pkpoints->startk_pool[GlobalV::MY_POOL];
-			/* if present kpoint corresponds the spin of the present one */
 			if( kv.isk[ik_now+is*nks] == is )
 			{ 
 				if ( GlobalV::RANK_IN_POOL == 0)
 				{
-					formatter::PhysicalFmt physfmt; // create a physical formatter temporarily
-					std::ofstream ofs(out_band_dir.c_str(), std::ios::app);
-					physfmt.adjust_formatter_flexible(4, 0, false); // for integer
-					ofs << physfmt.get_p_formatter()->format(ik+1);
-					physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal
-					ofs << physfmt.get_p_formatter()->format(klength[ik]);
+					std::ofstream ofs(out_band_dir.c_str(),std::ios::app);
+					ofs << std::setprecision(8);
+					//start from 1
+					ofs << ik+1;
+					ofs << " " << klength[ik] << " ";
 					for(int ib = 0; ib < nband; ib++)
 					{
-						ofs << physfmt.get_p_formatter()->format((ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV);
+						ofs << " " << (ekb(ik_now+is*nks, ib)-fermie) * ModuleBase::Ry_to_eV;
 					}
 					ofs << std::endl;
 					ofs.close();	
@@ -90,30 +83,18 @@ void ModuleIO::nscf_band(
 #else
 //	std::cout<<"\n nband = "<<nband<<std::endl;
 //	std::cout<<out_band_dir<<std::endl;
-	formatter::PhysicalFmt physfmt; // create a physical formatter temporarily
-	std::vector<double> klength;
-	klength.resize(nks);
-	klength[0] = 0.0;
+
 	std::ofstream ofs(out_band_dir.c_str());
 	for(int ik=0;ik<nks;ik++)
 	{
-		if (ik>0)
-		{
-			auto delta=kv.kvec_c[ik]-kv.kvec_c[ik-1];
-			klength[ik] = klength[ik-1];
-			klength[ik] += (kv.kl_segids[ik] == kv.kl_segids[ik-1]) ? delta.norm() : 0.0;
-		}
 		if( kv.isk[ik] == is)
 		{
-			physfmt.adjust_formatter_flexible(4, 0, false); // for integer
-			ofs << physfmt.get_p_formatter()->format(ik+1);
-			physfmt.adjust_formatter_flexible(precision, 4.0/double(precision), false); // for decimal
-			ofs << physfmt.get_p_formatter()->format(klength[ik]); // add klength, in accordance with the MPI version
+			ofs<<std::setw(12)<<ik + 1;
 			for(int ibnd = 0; ibnd < nband; ibnd++)
 			{
-				ofs << physfmt.get_p_formatter()->format((ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV);
+				ofs <<std::setw(15) << (ekb(ik, ibnd)-fermie) * ModuleBase::Ry_to_eV;
 			}
-			ofs << std::endl;
+			ofs<<std::endl;
 		}
 	}
 	ofs.close();
diff --git a/source/module_io/nscf_band.h b/source/module_io/nscf_band.h
index 3ec96d4a9f..6a22427551 100644
--- a/source/module_io/nscf_band.h
+++ b/source/module_io/nscf_band.h
@@ -12,7 +12,6 @@ namespace ModuleIO
 		const int &nks, 
 		const int &nband, 
 		const double &fermie,
-		const int &precision,
 		const ModuleBase::matrix &ekb,
 		const K_Vectors& kv,
 		const Parallel_Kpoints* Pkpoints);
diff --git a/source/module_io/parameter_pool.cpp b/source/module_io/parameter_pool.cpp
index 524df9de87..906b9b57d8 100644
--- a/source/module_io/parameter_pool.cpp
+++ b/source/module_io/parameter_pool.cpp
@@ -69,7 +69,7 @@ int count_ntype(const std::string& fn)
  * @param input_value_path parameter default value file path
  * @param input_value_path parameter input value file path
  */
-void Init(const std::string& default_type_path,
+bool Init(const std::string& default_type_path,
           const std::string& default_value_path,
           const std::string& input_value_path)
 {
@@ -103,8 +103,10 @@ void strtolower(char* sa, char* sb)
  * @brief Reads the default parameters from the specified file and saves them to the global variable
  *        default_parametes_type
  * @param fn Specifies the path to the file
+ * @return true Read successfully
+ * @return false Read failure
  */
-void default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type)
+bool default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type)
 {
     std::ifstream inputFile(fn.c_str());
     if (inputFile.is_open())
@@ -120,24 +122,28 @@ void default_parametes_reader(const std::string& fn, std::map<std::string, std::
     }
     else
     {
-        ModuleBase::WARNING_QUIT("Input", "Cannot open file" + fn);
+        std::cout << "Cannot open file !" << std::endl;
     }
 }
 /**
  * @brief This function is used to read the input parameter file and store it as a key-value pair
  * @param fn Enter the path to the parameter file
  */
-void input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input)
+bool input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input)
 {
+    // The module title information is displayed
     ModuleBase::TITLE("Input", "Read");
+    // If it is not the primary node, return false
     if (GlobalV::MY_RANK != 0)
-        return;
+        return false;
 
     // Open the input parameter file
     std::ifstream ifs(fn.c_str(), std::ios::in); // "in_datas/input_parameters"
+    // If the opening fails, an error message is printed and false is returned
     if (!ifs)
     {
-        ModuleBase::WARNING_QUIT("Input", "Can't find the INPUT file at " + fn);
+        std::cout << " Can't find the INPUT file." << std::endl;
+        return false;
     }
     ifs.clear();
     ifs.seekg(0);
@@ -160,7 +166,8 @@ void input_parameters_get(const std::string& fn, std::map<std::string, InputPara
     // If ierr is 0, the word "INPUT_PARAMETERS" is not found, and an error message is printed with false
     if (ierr == 0)
     {
-        ModuleBase::WARNING_QUIT("Input", "INPUT_PARAMETERS statement not found.");
+        std::cout << " Error parameter list." << std::endl;
+        return false; // return error : false
     }
     ifs.rdstate();
 
@@ -267,11 +274,15 @@ void input_parameters_get(const std::string& fn, std::map<std::string, InputPara
         }
         else if (ifs.bad() != 0)
         {
-            ModuleBase::WARNING_QUIT("Input", "Bad input parameters.");
+            std::cout << " Bad input parameters. " << std::endl;
+            return false;
         }
         else if (ifs.fail() != 0)
         {
-            ModuleBase::WARNING_QUIT("Input", "Fail to read parameters: word = " + std::string(word));
+            std::cout << " word = " << word << std::endl;
+            std::cout << " Fail to read parameters. " << std::endl;
+            ifs.clear();
+            return false;
         }
         else if (ifs.good() == 0)
         {
@@ -295,9 +306,11 @@ void input_parameters_get(const std::string& fn, std::map<std::string, InputPara
     {
         ModuleBase::WARNING_QUIT("Input", "The ntype in INPUT is not equal to the ntype counted in STRU, check it.");
     }
+
+    return true;
 }
 
-void input_parameters_set(std::map<std::string, InputParameter> input_parameters)
+bool input_parameters_set(std::map<std::string, InputParameter> input_parameters)
 {
     if (input_parameters.count("nupdown") != 0)
     {
@@ -818,10 +831,6 @@ void input_parameters_set(std::map<std::string, InputParameter> input_parameters
     {
         INPUT.mixing_ndim = *static_cast<int*>(input_parameters["mixing_ndim"].get());
     }
-    else if (input_parameters.count("mixing_restart") != 0)
-    {
-        INPUT.mixing_restart = *static_cast<int*>(input_parameters["mixing_restart"].get());
-    }
     else if (input_parameters.count("mixing_gg0") != 0)
     {
         INPUT.mixing_gg0 = *static_cast<double*>(input_parameters["mixing_gg0"].get());
@@ -908,7 +917,7 @@ void input_parameters_set(std::map<std::string, InputParameter> input_parameters
     }
     else if (input_parameters.count("out_band") != 0)
     {
-        INPUT.out_band = *static_cast<std::vector<int>*>(input_parameters["out_band"].get());
+        INPUT.out_band = *static_cast<bool*>(input_parameters["out_band"].get());
     }
     else if (input_parameters.count("out_proj_band") != 0)
     {
diff --git a/source/module_io/parameter_pool.h b/source/module_io/parameter_pool.h
index bd4ae575dd..83baedd036 100644
--- a/source/module_io/parameter_pool.h
+++ b/source/module_io/parameter_pool.h
@@ -241,12 +241,12 @@ class InputParameter
         }
     }
 };
-void Init(const std::string& default_type_path,
+bool Init(const std::string& default_type_path,
           const std::string& default_value_path,
           const std::string& input_value_path);
-void default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type);
-void input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input);
-void input_parameters_set(std::map<std::string, InputParameter> input_parameters);
+bool default_parametes_reader(const std::string& fn, std::map<std::string, std::string>& default_parametes_type);
+bool input_parameters_get(const std::string& fn, std::map<std::string, InputParameter>& input);
+bool input_parameters_set(std::map<std::string, InputParameter> input_parameters);
 
 extern std::map<std::string, InputParameter> input_parameters;
 extern std::map<std::string, std::string> default_parametes_type;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index a566827792..f0d7e43f68 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -183,7 +183,6 @@ TEST_F(InputConvTest, Conv)
 	EXPECT_EQ(GlobalV::sc_mag_switch,0);
     EXPECT_TRUE(GlobalV::decay_grad_switch);
     EXPECT_EQ(GlobalV::sc_file, "sc.json");
-	EXPECT_EQ(GlobalV::MIXING_RESTART,0);
 }
 
 TEST_F(InputConvTest, ConvRelax)
diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp
index 11bce873ab..02a5a19e10 100644
--- a/source/module_io/test/input_test.cpp
+++ b/source/module_io/test/input_test.cpp
@@ -176,11 +176,9 @@ TEST_F(InputTest, Default)
         EXPECT_EQ(INPUT.out_wfc_pw,0);
         EXPECT_EQ(INPUT.out_wfc_r,0);
         EXPECT_EQ(INPUT.out_dos,0);
-        EXPECT_EQ(INPUT.out_band[0],0);
-		EXPECT_EQ(INPUT.out_band[1],8);
+        EXPECT_EQ(INPUT.out_band,0);
         EXPECT_EQ(INPUT.out_proj_band,0);
         EXPECT_EQ(INPUT.out_mat_hs[0],0);
-		EXPECT_EQ(INPUT.out_mat_hs[1],8);
         EXPECT_EQ(INPUT.out_mat_hs2,0);
         EXPECT_EQ(INPUT.out_mat_xc, 0);
         EXPECT_EQ(INPUT.out_interval,1);
@@ -541,11 +539,9 @@ TEST_F(InputTest, Read)
         EXPECT_EQ(INPUT.out_wfc_pw,0);
         EXPECT_EQ(INPUT.out_wfc_r,0);
         EXPECT_EQ(INPUT.out_dos,0);
-        EXPECT_EQ(INPUT.out_band[0],0);
-		EXPECT_EQ(INPUT.out_band[1],8);
+        EXPECT_EQ(INPUT.out_band,0);
         EXPECT_EQ(INPUT.out_proj_band,0);
         EXPECT_EQ(INPUT.out_mat_hs[0],0);
-		EXPECT_EQ(INPUT.out_mat_hs[1],8);
         EXPECT_EQ(INPUT.out_mat_hs2,0);
         EXPECT_EQ(INPUT.out_mat_xc, 0);
         EXPECT_EQ(INPUT.out_interval,1);
@@ -925,8 +921,7 @@ TEST_F(InputTest, Default_2)
     EXPECT_EQ(INPUT.relax_nmax, 1);
     EXPECT_EQ(INPUT.out_stru, 0);
     EXPECT_EQ(INPUT.symmetry, "0");
-	EXPECT_EQ(INPUT.out_band[0],0);
-	EXPECT_EQ(INPUT.out_band[1],8);
+	EXPECT_EQ(INPUT.out_band,0);
 	EXPECT_EQ(INPUT.out_proj_band,0);
 	EXPECT_EQ(INPUT.cal_force,0);
 	EXPECT_EQ(INPUT.init_wfc,"file");
@@ -948,8 +943,7 @@ TEST_F(InputTest, Default_2)
     EXPECT_EQ(INPUT.relax_nmax, 1);
     EXPECT_EQ(INPUT.symmetry, "0");
     EXPECT_EQ(INPUT.out_stru, 0);
-	EXPECT_EQ(INPUT.out_band[0],0);
-	EXPECT_EQ(INPUT.out_band[1],8);
+	EXPECT_EQ(INPUT.out_band,0);
 	EXPECT_EQ(INPUT.out_proj_band,0);
 	EXPECT_EQ(INPUT.cal_force,0);
 	EXPECT_EQ(INPUT.init_wfc,"file");
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index d005fdfccc..58e04eb32d 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -26,13 +26,12 @@ class InputParaTest : public ::testing::Test
 #ifdef __MPI
 TEST_F(InputParaTest, Bcast)
 {
-    INPUT.Default();
     if (GlobalV::MY_RANK == 0)
     {
-        INPUT.suffix = "BcastTest";
+        INPUT.Default(); /* hmmm... why there is not Default_2 here? and, seems Default is execute directly on each processor? */
     }
     INPUT.Bcast();
-    EXPECT_EQ(INPUT.suffix, "BcastTest");
+    EXPECT_EQ(INPUT.suffix, "ABACUS");
     EXPECT_EQ(INPUT.stru_file, "");
     EXPECT_EQ(INPUT.kpoint_file, "");
     EXPECT_EQ(INPUT.pseudo_dir, "");
@@ -181,8 +180,7 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.out_wfc_pw, 0);
     EXPECT_EQ(INPUT.out_wfc_r, 0);
     EXPECT_EQ(INPUT.out_dos, 0);
-    EXPECT_EQ(INPUT.out_band[0], 0);
-    EXPECT_EQ(INPUT.out_band[1], 8);
+    EXPECT_EQ(INPUT.out_band, 0);
     EXPECT_EQ(INPUT.out_proj_band, 0);
     EXPECT_EQ(INPUT.out_mat_hs[0], 0);
     EXPECT_EQ(INPUT.out_mat_hs[1], 8);
@@ -381,7 +379,6 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_TRUE(INPUT.mdp.dump_virial);
     EXPECT_FALSE(INPUT.mixing_tau);
     EXPECT_FALSE(INPUT.mixing_dftu);
-    EXPECT_EQ(INPUT.mixing_restart,0);
     EXPECT_EQ(INPUT.out_bandgap, 0);
     EXPECT_EQ(INPUT.out_mat_t, 0);
 
diff --git a/source/module_io/test/support/INPUT b/source/module_io/test/support/INPUT
index 4fbde867db..469dff2ff4 100644
--- a/source/module_io/test/support/INPUT
+++ b/source/module_io/test/support/INPUT
@@ -59,7 +59,7 @@ out_pot                        2 #output realspace potential
 out_wfc_pw                     0 #output wave functions
 out_wfc_r                      0 #output wave functions in realspace
 out_dos                        0 #output energy and dos
-out_band                       0 #output energy and band structure
+out_band                       false #output energy and band structure
 out_proj_band                  FaLse #output projected band structure
 restart_save                   f #print to disk every step for restart
 restart_load                   F #restart from disk
diff --git a/source/module_io/test/support/witestfile b/source/module_io/test/support/witestfile
index 4db819d53f..4043773876 100644
--- a/source/module_io/test/support/witestfile
+++ b/source/module_io/test/support/witestfile
@@ -55,7 +55,7 @@ out_pot                        2 #output realspace potential
 out_wfc_pw                     0 #output wave functions
 out_wfc_r                      0 #output wave functions in realspace
 out_dos                        0 #output energy and dos
-out_band                       0 #output energy and band structure
+out_band                       false #output energy and band structure
 out_proj_band                  FaLse #output projected band structure
 restart_save                   f #print to disk every step for restart
 restart_load                   F #restart from disk
diff --git a/source/module_io/test/to_qo_test.cpp b/source/module_io/test/to_qo_test.cpp
index 9477b2eb54..93692f858e 100644
--- a/source/module_io/test/to_qo_test.cpp
+++ b/source/module_io/test/to_qo_test.cpp
@@ -543,39 +543,7 @@ TEST_F(toQOTest, CalculateSelfOvlpRFull)
     //tqo.write_ovlp(tqo.ovlp_R()[0], "QO_self_ovlp.dat");
 }
 
-/* Si_dojo_soc.upf is special: two p orbitals, one s orbital */
-
-TEST_F(toQOTest, BuildPswfcPartial1)
-{
-    define_fcc_cell(ucell);
-    toQO tqo("pswfc", {"s", "s"});
-    tqo.unwrap_unitcell(&ucell);
-    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
-    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
-    EXPECT_EQ(tqo.nchi(), 2);
-}
-
-TEST_F(toQOTest, BuildPswfcPartial2)
-{
-    define_fcc_cell(ucell);
-    toQO tqo("pswfc", {"ps", "s"});
-    tqo.unwrap_unitcell(&ucell);
-    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
-    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
-    EXPECT_EQ(tqo.nchi(), 8); // the first element is Si, it has two p orbitals, so 3+3+1+1
-}
-
-TEST_F(toQOTest, BuildPswfcPartial3)
-{
-    define_fcc_cell(ucell);
-    toQO tqo("pswfc", {"all", "p"});
-    tqo.unwrap_unitcell(&ucell);
-    tqo.build_ao(ucell.ntype, ucell.pseudo_fn);
-    EXPECT_EQ(tqo.p_ao()->nchi(), 5); // AO will always read and import all orbitals
-    EXPECT_EQ(tqo.nchi(), 10);
-}
-
-TEST_F(toQOTest, BuildPswfcAll)
+TEST_F(toQOTest, BuildPswfc)
 {
     define_fcc_cell(ucell);
     toQO tqo("pswfc", {"all", "all"});
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index 8dccb5627a..d61133715d 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -384,16 +384,13 @@ TEST_F(write_input, Mixing7)
     std::string output((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
     EXPECT_THAT(output, testing::HasSubstr("#Parameters (7.Charge Mixing)"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_type                    broyden #plain; pulay; broyden"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_beta                    0.7 #mixing parameter: 0 means no new charge"));
+    EXPECT_THAT(output,
+                testing::HasSubstr("mixing_beta                    0.7 #mixing parameter: 0 means no new charge"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_ndim                    8 #mixing dimension in pulay or broyden"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_gg0                     0 #mixing parameter in kerker"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_beta_mag                -10 #mixing parameter for magnetic density"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_mag                 0 #mixing parameter in kerker"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_gg0_min                 0.1 #the minimum kerker coefficient"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_angle                   -10 #angle mixing parameter for non-colinear calculations"));
     EXPECT_THAT(output, testing::HasSubstr("mixing_tau                     0 #whether to mix tau in mGGA calculation"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_dftu                    0 #whether to mix locale in DFT+U calculation"));
-    EXPECT_THAT(output, testing::HasSubstr("mixing_restart                 0 #which step to restart mixing during SCF"));
+    EXPECT_THAT(output,
+                testing::HasSubstr("mixing_dftu                    0 #whether to mix locale in DFT+U calculation"));
     EXPECT_THAT(output, testing::HasSubstr(""));
     ifs.close();
     remove("write_input_test.log");
diff --git a/source/module_io/test_serial/nscf_band_test.cpp b/source/module_io/test_serial/nscf_band_test.cpp
index db9bf752fb..4483bf37cd 100644
--- a/source/module_io/test_serial/nscf_band_test.cpp
+++ b/source/module_io/test_serial/nscf_band_test.cpp
@@ -54,16 +54,9 @@ class BandTest : public ::testing::Test
 	    ekb(1,1) =  2.0;
 	    ekb(1,2) =  3.0;
         kv = new K_Vectors;
-        // specify the kpoints
-        kv->kvec_c.resize(nks);
-        kv->kvec_c[0] = ModuleBase::Vector3<double>(0.0, 0.0, 0.0);
-        kv->kvec_c[1] = ModuleBase::Vector3<double>(1.0, 0.0, 0.0);
         kv->isk.resize(nks);
         kv->isk[0] = 0;
         kv->isk[1] = 1;
-        kv->kl_segids.resize(nks);
-        kv->kl_segids[0] = 0;
-        kv->kl_segids[1] = 0;
         Pkpoints = new Parallel_Kpoints;
     }
 
@@ -88,12 +81,12 @@ class BandTest : public ::testing::Test
 TEST_F(BandTest, nscf_band)
 {
     // Call the function to be tested
-    ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, 8, ekb, *kv, Pkpoints);
+    ModuleIO::nscf_band(is, out_band_dir, nks, nband, fermie, ekb, *kv, Pkpoints);
 
     // Check the output file
     std::ifstream ifs(out_band_dir);
     std::string str((std::istreambuf_iterator<char>(ifs)),std::istreambuf_iterator<char>());
     ASSERT_TRUE(ifs.is_open());
-    EXPECT_THAT(str, testing::HasSubstr("1   0.00000000 -27.21139600 -13.60569800   0.00000000"));
+    EXPECT_THAT(str, testing::HasSubstr("1       -27.2114       -13.6057              0"));
     ifs.close();
 }
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index cb26bc2283..6003a4fdb0 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -85,7 +85,6 @@ void Input::Print(const std::string &fn) const
     ModuleBase::GlobalFunc::OUTP(ofs, "cal_force", cal_force, "if calculate the force at the end of the electronic iteration");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_freq_ion", out_freq_ion, "the frequency ( >= 0 ) of ionic step to output charge density and wavefunction. 0: output only when ion steps are finished");
     ModuleBase::GlobalFunc::OUTP(ofs, "device", device, "the computing device for ABACUS");
-    ModuleBase::GlobalFunc::OUTP(ofs, "precision", precision, "the computing precision for ABACUS");
 
     ofs << "\n#Parameters (2.PW)" << std::endl;
     ModuleBase::GlobalFunc::OUTP(ofs, "ecutwfc", ecutwfc, "#energy cutoff for wave functions");
@@ -123,7 +122,7 @@ void Input::Print(const std::string &fn) const
     ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_pw", out_wfc_pw, "output wave functions");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_wfc_r", out_wfc_r, "output wave functions in realspace");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_dos", out_dos, "output energy and dos");
-    ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band[0], "output energy and band structure (with precision "+std::to_string(out_band[1])+")");
+    ModuleBase::GlobalFunc::OUTP(ofs, "out_band", out_band, "output energy and band structure");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_proj_band", out_proj_band, "output projected band structure");
     ModuleBase::GlobalFunc::OUTP(ofs, "restart_save", restart_save, "print to disk every step for restart");
     ModuleBase::GlobalFunc::OUTP(ofs, "restart_load", restart_load, "restart from disk");
@@ -223,7 +222,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dk", lcao_dk, "delta k for 1D integration in LCAO");
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_dr", lcao_dr, "delta r for 1D integration in LCAO");
     ModuleBase::GlobalFunc::OUTP(ofs, "lcao_rmax", lcao_rmax, "max R for 1D two-center integration table");
-    ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix (with precision "+std::to_string(out_mat_hs[1])+")");
+    ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs", out_mat_hs[0], "output H and S matrix");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs2", out_mat_hs2, "output H(R) and S(R) matrix");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_dh", out_mat_dh, "output of derivative of H(R) matrix");
     ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_xc", out_mat_xc, "output exchange-correlation matrix in KS-orbital representation");
@@ -248,7 +247,6 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_type", mixing_mode, "plain; pulay; broyden");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta", mixing_beta, "mixing parameter: 0 means no new charge");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_ndim", mixing_ndim, "mixing dimension in pulay or broyden");
-    ModuleBase::GlobalFunc::OUTP(ofs, "mixing_restart", mixing_restart, "which step to restart mixing during SCF");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0", mixing_gg0, "mixing parameter in kerker");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker");
@@ -495,32 +493,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential");
     ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function");
-
-    ofs << "\n#Parameters (24.PEXSI)" << std::endl;
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure.");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0");
-
+  
     ofs.close();
     return;
 }
\ No newline at end of file
diff --git a/source/module_ri/Exx_LRI.hpp b/source/module_ri/Exx_LRI.hpp
index c9b3b69601..ace9097bb0 100644
--- a/source/module_ri/Exx_LRI.hpp
+++ b/source/module_ri/Exx_LRI.hpp
@@ -12,6 +12,7 @@
 #include "module_ri/exx_abfs-construct_orbs.h"
 #include "module_ri/exx_abfs-io.h"
 #include "module_ri/conv_coulomb_pot_k.h"
+#include "module_ri/conv_coulomb_pot_k-template.h"
 #include "module_base/tool_title.h"
 #include "module_base/timer.h"
 #include "module_ri/serialization_cereal.h"
@@ -70,19 +71,14 @@ void Exx_LRI<Tdata>::init(const MPI_Comm &mpi_comm_in, const K_Vectors &kv_in)
 			case Conv_Coulomb_Pot_K::Ccp_Type::Ccp:
 				return {};
 			case Conv_Coulomb_Pot_K::Ccp_Type::Hf:
-			{
-				// 4/3 * pi * Rcut^3 = V_{supercell} = V_{unitcell} * Nk
-				const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1;
-				const double hf_Rcut = std::pow(0.75 * this->p_kv->nkstot_full/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0);
-				return {{"hf_Rcut", hf_Rcut}};
-			}
+				return {};
 			case Conv_Coulomb_Pot_K::Ccp_Type::Hse:
 				return {{"hse_omega", this->info.hse_omega}};
 			default:
 				throw std::domain_error(std::string(__FILE__)+" line "+std::to_string(__LINE__));	break;
 		}
 	};
-    this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times);
+    this->abfs_ccp = Conv_Coulomb_Pot_K::cal_orbs_ccp(this->abfs, this->info.ccp_type, get_ccp_parameter(), this->info.ccp_rmesh_times, this->p_kv->nkstot_full);
 
 
 	for( size_t T=0; T!=this->abfs.size(); ++T )
diff --git a/source/module_ri/LRI_CV_Tools.hpp b/source/module_ri/LRI_CV_Tools.hpp
index 8ad95c3715..532e7104fb 100644
--- a/source/module_ri/LRI_CV_Tools.hpp
+++ b/source/module_ri/LRI_CV_Tools.hpp
@@ -250,10 +250,11 @@ LRI_CV_Tools::cal_latvec_range(const double &rcut_times)
 	const ModuleBase::Vector3<double> proj = ModuleBase::Mathzone::latvec_projection(
 		std::array<ModuleBase::Vector3<double>,3>{GlobalC::ucell.a1, GlobalC::ucell.a2, GlobalC::ucell.a3});
 	const ModuleBase::Vector3<double> latvec_times = Rcut_max * rcut_times / (proj * GlobalC::ucell.lat0);
-    const ModuleBase::Vector3<Tcell> latvec_times_ceil = {static_cast<Tcell>(std::ceil(latvec_times.x)),
-                                                          static_cast<Tcell>(std::ceil(latvec_times.y)),
-                                                          static_cast<Tcell>(std::ceil(latvec_times.z))};
-    const ModuleBase::Vector3<Tcell> period = 2 * latvec_times_ceil + ModuleBase::Vector3<Tcell>{1,1,1};
+	const ModuleBase::Vector3<Tcell> latvec_times_ceil = 
+		{std::ceil(latvec_times.x),
+		 std::ceil(latvec_times.y),
+		 std::ceil(latvec_times.z)};
+	const ModuleBase::Vector3<Tcell> period = 2 * latvec_times_ceil + ModuleBase::Vector3<Tcell>{1,1,1};
 	return std::array<Tcell,3>{period.x, period.y, period.z};
 }
 
@@ -307,7 +308,7 @@ LRI_CV_Tools::get_dCVws(
 				const Abfs::Vector3_Order<double> R_delta = -tau0+tau1+(RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec);
 				dCVws[it0][it1][R_delta][ix] = dCVs_B.second;
 			}
-		}
+		}		
 	}
 	return dCVws;
 }
diff --git a/source/module_ri/conv_coulomb_pot_k-template.h b/source/module_ri/conv_coulomb_pot_k-template.h
new file mode 100644
index 0000000000..9a3d245286
--- /dev/null
+++ b/source/module_ri/conv_coulomb_pot_k-template.h
@@ -0,0 +1,51 @@
+#ifndef CONV_COULOMB_POT_K_TEMPLATE_H
+#define CONV_COULOMB_POT_K_TEMPLATE_H
+
+#include "conv_coulomb_pot_k.h"
+#include <vector>
+#include <cmath>
+
+#include "../module_ri/test_code/exx_abfs-construct_orbs-test.h"
+
+
+template< typename T >
+T Conv_Coulomb_Pot_K::cal_orbs_ccp(
+	const T & orbs,
+	const Ccp_Type &ccp_type,
+	const std::map<std::string,double> &parameter,
+	const double rmesh_times, 
+    const int& nks)
+{
+	T orbs_ccp(orbs.size());
+	for( size_t i=0; i!=orbs.size(); ++i )
+		orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times, nks );
+	return orbs_ccp;
+}
+
+extern template
+Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp<Numerical_Orbital_Lm>(
+	const Numerical_Orbital_Lm & orbs,
+	const Ccp_Type &ccp_type,
+	const std::map<std::string,double> &parameter,
+    const double rmesh_times,
+    const int& nks);
+
+	
+	
+template< typename T >
+double Conv_Coulomb_Pot_K::get_rmesh_proportion(
+	const T & orbs,
+	const double psi_threshold)
+{
+	double rmesh_proportion=0;
+	for( const auto &orb : orbs )
+		rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold));
+	return rmesh_proportion;
+}
+
+extern template
+double Conv_Coulomb_Pot_K::get_rmesh_proportion(
+	const Numerical_Orbital_Lm & orbs,
+	const double psi_threshold);
+	
+#endif
\ No newline at end of file
diff --git a/source/module_ri/conv_coulomb_pot_k.cpp b/source/module_ri/conv_coulomb_pot_k.cpp
index 62dd582a44..9f573509ee 100644
--- a/source/module_ri/conv_coulomb_pot_k.cpp
+++ b/source/module_ri/conv_coulomb_pot_k.cpp
@@ -2,109 +2,104 @@
 #include "../module_base/constants.h"
 #include "../module_basis/module_ao/ORB_atomic_lm.h"
 #include "../module_hamilt_pw/hamilt_pwdft/global.h"
-
-namespace Conv_Coulomb_Pot_K
+std::vector<double> Conv_Coulomb_Pot_K::cal_psi_ccp( const std::vector<double> & psif )
 {
+	std::vector<double> psik2_ccp(psif.size());
+	for( size_t ik=0; ik<psif.size(); ++ik )
+		psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik];
+	return psik2_ccp;
+}
 
-	std::vector<double> cal_psi_ccp(
-		const std::vector<double> & psif)
-	{
-		std::vector<double> psik2_ccp(psif.size());
-		for( size_t ik=0; ik<psif.size(); ++ik )
-			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik];
-		return psik2_ccp;
-	}
-
-	// rongshi add 2022-07-27
-	// Sphere truction -- Spencer
-	std::vector<double> cal_psi_hf(
-		const std::vector<double> &psif,
-		const std::vector<double> &k_radial,
-		const double hf_Rcut)
-	{
-		std::vector<double> psik2_ccp(psif.size());
-		for (size_t ik = 0; ik < psif.size(); ++ik)
-			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * hf_Rcut));
-		return psik2_ccp;
-	}
+// rongshi add 2022-07-27
+// Sphere truction -- Spencer
+std::vector<double> Conv_Coulomb_Pot_K::cal_psi_hf(const int& nks, const std::vector<double> &psif,
+                                                   const std::vector<double> &k_radial,
+                                                   const double omega = 0)
+{	
+    const int nspin0 = (GlobalV::NSPIN==2) ? 2 : 1;
+    const double Rc = std::pow(0.75 * nks/nspin0 * GlobalC::ucell.omega / (ModuleBase::PI), 1.0/3.0);
+    std::vector<double> psik2_ccp(psif.size());
+    for (size_t ik = 0; ik < psif.size(); ++ik)
+        psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1 - std::cos(k_radial[ik] * Rc));
+    return psik2_ccp;
+}
 
 
-	std::vector<double> cal_psi_hse(
-		const std::vector<double> & psif,
-		const std::vector<double> & k_radial,
-		const double hse_omega)
-	{
-		std::vector<double> psik2_ccp(psif.size());
-		for( size_t ik=0; ik<psif.size(); ++ik )
-			psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1-std::exp(-(k_radial[ik]*k_radial[ik])/(4*hse_omega*hse_omega)));
-		return psik2_ccp;
-	}
+std::vector<double> Conv_Coulomb_Pot_K::cal_psi_hse( 
+	const std::vector<double> & psif,
+	const std::vector<double> & k_radial,
+	const double omega)
+{
+	std::vector<double> psik2_ccp(psif.size());
+	for( size_t ik=0; ik<psif.size(); ++ik )
+		psik2_ccp[ik] = ModuleBase::FOUR_PI * psif[ik] * (1-std::exp(-(k_radial[ik]*k_radial[ik])/(4*omega*omega)));
+	return psik2_ccp;
+}
 
 
 
-	template<>
-	Numerical_Orbital_Lm cal_orbs_ccp<Numerical_Orbital_Lm>(
-		const Numerical_Orbital_Lm &orbs,
-		const Ccp_Type &ccp_type,
-		const std::map<std::string,double> &parameter,
-		const double rmesh_times)
+template<>
+Numerical_Orbital_Lm Conv_Coulomb_Pot_K::cal_orbs_ccp<Numerical_Orbital_Lm>(
+	const Numerical_Orbital_Lm &orbs,
+	const Ccp_Type &ccp_type,
+	const std::map<std::string,double> &parameter,
+    const double rmesh_times,
+    const int& nks)
+{
+	std::vector<double> psik2_ccp;
+	switch(ccp_type)
 	{
-		std::vector<double> psik2_ccp;
-		switch(ccp_type)
-		{
-			case Ccp_Type::Ccp:
-				psik2_ccp = cal_psi_ccp( orbs.get_psif() );		break;
-			case Ccp_Type::Hf:
-				psik2_ccp = cal_psi_hf( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hf_Rcut"));      break;
-			case Ccp_Type::Hse:
-				psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") );		break;
-			default:
-				throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) );		break;
-		}
-
-		const double dr = orbs.get_rab().back();
-		const int Nr = (static_cast<int>(orbs.getNr()*rmesh_times)) | 1;
-		std::vector<double> rab(Nr);
-		for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
-			rab[ir] = orbs.getRab(ir);
-		for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
-			rab[ir] = dr;
-		std::vector<double> r_radial(Nr);
-		for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
-			r_radial[ir] = orbs.getRadial(ir);
-		for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
-			r_radial[ir] = orbs.get_r_radial().back() + (ir - orbs.getNr() + 1) * dr;
-
-		Numerical_Orbital_Lm orbs_ccp;
-		orbs_ccp.set_orbital_info(
-			orbs.getLabel(),
-			orbs.getType(),
-			orbs.getL(),
-			orbs.getChi(),
-			Nr,
-			ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab),
-			ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial),
-			Numerical_Orbital_Lm::Psi_Type::Psik2,
-			ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2_ccp),
-			orbs.getNk(),
-			orbs.getDk(),
-			orbs.getDruniform(),
-			false,
-			true, GlobalV::CAL_FORCE);
-		return orbs_ccp;
+		case Ccp_Type::Ccp:
+			psik2_ccp = cal_psi_ccp( orbs.get_psif() );		break;
+		case Ccp_Type::Hf:
+        	psik2_ccp = cal_psi_hf(nks, orbs.get_psif(), orbs.get_k_radial());      break;
+		case Ccp_Type::Hse:
+			psik2_ccp = cal_psi_hse( orbs.get_psif(), orbs.get_k_radial(), parameter.at("hse_omega") );		break;
+		default:
+			throw( ModuleBase::GlobalFunc::TO_STRING(__FILE__)+" line "+ModuleBase::GlobalFunc::TO_STRING(__LINE__) );		break;
 	}
 
-	template<>
-	double get_rmesh_proportion(
-		const Numerical_Orbital_Lm &orbs,
-		const double psi_threshold)
+	const double dr = orbs.get_rab().back();
+	const int Nr = (static_cast<int>(orbs.getNr()*rmesh_times)) | 1;
+	std::vector<double> rab(Nr);
+	for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
+		rab[ir] = orbs.getRab(ir);
+	for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
+		rab[ir] = dr;
+	std::vector<double> r_radial(Nr);
+	for( size_t ir=0; ir<std::min(orbs.getNr(),Nr); ++ir )
+		r_radial[ir] = orbs.getRadial(ir);
+	for( size_t ir=orbs.getNr(); ir<Nr; ++ir )
+        r_radial[ir] = orbs.get_r_radial().back() + (ir - orbs.getNr() + 1) * dr;
+	
+	Numerical_Orbital_Lm orbs_ccp;
+	orbs_ccp.set_orbital_info(
+ 		orbs.getLabel(),
+	 	orbs.getType(),
+		orbs.getL(),
+		orbs.getChi(),
+	    Nr,
+		ModuleBase::GlobalFunc::VECTOR_TO_PTR(rab),
+		ModuleBase::GlobalFunc::VECTOR_TO_PTR(r_radial),
+		Numerical_Orbital_Lm::Psi_Type::Psik2,
+		ModuleBase::GlobalFunc::VECTOR_TO_PTR(psik2_ccp),
+		orbs.getNk(),
+		orbs.getDk(),
+		orbs.getDruniform(),
+		false,
+		true, GlobalV::CAL_FORCE);
+	return orbs_ccp;
+}
+
+template<>
+double Conv_Coulomb_Pot_K::get_rmesh_proportion(
+	const Numerical_Orbital_Lm &orbs,
+	const double psi_threshold)
+{
+	for(int ir=orbs.getNr()-1; ir>=0; --ir)
 	{
-		for(int ir=orbs.getNr()-1; ir>=0; --ir)
-		{
-			if(std::abs(orbs.getPsi(ir))>=psi_threshold)
-				return static_cast<double>(ir)/orbs.getNr();
-		}
-		return 0.0;
+		if(std::abs(orbs.getPsi(ir))>=psi_threshold)
+			return static_cast<double>(ir)/orbs.getNr();
 	}
-
+	return 0.0;
 }
diff --git a/source/module_ri/conv_coulomb_pot_k.h b/source/module_ri/conv_coulomb_pot_k.h
index d464a53f91..9adec9d915 100644
--- a/source/module_ri/conv_coulomb_pot_k.h
+++ b/source/module_ri/conv_coulomb_pot_k.h
@@ -5,37 +5,40 @@
 #include <map>
 #include <string>
 
-namespace Conv_Coulomb_Pot_K
+class Conv_Coulomb_Pot_K
 {
-	enum class Ccp_Type{		//	parameter:
-		Ccp,					//
-		Hf,						//		"hf_Rcut"
-		Hse};					//		"hse_omega"
+public:
 
-	template<typename T> T cal_orbs_ccp(
+	enum class Ccp_Type{		//  parameter:
+		Ccp,                 // 
+		Hf,					//
+		Hse};					//  	"hse_omega"
+
+	template<typename T> static T cal_orbs_ccp(
 		const T &orbs,
 		const Ccp_Type &ccp_type,
 		const std::map<std::string,double> &parameter,
-        const double rmesh_times);
-
-  //private:
-	template< typename T > double get_rmesh_proportion(
+        const double rmesh_times,
+        const int& nks);
+	
+private:
+		
+	template< typename T > static double get_rmesh_proportion(
 		const T &orbs,
 		const double psi_threshold);
+		
+private:
 
-  //private:
-	std::vector<double> cal_psi_ccp(
-		const std::vector<double> & psif);
-	std::vector<double> cal_psi_hf(
-		const std::vector<double> &psif,
-		const std::vector<double> &k_radial,
-		const double hf_Rcut);
-	std::vector<double> cal_psi_hse(
+	static std::vector<double> cal_psi_ccp( const std::vector<double> & psif );
+	
+	static std::vector<double> cal_psi_hf(const int& nks, const std::vector<double> &psif,
+                                          const std::vector<double> &k_radial,
+                                          const double omega);
+
+	static std::vector<double> cal_psi_hse( 
 		const std::vector<double> & psif,
 		const std::vector<double> & k_radial,
-		const double hse_omega);
-}
-
-#include "conv_coulomb_pot_k.hpp"
+		const double omega);
+};
 
 #endif
\ No newline at end of file
diff --git a/source/module_ri/conv_coulomb_pot_k.hpp b/source/module_ri/conv_coulomb_pot_k.hpp
deleted file mode 100644
index 5ca3abe5c8..0000000000
--- a/source/module_ri/conv_coulomb_pot_k.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef CONV_COULOMB_POT_K_HPP
-#define CONV_COULOMB_POT_K_HPP
-
-#include "conv_coulomb_pot_k.h"
-#include <vector>
-#include <cmath>
-
-namespace Conv_Coulomb_Pot_K
-{
-
-	template< typename T >
-	std::vector<T> cal_orbs_ccp(
-		const std::vector<T> & orbs,
-		const Ccp_Type &ccp_type,
-		const std::map<std::string,double> &parameter,
-		const double rmesh_times)
-	{
-		std::vector<T> orbs_ccp(orbs.size());
-		for( size_t i=0; i!=orbs.size(); ++i )
-			orbs_ccp[i] = cal_orbs_ccp(orbs[i], ccp_type, parameter, rmesh_times);
-		return orbs_ccp;
-	}
-
-	template< typename T >
-	double get_rmesh_proportion(
-		const std::vector<T> & orbs,
-		const double psi_threshold)
-	{
-		double rmesh_proportion=0;
-		for( const auto &orb : orbs )
-			rmesh_proportion = std::max(rmesh_proportion, get_rmesh_proportion(orb,psi_threshold));
-		return rmesh_proportion;
-	}
-
-}
-
-#endif
\ No newline at end of file
diff --git a/source/module_ri/exx_lip.cpp b/source/module_ri/exx_lip.cpp
index 0c4211d890..2f685be5f1 100644
--- a/source/module_ri/exx_lip.cpp
+++ b/source/module_ri/exx_lip.cpp
@@ -481,7 +481,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib)
 	}
 
 	std::complex<double> * const porter = new std::complex<double> [rho_basis->nrxx];
-
+	
 	for(size_t iw=0; iw< GlobalV::NLOCAL; ++iw)
 	{
 		const std::complex<double> * const phi_w = phi[iw];
@@ -495,7 +495,7 @@ void Exx_Lip::b_cal( int ik, int iq, int ib)
 		if( Conv_Coulomb_Pot_K::Ccp_Type::Ccp==info.ccp_type || Conv_Coulomb_Pot_K::Ccp_Type::Hf==info.ccp_type )
 			if((iq==iq_vecik) && (gzero_rank_in_pool==GlobalV::RANK_IN_POOL))							/// need to check while use k_point parallel
 				b0[iw] = b_w[rho_basis->ig_gge0];
-
+		
 		for( size_t ig=0; ig<rho_basis->npw; ++ig)
 			b_w[ig] *= recip_qkg2[ig];
 	}
@@ -634,14 +634,12 @@ void Exx_Lip::write_q_pack() const
 	if(!GlobalV::RANK_IN_POOL)
 	{
 		const std::string exx_q_pack = "exx_q_pack/";
-		int return_value=0;
+
 		const std::string command_mkdir = "test -d " + GlobalV::global_out_dir + exx_q_pack + " || mkdir " + GlobalV::global_out_dir + exx_q_pack;
-        return_value = system(command_mkdir.c_str());
-        assert(return_value == 0);
+		system( command_mkdir.c_str() );	// Need to check
 
-        const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card;
-        return_value = system(command_kpoint.c_str());
-		assert(return_value==0);
+		const std::string command_kpoint = "test -f " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card + " || cp " + GlobalV::global_kpoint_card + " " + GlobalV::global_out_dir + exx_q_pack + GlobalV::global_kpoint_card;
+		system( command_kpoint.c_str() );	// Need to check
 
 		std::stringstream ss_wf_wg;
 		ss_wf_wg << GlobalV::global_out_dir << exx_q_pack << "wf_wg_" << GlobalV::MY_POOL;
diff --git a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
index 1aa4b94ac1..af0ad58c0d 100644
--- a/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
+++ b/tests/integrate/107_PW_OBOD_MemSaver/refBANDS_1.dat
@@ -1,6 +1,6 @@
-   1   0.00000000  -3.38705933  -0.79801307   5.06488210   5.06488210   7.84114355   9.60594903
-   2   0.17320508  -3.86201936  -0.07517988   5.13653145   5.13653145   7.91810064   9.68495616
-   3   0.34641016  -4.63079629   1.43419304   5.35284854   5.35284854   8.15540136   9.81498032
-   4   0.51961524  -5.25812925   3.25094996   5.69541211   5.69541211   8.51864422   9.65342396
-   5   0.69282032  -5.65198054   5.13706981   6.08466055   6.08466055   8.86784679   9.13706533
-   6   0.86602540  -5.78587376   6.28876728   6.28876728   6.28876728   8.83643550   8.83643550
+1 0  -3.3870593 -0.79801307 5.0648821 5.0648821 7.8411435 9.605949
+2 0.17320508  -3.8620194 -0.075179882 5.1365314 5.1365314 7.9181006 9.6849562
+3 0.34641016  -4.6307963 1.434193 5.3528485 5.3528485 8.1554014 9.8149803
+4 0.51961524  -5.2581292 3.25095 5.6954121 5.6954121 8.5186442 9.653424
+5 0.69282032  -5.6519805 5.1370698 6.0846605 6.0846605 8.8678468 9.1370653
+6 0.8660254  -5.7858738 6.2887673 6.2887673 6.2887673 8.8364355 8.8364355
diff --git a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
index 0a1088ceb5..ced64a27bf 100644
--- a/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
+++ b/tests/integrate/107_PW_OB_outputbands/refBANDS_1.dat
@@ -1,6 +1,6 @@
-   1   0.00000000  -3.38704889  -0.79801666   5.06489026   5.06490253   7.84115900   9.60595365
-   2   0.17320508  -3.86198803  -0.07518005   5.13653307   5.13653200   7.91811409   9.68496369
-   3   0.34641016  -4.63079483   1.43421001   5.35290770   5.35285262   8.15538060   9.81497711
-   4   0.51961524  -5.25812025   3.25095973   5.69541240   5.69542353   8.51883375   9.65336577
-   5   0.69282032  -5.65197852   5.13708720   6.08467575   6.08469139   8.86796173   9.13705931
-   6   0.86602540  -5.78586742   6.28881704   6.28877545   6.28878152   8.83653074   8.83650706
+1 0  -3.3870489 -0.79801666 5.0648903 5.0649025 7.841159 9.6059536
+2 0.17320508  -3.861988 -0.075180055 5.1365331 5.136532 7.9181141 9.6849637
+3 0.34641016  -4.6307948 1.43421 5.3529077 5.3528526 8.1553806 9.8149771
+4 0.51961524  -5.2581202 3.2509597 5.6954124 5.6954235 8.5188338 9.6533658
+5 0.69282032  -5.6519785 5.1370872 6.0846758 6.0846914 8.8679617 9.1370593
+6 0.8660254  -5.7858674 6.288817 6.2887755 6.2887815 8.8365307 8.8365071
diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
index bffad6b08a..168c5723a3 100644
--- a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
+++ b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref
@@ -3,92 +3,92 @@ CALCULATE THE MULLIkEN ANALYSIS FOR EACH ATOM
  Total charge:	32
 Decomposed Mulliken populations
 0                 Zeta of Fe                        Spin 1                        Spin 2                        Spin 3                        Spin 4
-s                        0                           1.317                       0.06196                       -0.2625                      -0.07949
-  sum over m                                         1.317                       0.06196                       -0.2625                      -0.07949
-s                        1                           1.726                      -0.01809                       0.09886                      -0.01413
-  sum over m                                         1.726                      -0.01809                       0.09886                      -0.01413
-s                        2                         0.03246                      -0.04153                        0.2209                      -0.02228
-  sum over m                                       0.03246                      -0.04153                        0.2209                      -0.02228
-s                        3                        -0.02921                      0.005609                        -0.025                     -0.005114
-  sum over m                                      -0.02921                      0.005609                        -0.025                     -0.005114
-  sum over m+zeta                                    3.046                      0.007945                        0.0323                        -0.121
-pz                        0                           2.034                     -0.001186                      0.005932                    -3.981e-06
-px                        0                           2.033                     -0.001283                      0.006419                    -3.989e-06
-py                        0                           2.033                     -0.001188                      0.005944                    -3.979e-06
-  sum over m                                           6.1                     -0.003658                        0.0183                    -1.195e-05
-pz                        1                        -0.02621                     0.0005578                     -0.002789                             0
-px                        1                        -0.02639                     0.0006107                     -0.003054                             0
-py                        1                        -0.02603                     0.0005536                     -0.002768                             0
-  sum over m                                      -0.07863                      0.001722                     -0.008611                             0
-  sum over m+zeta                                    6.021                     -0.001936                      0.009684                    -1.277e-05
-dz^2                        0                           1.964                     0.0008269                     -0.004128                    -1.088e-05
-dxz                        0                           1.044                         0.156                       -0.7849                    -0.0003055
-dyz                        0                          0.9592                        0.1564                       -0.7869                    -0.0003096
-dx^2-y^2                        0                           1.967                      0.000752                     -0.003754                    -1.059e-05
-dxy                        0                           1.055                        0.1558                       -0.7835                    -0.0003047
-  sum over m                                         6.988                        0.4698                        -2.363                    -0.0009413
-dz^2                        1                         0.03863                    -0.0008716                      0.004365                    -1.357e-05
-dxz                        1                        -0.03708                     -0.004148                       0.02101                     1.956e-05
-dyz                        1                        -0.03373                     -0.004494                       0.02274                     1.968e-05
-dx^2-y^2                        1                         0.03943                    -0.0009117                      0.004566                    -1.471e-05
-dxy                        1                        -0.03733                     -0.004056                       0.02055                     1.945e-05
-  sum over m                                      -0.03008                      -0.01448                       0.07324                     3.041e-05
-  sum over m+zeta                                    6.958                        0.4553                         -2.29                    -0.0009109
-fz^3                        0                       -0.007044                     0.0007552                     -0.003776                    -1.406e-06
-fxz^2                        0                       -0.002046                     0.0002628                     -0.001314                             0
-fyz^2                        0                        -0.00273                       0.00029                      -0.00145                             0
-fzx^2-zy^2                        0                       5.811e-05                             0                     3.451e-06                             0
-fxyz                        0                        1.14e-05                     1.249e-06                    -6.306e-06                             0
-fx^3-3*xy^2                        0                       -0.003379                     0.0004381                      -0.00219                             0
-f3yx^2-y^3                        0                        -0.00407                     0.0004626                     -0.002313                             0
-  sum over m                                       -0.0192                      0.002209                      -0.01105                    -4.307e-06
-  sum over m+zeta                                  -0.0192                      0.002209                      -0.01105                    -4.307e-06
-Total Charge on atom:  Fe               16.01
-Total Magnetism on atom:  Fe                   (0.4635, -2.259, -0.1219)
+s                        0                           1.317                       0.05552                        0.2843                       0.02903
+  sum over m                                         1.317                       0.05552                        0.2843                       0.02903
+s                        1                           1.726                      -0.01923                      -0.09498                      0.005159
+  sum over m                                         1.726                      -0.01923                      -0.09498                      0.005159
+s                        2                         0.03246                      -0.04333                       -0.2148                      0.008137
+  sum over m                                       0.03246                      -0.04333                       -0.2148                      0.008137
+s                        3                        -0.02921                      0.005194                       0.02641                      0.001867
+  sum over m                                      -0.02921                      0.005194                       0.02641                      0.001867
+  sum over m+zeta                                    3.046                     -0.001842                     0.0009368                       0.04419
+pz                        0                           2.034                     -0.001185                     -0.005932                     1.545e-06
+px                        0                           2.033                     -0.001283                     -0.006419                     1.538e-06
+py                        0                           2.033                     -0.001188                     -0.005944                     1.543e-06
+  sum over m                                           6.1                     -0.003656                      -0.01829                     4.626e-06
+pz                        1                        -0.02622                     0.0005602                      0.002791                             0
+px                        1                        -0.02639                     0.0006145                      0.003054                             0
+py                        1                        -0.02603                     0.0005563                       0.00277                             0
+  sum over m                                      -0.07864                      0.001731                      0.008615                             0
+  sum over m+zeta                                    6.021                     -0.001925                      -0.00968                     5.611e-06
+dz^2                        0                           1.964                     0.0008273                      0.004131                     4.077e-06
+dxz                        0                           1.044                        0.1755                        0.7507                      0.002258
+dyz                        0                          0.9544                        0.1768                        0.7532                      0.002329
+dx^2-y^2                        0                           1.967                     0.0007523                      0.003756                     3.978e-06
+dxy                        0                           1.055                        0.1751                        0.7495                      0.002251
+  sum over m                                         6.984                         0.529                         2.261                      0.006846
+dz^2                        1                         0.03863                    -0.0008699                     -0.004363                     5.197e-06
+dxz                        1                        -0.03759                     -0.005346                      -0.01936                    -0.0001322
+dyz                        1                        -0.03407                     -0.005734                      -0.02118                    -0.0001342
+dx^2-y^2                        1                         0.03943                    -0.0009093                     -0.004564                     5.691e-06
+dxy                        1                        -0.03787                     -0.005246                       -0.0189                    -0.0001314
+  sum over m                                      -0.03146                      -0.01811                      -0.06836                     -0.000387
+  sum over m+zeta                                    6.952                        0.5109                         2.193                      0.006459
+fz^3                        0                       -0.007049                     0.0007578                      0.003775                             0
+fxz^2                        0                       -0.002045                     0.0002638                      0.001312                             0
+fyz^2                        0                       -0.002729                     0.0002912                      0.001448                             0
+fzx^2-zy^2                        0                       6.273e-05                             0                    -6.642e-06                             0
+fxyz                        0                       1.153e-05                     1.446e-06                     5.675e-06                             0
+fx^3-3*xy^2                        0                        -0.00338                       0.00044                      0.002189                             0
+f3yx^2-y^3                        0                        -0.00407                     0.0004646                      0.002311                             0
+  sum over m                                       -0.0192                      0.002219                       0.01103                     2.581e-06
+  sum over m+zeta                                  -0.0192                      0.002219                       0.01103                     2.581e-06
+Total Charge on atom:  Fe                  16
+Total Magnetism on atom:  Fe                   (0.5093, 2.195, 0.05066)
 
 
 1                 Zeta of Fe                        Spin 1                        Spin 2                        Spin 3                        Spin 4
-s                        0                           1.275                       0.04699                       -0.2823                       0.07949
-  sum over m                                         1.275                       0.04699                       -0.2823                       0.07949
-s                        1                           1.755                      -0.01866                       0.08491                       0.01412
-  sum over m                                         1.755                      -0.01866                       0.08491                       0.01412
-s                        2                        -0.02899                      -0.04221                        0.1978                       0.02226
-  sum over m                                      -0.02899                      -0.04221                        0.1978                       0.02226
-s                        3                        -0.04712                       0.00595                      -0.03281                      0.005133
-  sum over m                                      -0.04712                       0.00595                      -0.03281                      0.005133
-  sum over m+zeta                                    2.954                     -0.007928                      -0.03239                         0.121
-pz                        0                           2.032                     -0.001371                       0.00685                     3.967e-06
-px                        0                           2.025                    -0.0009218                      0.004606                     3.958e-06
-py                        0                           2.032                     -0.001333                      0.006664                     3.965e-06
-  sum over m                                         6.089                     -0.003626                       0.01812                     1.189e-05
-pz                        1                        -0.02529                     0.0005803                     -0.002904                             0
-px                        1                        -0.01606                     0.0001295                    -0.0006492                             0
-py                        1                        -0.02466                     0.0005625                     -0.002815                             0
-  sum over m                                      -0.06602                      0.001272                     -0.006367                             0
-  sum over m+zeta                                    6.023                     -0.002353                       0.01175                      1.25e-05
-dz^2                        0                           1.957                      0.001154                     -0.005778                     1.149e-05
-dxz                        0                           1.091                        0.1517                       -0.7637                    -8.462e-05
-dyz                        0                          0.9556                        0.1553                       -0.7815                    -8.443e-05
-dx^2-y^2                        0                           1.947                      0.001648                     -0.008249                     1.233e-05
-dxy                        0                           1.106                        0.1508                       -0.7591                    -8.432e-05
-  sum over m                                         7.056                        0.4606                        -2.318                    -0.0002295
-dz^2                        1                         0.03925                     -0.001067                      0.005328                     1.289e-05
-dxz                        1                        -0.03558                     -0.002824                       0.01439                       2.5e-06
-dyz                        1                        -0.03117                     -0.003962                        0.0201                     2.798e-06
-dx^2-y^2                        1                         0.04266                     -0.001401                      0.006997                      1.29e-05
-dxy                        1                        -0.03637                     -0.002747                       0.01401                     2.475e-06
-  sum over m                                      -0.02122                        -0.012                       0.06082                     3.356e-05
-  sum over m+zeta                                    7.035                        0.4486                        -2.257                     -0.000196
-fz^3                        0                       -0.006615                     0.0007206                     -0.003605                     1.352e-06
-fxz^2                        0                       -0.001955                     0.0002554                     -0.001278                             0
-fyz^2                        0                       -0.002684                     0.0002735                     -0.001368                             0
-fzx^2-zy^2                        0                       9.383e-05                      1.68e-05                    -8.473e-05                             0
-fxyz                        0                       2.053e-05                      3.66e-06                    -1.839e-05                             0
-fx^3-3*xy^2                        0                       -0.003204                     0.0004266                     -0.002134                             0
-f3yx^2-y^3                        0                       -0.003695                     0.0004558                     -0.002281                             0
-  sum over m                                      -0.01804                      0.002152                      -0.01077                     4.022e-06
-  sum over m+zeta                                 -0.01804                      0.002152                      -0.01077                     4.022e-06
-Total Charge on atom:  Fe               15.99
-Total Magnetism on atom:  Fe                   (0.4405, -2.289, 0.1208)
+s                        0                           1.275                       0.05341                        0.2605                      -0.02903
+  sum over m                                         1.275                       0.05341                        0.2605                      -0.02903
+s                        1                           1.755                      -0.01752                      -0.08879                     -0.005156
+  sum over m                                         1.755                      -0.01752                      -0.08879                     -0.005156
+s                        2                        -0.02898                       -0.0404                       -0.2039                      -0.00813
+  sum over m                                      -0.02898                       -0.0404                       -0.2039                      -0.00813
+s                        3                        -0.04711                      0.006367                       0.03139                     -0.001874
+  sum over m                                      -0.04711                      0.006367                       0.03139                     -0.001874
+  sum over m+zeta                                    2.954                      0.001862                    -0.0008532                      -0.04419
+pz                        0                           2.032                     -0.001369                     -0.006852                    -1.367e-06
+px                        0                           2.025                    -0.0009208                     -0.004608                    -1.387e-06
+py                        0                           2.032                     -0.001332                     -0.006666                    -1.366e-06
+  sum over m                                         6.089                     -0.003622                      -0.01813                    -4.119e-06
+pz                        1                        -0.02528                     0.0005889                      0.002889                             0
+px                        1                        -0.01606                     0.0001369                     0.0006408                             0
+py                        1                        -0.02466                      0.000571                      0.002802                             0
+  sum over m                                        -0.066                      0.001297                      0.006331                     2.367e-06
+  sum over m+zeta                                    6.023                     -0.002325                      -0.01179                    -1.753e-06
+dz^2                        0                           1.957                      0.001158                      0.005774                    -3.913e-06
+dxz                        0                           1.097                        0.1724                        0.7275                      0.002311
+dyz                        0                          0.9509                        0.1759                        0.7475                      0.002269
+dx^2-y^2                        0                           1.947                      0.001654                      0.008245                    -4.075e-06
+dxy                        0                           1.113                        0.1714                        0.7227                      0.002304
+  sum over m                                         7.065                        0.5225                         2.212                      0.006876
+dz^2                        1                         0.03925                     -0.001062                     -0.005333                    -4.383e-06
+dxz                        1                         -0.0366                     -0.003947                      -0.01263                    -0.0001213
+dyz                        1                        -0.03157                     -0.005197                      -0.01856                    -0.0001267
+dx^2-y^2                        1                         0.04266                     -0.001394                     -0.007002                    -4.206e-06
+dxy                        1                        -0.03743                     -0.003854                      -0.01222                    -0.0001203
+  sum over m                                      -0.02369                      -0.01545                      -0.05575                    -0.0003768
+  sum over m+zeta                                    7.041                        0.5071                         2.156                      0.006499
+fz^3                        0                       -0.006614                     0.0007261                      0.003596                             0
+fxz^2                        0                       -0.001954                     0.0002565                      0.001276                             0
+fyz^2                        0                       -0.002684                     0.0002742                      0.001366                             0
+fzx^2-zy^2                        0                        9.09e-05                      1.99e-05                     8.018e-05                             0
+fxyz                        0                       2.062e-05                     4.102e-06                     1.816e-05                             0
+fx^3-3*xy^2                        0                       -0.003203                     0.0004291                       0.00213                             0
+f3yx^2-y^3                        0                       -0.003698                     0.0004635                      0.002271                             0
+  sum over m                                      -0.01804                      0.002174                       0.01074                             0
+  sum over m+zeta                                 -0.01804                      0.002174                       0.01074                             0
+Total Charge on atom:  Fe                  16
+Total Magnetism on atom:  Fe                   (0.5088, 2.154, -0.03769)
 
 
diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
index 8a17a1fada..649ae1ef31 100644
--- a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
+++ b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref
@@ -1,4 +1,4 @@
-etotref -6844.326716364628
-etotperatomref -3422.1633581823
+etotref -6844.685232776227
+etotperatomref -3422.3426163881
 Compare_mulliken_pass 0
-totaltimeref 36.59
+totaltimeref 21.55
diff --git a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
index 87a35be1b0..c3cd3a0b6d 100644
--- a/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
+++ b/tests/integrate/207_NO_KP_OB/refBANDS_1.dat
@@ -1,6 +1,6 @@
-   1   0.00000000  -3.20074324  -0.55268317   5.34115051   5.34115051   8.29516160  10.29249200
-   2   0.17320508  -3.66389133   0.16302859   5.41224692   5.41224692   8.38313120  10.35082356
-   3   0.34641016  -4.42333349   1.67151268   5.62380682   5.62380682   8.64342675  10.39966330
-   4   0.51961524  -5.04480873   3.49944170   5.95924219   5.95924219   9.02968708  10.13225743
-   5   0.69282032  -5.43307067   5.39954292   6.34143853   6.34143853   9.35699627   9.58079775
-   6   0.86602540  -5.56433513   6.53981221   6.53981221   6.53981221   9.28022796   9.28022796
+1 0  -3.2007432 -0.55268317 5.3411505 5.3411505 8.2951616 10.292492
+2 0.17320508  -3.6638913 0.16302859 5.4122469 5.4122469 8.3831312 10.350824
+3 0.34641016  -4.4233335 1.6715127 5.6238068 5.6238068 8.6434268 10.399663
+4 0.51961524  -5.0448087 3.4994417 5.9592422 5.9592422 9.0296871 10.132257
+5 0.69282032  -5.4330707 5.3995429 6.3414385 6.3414385 9.3569963 9.5807977
+6 0.8660254  -5.5643351 6.5398122 6.5398122 6.5398122 9.280228 9.280228
diff --git a/tests/integrate/Autotest.sh b/tests/integrate/Autotest.sh
index 37908d1769..5310998097 100755
--- a/tests/integrate/Autotest.sh
+++ b/tests/integrate/Autotest.sh
@@ -74,11 +74,6 @@ check_out(){
     # check every 'key' word
     #------------------------------------------------------
     for key in $properties; do
-    
-        if [ $key == "totaltimeref" ]; then
-            # echo "time=$cal ref=$ref"
-            break
-        fi
 
         #--------------------------------------------------
         # calculated value
@@ -96,6 +91,11 @@ check_out(){
         #--------------------------------------------------
         deviation=`awk 'BEGIN {x='$ref';y='$cal';printf "%.'$ca'f\n",x-y}'`
 
+        if [ $key == "totaltimeref" ]; then
+            # echo "time=$cal ref=$ref"
+            break
+        fi
+
 
         #--------------------------------------------------
         # If deviation < threshold, then the test passes,
diff --git a/toolchain/README.md b/toolchain/README.md
index a8ce2f711e..3e747006b7 100644
--- a/toolchain/README.md
+++ b/toolchain/README.md
@@ -91,7 +91,6 @@ The needed dependencies version default:
 - `LibXC` 6.2.2
 - `ELPA` 2023.05.001
 - `CEREAL` 1.3.2
-- `RapidJSON` 1.1.0
 And Intel-oneAPI need user or server manager to manually install from Intel.
 [Intel-oneAPI](https://www.intel.cn/content/www/cn/zh/developer/tools/oneapi/toolkits.html)
 
@@ -103,12 +102,12 @@ Dependencies below are optional， which is NOT installed by default:
 Users can install them by using `--with-*=install` in toolchain*.sh, which is `no` in default.
 > Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, which is also work for libnpy, which is more recommended.
 
-Notice: for `CEREAL`,`RapidJSON`, `Libnpy`, `LibRI` and `LibComm`, 
+Notice: for `CEREAL`, `Libnpy`, `LibRI` and `LibComm`, 
 you need to download them from github.com, 
 rename it as formatted, and put them in `build` directory at the same time
 e.g.:
 ```shell
-# packages downloaded from github.com, RapidJSON is not supported now
+# packages downloaded from github.com
 mv v1.3.2.tar.gz build/cereal-1.3.2.tar.gz
 ```
 
@@ -176,7 +175,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
         -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \
         ......
 ```
-Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now
+Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS
 
 ### shell problem
 If you encounter problem like:
@@ -207,8 +206,6 @@ The default compiler for Intel-oneAPI is `icpx` and `icx`, which will cause prob
 
 The best way is to change `icpx` to `icpc`, `icx` to `icc`. user can manually change it in toolchain*.sh via `--with-intel-classic=yes`
 
-Notice: `icc` and `icpc` from Intel Classic Compiler of Intel-oneAPI is not supported for 2024.0 and newer version.
-
 
 ### LibRI and LibComm problem
 (There is some problem sometimes when compling with LibRI and LibComm, detailed information is needed)
@@ -219,9 +216,6 @@ Sometimes Intel-oneAPI have problem to link `mpirun`,
 which will always show in 2023.2.0 version of MPI in Intel-oneAPI. 
 Try `source /path/to/setvars.sh` or install another version of IntelMPI may help.
 
-which is fixed in 2024.0.0 version of Intel-oneAPI, 
-And will not occur in Intel-MPI before 2021.10.0 (Intel-oneAPI before 2023.2.0)
-
 More problem and possible solution can be accessed via [#2928](https://github.com/deepmodeling/abacus-develop/issues/2928)
 
 
@@ -236,6 +230,9 @@ from ABACUS repo, make dependencies package more independent and flexible.
 2. Users can manually change `pkg_install_dir` variable 
 in `scripts/stage*/install*` to change the installation directory 
 of each packages, which may let the installation more fiexible.
+3. Users can manually change `INSTALL` variable in `scripts/common_vars.sh`
+to change the installation directory of all packages, which may let the
+installation more fiexible.
 
 
 ## More
diff --git a/toolchain/install_abacus_toolchain.sh b/toolchain/install_abacus_toolchain.sh
index 68c7959ce1..be41687307 100755
--- a/toolchain/install_abacus_toolchain.sh
+++ b/toolchain/install_abacus_toolchain.sh
@@ -182,12 +182,10 @@ The --with-PKG options follow the rules:
   --with-scalapack        Parallel linear algebra library, needed for parallel
                           calculations.
                           Default = install
-  --with-elpa             Eigenvalue SoLvers for Petaflop-Applications library.
-                          Fast library for large parallel jobs， Especially for ABACUS LCAO
-                          Default = install
   --with-cereal           Enable cereal for ABACUS LCAO
                           Default = install
-  --with-rapidjson        Enable rapidjson for ABACUS to read/write json files
+  --with-elpa             Eigenvalue SoLvers for Petaflop-Applications library.
+                          Fast library for large parallel jobs.
                           Default = install
   --with-libtorch         Enable libtorch the machine learning framework needed for DeePKS
                           Default = no
@@ -232,7 +230,7 @@ EOF
 tool_list="gcc intel cmake"
 mpi_list="mpich openmpi intelmpi"
 math_list="mkl acml openblas"
-lib_list="fftw libxc scalapack elpa cereal rapidjson libtorch libnpy libri libcomm"
+lib_list="fftw libxc scalapack elpa cereal libtorch libnpy libri libcomm"
 package_list="${tool_list} ${mpi_list} ${math_list} ${lib_list}"
 # ------------------------------------------------------------------------
 
@@ -266,8 +264,7 @@ with_acml="__SYSTEM__"
 with_openblas="__INSTALL__"
 with_elpa="__INSTALL__"
 with_cereal="__INSTALL__"
-with_rapidjson="__INSTALL__"
-# with_libtorch="__DONTUSE__" # default
+# with_libtorch="__DONTUSE__"
 # with_libnpy="__DONTUSE__"
 # with_libri="__DONTUSE__"
 # with_libcomm="__DONTUSE__"
@@ -552,9 +549,6 @@ while [ $# -ge 1 ]; do
     --with-cereal*)
       with_cereal=$(read_with "${1}")
       ;;
-    --with-rapidjson*)
-      with_rapidjson=$(read_with "${1}")
-      ;;
     --with-libnpy*)
       with_libnpy=$(read_with "${1}")
       ;;
diff --git a/toolchain/scripts/common_vars.sh b/toolchain/scripts/common_vars.sh
index 51f941f6fe..d04d40fa39 100755
--- a/toolchain/scripts/common_vars.sh
+++ b/toolchain/scripts/common_vars.sh
@@ -7,7 +7,8 @@
 # directories and files used by the installer
 ROOTDIR=${ROOTDIR:-"$(pwd -P)"}
 SCRIPTDIR=${SCRIPTDIR:-"${ROOTDIR}/scripts"}
-INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"} # should not be changed
+INSTALLDIR=${INSTALLDIR:-"${ROOTDIR}/install"}
+#INSTALLDIR=${INSTALLDIR:-"${HOME}/abacus_deps"} # advanced installation
 BUILDDIR=${BUILDDIR:-"${ROOTDIR}/build"}
 SETUPFILE=${SETUPFILE:-"${INSTALLDIR}/setup"}
 ARCH_FILE_TEMPLATE=${ARCH_FILE_TEMPLATE:-"${SCRIPTDIR}/arch_base.tmpl"}
diff --git a/toolchain/scripts/stage4/install_rapidjson.sh b/toolchain/scripts/stage4/install_rapidjson.sh
deleted file mode 100755
index b63789ffff..0000000000
--- a/toolchain/scripts/stage4/install_rapidjson.sh
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/bin/bash -e
-
-# TODO: Review and if possible fix shellcheck errors.
-# shellcheck disable=all
-# RAPIDJSON is not need any complex setting
-# Only problem is the installation from github.com
-
-[ "${BASH_SOURCE[0]}" ] && SCRIPT_NAME="${BASH_SOURCE[0]}" || SCRIPT_NAME=$0
-SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_NAME")/.." && pwd -P)"
-
-rapidjson_ver="1.1.0"
-rapidjson_sha256="bf7ced29704a1e696fbccf2a2b4ea068e7774fa37f6d7dd4039d0787f8bed98e"
-source "${SCRIPT_DIR}"/common_vars.sh
-source "${SCRIPT_DIR}"/tool_kit.sh
-source "${SCRIPT_DIR}"/signal_trap.sh
-source "${INSTALLDIR}"/toolchain.conf
-source "${INSTALLDIR}"/toolchain.env
-
-[ -f "${BUILDDIR}/setup_rapidjson" ] && rm "${BUILDDIR}/setup_rapidjson"
-
-RAPIDJSON_CFLAGS=""
-! [ -d "${BUILDDIR}" ] && mkdir -p "${BUILDDIR}"
-cd "${BUILDDIR}"
-
-case "$with_rapidjson" in
-  __INSTALL__)
-    echo "==================== Installing RAPIDJSON ===================="
-    dirname="rapidjson-${rapidjson_ver}"
-    pkg_install_dir="${INSTALLDIR}/$dirname"
-    #pkg_install_dir="${HOME}/lib/rapidjson/${rapidjson_ver}"
-    install_lock_file="$pkg_install_dir/install_successful"
-    url="https://github.com/Tencent/rapidjson/archive/refs/tags/v${rapidjson_ver}.tar.gz"
-    filename="rapidjson-${rapidjson_ver}.tar.gz"
-    if verify_checksums "${install_lock_file}"; then
-        echo "$dirname is already installed, skipping it."
-    else
-        if [ -f $filename ]; then
-        echo "$filename is found"
-        else
-        # download from github.com and checksum
-            echo "wget --quiet $url -O $filename"
-            if ! wget --quiet $url -O $filename; then
-            report_error "failed to download $url"
-            recommend_offline_installation $filename $url
-            fi
-        # checksum
-        checksum "$filename" "$rapidjson_sha256"
-        fi
-        echo "Installing from scratch into ${pkg_install_dir}"
-        [ -d $dirname ] && rm -rf $dirname
-        tar -xzf $filename
-        mkdir -p "${pkg_install_dir}"
-        cp -r $dirname/* "${pkg_install_dir}/"
-        write_checksums "${install_lock_file}" "${SCRIPT_DIR}/stage4/$(basename ${SCRIPT_NAME})"
-    fi
-        ;;
-    __SYSTEM__)
-        echo "==================== CANNOT Finding RAPIDJSON from system paths NOW ===================="
-        recommend_offline_installation $filename $url
-        # How to do it in rapidjson? -- Zhaoqing in 2023/08/23
-        # check_lib -lxcf03 "libxc"
-        # check_lib -lxc "libxc"
-        # add_include_from_paths LIBXC_CFLAGS "xc.h" $INCLUDE_PATHS
-        # add_lib_from_paths LIBXC_LDFLAGS "libxc.*" $LIB_PATHS
-        ;;
-    __DONTUSE__) ;;
-    
-    *)
-    echo "==================== Linking RAPIDJSON to user paths ===================="
-    check_dir "${pkg_install_dir}"
-    RAPIDJSON_CFLAGS="-I'${pkg_install_dir}'"
-    ;;
-esac
-if [ "$with_rapidjson" != "__DONTUSE__" ]; then
-    if [ "$with_rapidjson" != "__SYSTEM__" ]; then
-    # LibRI deps should find rapidjson include in CPATH
-        cat << EOF > "${BUILDDIR}/setup_rapidjson"
-prepend_path CPATH "$pkg_install_dir/include"
-export CPATH="${pkg_install_dir}/include:"${CPATH}
-EOF
-        cat "${BUILDDIR}/setup_rapidjson" >> $SETUPFILE
-    fi
-    cat << EOF >> "${BUILDDIR}/setup_rapidjson"
-export RAPIDJSON_CFLAGS="${RAPIDJSON_CFLAGS}"
-export RAPIDJSON_ROOT="$pkg_install_dir"
-EOF
-fi
-
-load "${BUILDDIR}/setup_rapidjson"
-write_toolchain_env "${INSTALLDIR}"
-
-cd "${ROOTDIR}"
-report_timing "rapidjson"
diff --git a/toolchain/scripts/stage4/install_stage4.sh b/toolchain/scripts/stage4/install_stage4.sh
index b5c7cf5eed..ffe8f670c9 100755
--- a/toolchain/scripts/stage4/install_stage4.sh
+++ b/toolchain/scripts/stage4/install_stage4.sh
@@ -4,7 +4,6 @@
 # shellcheck disable=all
 
 ./scripts/stage4/install_cereal.sh
-./scripts/stage4/install_rapidjson.sh
 ./scripts/stage4/install_libtorch.sh
 ./scripts/stage4/install_libnpy.sh
 ./scripts/stage4/install_libri.sh
diff --git a/toolchain/scripts/tool_kit.sh b/toolchain/scripts/tool_kit.sh
index 120b623fee..d07445089b 100755
--- a/toolchain/scripts/tool_kit.sh
+++ b/toolchain/scripts/tool_kit.sh
@@ -54,12 +54,9 @@ By download $__filename from $__url,
 Rename it as $__filename and put it into ${BUILDDIR},
 And re-run toolchain installation script.
 
-You can manually install requirements packages via:
-1. Download from www.cp2k.org/static/downloads (for OpenBLAS, OpenMPI and Others)
-2. Download from github.com (for CEREAL, RapidJSON, libnpy, LibRI and others stage4 packages)
-3. Use git submodule update --init --recursive (for LibRI)
-4. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v2 -O abacus-deps.zip
-5. for Intel-oneAPI, please contact your server manager our visit Intel official website
+Instead of github.com. you can manually install requirements packages via:
+1. Download from www.cp2k.org/static/downloads
+2. wget https://bohrium-api.dp.tech/ds-dl/abacus-deps-93wi-v1 -O abacus-deps-v1.zip
 EOF
 }
 
diff --git a/toolchain/toolchain_gnu.sh b/toolchain/toolchain_gnu.sh
index 04635bb63d..57105f601b 100755
--- a/toolchain/toolchain_gnu.sh
+++ b/toolchain/toolchain_gnu.sh
@@ -18,7 +18,6 @@
 --with-fftw=install \
 --with-elpa=install \
 --with-cereal=install \
---with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \
diff --git a/toolchain/toolchain_intel-mpich.sh b/toolchain/toolchain_intel-mpich.sh
index fcf3cc41ee..ffc2626670 100755
--- a/toolchain/toolchain_intel-mpich.sh
+++ b/toolchain/toolchain_intel-mpich.sh
@@ -21,7 +21,6 @@
 --with-fftw=no \
 --with-elpa=install \
 --with-cereal=install \
---with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \
diff --git a/toolchain/toolchain_intel.sh b/toolchain/toolchain_intel.sh
index e5298c570d..8f391be008 100755
--- a/toolchain/toolchain_intel.sh
+++ b/toolchain/toolchain_intel.sh
@@ -22,7 +22,6 @@
 --with-fftw=no \
 --with-elpa=install \
 --with-cereal=install \
---with-rapidjson=install \
 --with-libtorch=no \
 --with-libnpy=no \
 --with-libri=no \

From c4c5272ed2418d275d404c753dcb9f1ffd49a433 Mon Sep 17 00:00:00 2001
From: Hongxu Ren <60290838+Flying-dragon-boxing@users.noreply.github.com>
Date: Sun, 28 Jan 2024 14:17:41 +0800
Subject: [PATCH 18/44] Update FindPEXSI.cmake to fix Comments

---
 cmake/FindPEXSI.cmake | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index 062764acce..b1565d2c06 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -1,9 +1,14 @@
 ###############################################################################
-# - Find cereal
-# Find the native cereal headers.
+# - Find PEXSI
+# Find PEXSI and its dependencies.
 #
-#  PEXSI_FOUND - True if cereal is found.
-#  PEXSI_INCLUDE_DIR - Where to find cereal headers.
+#  PEXSI_FOUND          - True if pexsi is found.
+#  PEXSI_INCLUDE_DIR    - Where to find pexsi headers.
+#  PEXSI_LIBRARY        - pexsi library.
+#  ParMETIS_INCLUDE_DIR - Where to find pexsi headers.
+#  ParMETIS_LIBRARY     - parmetis library.
+#  METIS_LIBRARY        - metis library.
+#  SuperLU_DIST_LIBRARY - superlu_dist library.
 
 find_path(PEXSI_INCLUDE_DIR
     NAMES c_pexsi_interface.h
@@ -42,7 +47,7 @@ find_library(SuperLU_DIST_LIBRARY
 )
 
 # Handle the QUIET and REQUIRED arguments and
-# set Cereal_FOUND to TRUE if all variables are non-zero.
+# set PEXSI_FOUND to TRUE if all variables are non-zero.
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY)
 

From f3e18a469bf9d481b862ae008b4700b2e07a3f47 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Mon, 29 Jan 2024 12:52:57 +0800
Subject: [PATCH 19/44] Fix CI errors

---
 source/Makefile                        |  4 +-
 source/module_base/parallel_global.cpp | 65 ++++++++++++++++----------
 source/module_esolver/esolver_ks.cpp   |  5 +-
 source/module_hsolver/diago_elpa.cpp   | 32 -------------
 source/module_io/input.cpp             |  2 +-
 5 files changed, 46 insertions(+), 62 deletions(-)

diff --git a/source/Makefile b/source/Makefile
index 7bd81f26d9..e726b8fa1d 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -7,7 +7,7 @@ include Makefile.vars
 INCLUDES = -I. -Icommands -I../ -Imodule_base/module_container
 
 LIBS = -lm -lpthread
-OPTS = ${INCLUDES} -std=c++14 -pedantic -m64 ${INCLUDES}
+OPTS = -std=c++14 -pedantic -m64 ${INCLUDES}
 HONG = -D__LCAO
 HONG += -D__ELPA
 ifeq ($(OPENMP), ON)
@@ -45,7 +45,7 @@ ifeq ($(DEBUG), ON)
     endif
     OPTS += -O0 -fsanitize=address -fno-omit-frame-pointer -Wall -g #It can check segmental defaults
 else
-    HONG += -O0 -march=native -DNDEBUG
+    HONG += -Ofast -march=native -DNDEBUG
 endif
 
 ifeq ($(INTEL), ON)
diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp
index 6740cd622c..3361d4b7f1 100644
--- a/source/module_base/parallel_global.cpp
+++ b/source/module_base/parallel_global.cpp
@@ -26,7 +26,6 @@ MPI_Comm STO_WORLD;
 MPI_Comm PARAPW_WORLD; // qianrui add it for sto-dft 2021-4-14
 MPI_Comm GRID_WORLD; // mohan add 2012-01-13z
 MPI_Comm DIAG_WORLD; // mohan add 2012-01-13
-MPI_Group GRID_GROUP;
 
 void Parallel_Global::myProd(std::complex<double> *in,std::complex<double> *inout,int *len,MPI_Datatype *dptr)
 {
@@ -85,7 +84,6 @@ void Parallel_Global::split_diag_world(const int &diag_np)
 	}
 
 	MPI_Comm_split(MPI_COMM_WORLD, color, key, &DIAG_WORLD);
-    MPI_Comm_group(DIAG_WORLD, &GRID_GROUP);
 	MPI_Comm_rank(DIAG_WORLD, &GlobalV::DRANK);
 	MPI_Comm_size(DIAG_WORLD, &GlobalV::DSIZE);
 	GlobalV::DCOLOR=color;
@@ -102,30 +100,49 @@ void Parallel_Global::split_diag_world(const int &diag_np)
 
 
 
-void Parallel_Global::split_grid_world(const int &grid_np)
+void Parallel_Global::split_grid_world(const int &diag_np)
 {
 #ifdef __MPI
-	assert(grid_np>0); //LiuXh, 2020-12-14, diag_np --> grid_np
-    int myid;
-    MPI_Group WORLD_GROUP;
-    //MPI_Comm_rank(MPI_COMM_WORLD, &key);
-    MPI_Comm_rank(MPI_COMM_WORLD, &myid); //LiuXh, 2020-12-14, key --> myid
-    MPI_Comm_group(MPI_COMM_WORLD, &WORLD_GROUP);
-
-    int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np};
-    MPI_Group_range_incl(WORLD_GROUP, 1, &grid_proc_range, &GRID_GROUP);
-
-    GRID_WORLD=MPI_COMM_NULL;
-    MPI_Comm_create(MPI_COMM_WORLD, GRID_GROUP, &GRID_WORLD);
-    if(GRID_WORLD != MPI_COMM_NULL)
-    {
-        MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD
-        MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE); //LiuXh, 2020-12-14, DIAG_WORLD --> GRID_WORLD
-    }else
-    {
-        GlobalV::GRANK=-1;
-        GlobalV::GSIZE=-1;
-    }
+	assert(diag_np>0);
+	// number of processors in each 'grid group'.
+	int* group_grid_np = new int[diag_np];
+	ModuleBase::GlobalFunc::ZEROS(group_grid_np, diag_np);
+	// average processors in each 'grid group'
+	int ave = GlobalV::NPROC/diag_np;
+	// remain processors.
+	int remain = GlobalV::NPROC - ave * diag_np;
+
+	for(int i=0; i<diag_np; ++i)
+	{
+		group_grid_np[i] = ave;
+		if(i<remain)
+		{
+			++group_grid_np[i];
+		}
+	}
+
+	// color: same color will stay in same group.
+	// key: rank in each fragment group.
+	int color = -1;		// Peize Lin add initialization for compiler warning at 2020.01.31
+	int key = -1;		// Peize Lin add initialization for compiler warning at 2020.01.31
+
+	int np_now = 0;
+	for(int i=0; i<diag_np; ++i)
+	{
+		np_now += group_grid_np[i];
+		if(GlobalV::MY_RANK < np_now)
+		{
+			color = i;
+			key = group_grid_np[i] - (np_now - GlobalV::MY_RANK);
+			break;
+		}
+	}
+
+	MPI_Comm_split(MPI_COMM_WORLD, color, key, &GRID_WORLD);
+	MPI_Comm_rank(GRID_WORLD, &GlobalV::GRANK);
+	MPI_Comm_size(GRID_WORLD, &GlobalV::GSIZE);
+
+	delete[] group_grid_np;
 #else
 	GlobalV::GRANK=0;  //mohan fix bug 2012-02-04
 	GlobalV::GSIZE=1;
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index 0b2608e5ea..6ca6fd9984 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -468,12 +468,11 @@ namespace ModuleESolver
                     SCF print: G1    -3.435545e+03  0.000000e+00   3.607e-01  2.862e-01
                 */
                 printiter(iter, drho, duration, diag_ethr);
-                if (this->conv_elec && iter >= 5)
+                if (this->conv_elec)
                 {
-                    std::cout << "this->conv_elec" << std::endl;
                     this->niter = iter;
                     bool stop = this->do_after_converge(iter);
-                    if(stop) {std::cout << "break\n"; break;}
+                    if(stop) break;
                 }
                 // notice for restart
                 if (GlobalV::MIXING_RESTART > 0 && iter == GlobalV::MIXING_RESTART - 1)
diff --git a/source/module_hsolver/diago_elpa.cpp b/source/module_hsolver/diago_elpa.cpp
index a50645bfb8..f3852493a1 100644
--- a/source/module_hsolver/diago_elpa.cpp
+++ b/source/module_hsolver/diago_elpa.cpp
@@ -35,24 +35,7 @@ namespace hsolver
     ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc);
     this->DecomposedState=0; // for k pointer, the decomposed s_mat can not be reused
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
-    std::cout << "???" << std::endl;
-    GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n";
-    GlobalV::ofs_running << "print H" << std::endl;
-    for (int i = 0; i < h_mat.col; i++)
-    {
-        for (int j = 0; j < h_mat.row; j++)
-        {
-            if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001)
-            {
-                GlobalV::ofs_running << "0 ";
-            }
-            else
-                GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " ";
-        }
-        GlobalV::ofs_running << std::endl;
-    }
     es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer());
-
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
     es.exit();
 
@@ -78,21 +61,6 @@ namespace hsolver
     //ELPA_Solver es(isReal, COMM_DIAG, GlobalV::NBANDS, h_mat.row, h_mat.col, h_mat.desc);
     ELPA_Solver es((const bool)isReal, COMM_DIAG, (const int)GlobalV::NBANDS, (const int)h_mat.row, (const int)h_mat.col, (const int*)h_mat.desc);
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
-    GlobalV::ofs_running << "nrow: " << h_mat.row << "\nncol: " << h_mat.col << "\n";
-    GlobalV::ofs_running << "print H" << std::endl;
-    for (int i = 0; i < h_mat.col; i++)
-    {
-        for (int j = 0; j < h_mat.row; j++)
-        {
-            if (std::abs(h_mat.p[i * h_mat.col + j]) < 0.00000001)
-            {
-                GlobalV::ofs_running << "0 ";
-            }
-            else
-                GlobalV::ofs_running << h_mat.p[i * h_mat.col + j] << " ";
-        }
-        GlobalV::ofs_running << std::endl;
-    }
     es.generalized_eigenvector(h_mat.p, s_mat.p, this->DecomposedState, eigen.data(), psi.get_pointer());
     ModuleBase::timer::tick("DiagoElpa", "elpa_solve");
     es.exit();
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index b74ac6b104..42ccdde938 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -4062,7 +4062,7 @@ void Input::Check(void)
         {
             ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis.");
         }
-        else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav")
+        else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav" && ks_solver != "bpcg")
         {
             ModuleBase::WARNING_QUIT("Input", "please check the ks_solver parameter!");
         }

From 4d16f56f5d583f887811e96ec1a5572153840adb Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Mon, 29 Jan 2024 13:13:21 +0800
Subject: [PATCH 20/44] Fix CI Errors and Merge with Upstream

---
 docs/advanced/input_files/input-main.md       |  6 +-
 source/module_cell/read_atoms.cpp             | 91 +++++++++++--------
 source/module_cell/test/unitcell_test.cpp     |  3 +-
 source/module_esolver/esolver_dp.cpp          |  9 ++
 source/module_esolver/esolver_lj.cpp          |  9 ++
 .../hamilt_lcaodft/LCAO_hamilt.hpp            |  1 +
 source/module_io/input.cpp                    | 11 ++-
 source/module_io/test/input_test.cpp          |  9 --
 8 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 8c675cab92..abd8cb7ff4 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -2123,14 +2123,14 @@ These variables are relevant when using hybrid functionals.
 ### exx_hybrid_step
 
 - **Type**: Integer
-- **Availability**: *[exx_seperate_loop](#exx_separate_loop)==1*
+- **Availability**: *[exx_separate_loop](#exx_separate_loop)==1*
 - **Description**: the maximal iteration number of the outer-loop, where the Fock exchange is calculated
 - **Default**: 100
 
 ### exx_mixing_beta
 
 - **Type**: Real
-- **Availability**: *[exx_seperate_loop](#exx_separate_loop)==1*
+- **Availability**: *[exx_separate_loop](#exx_separate_loop)==1*
 - **Description**: mixing_beta for densty matrix in each iteration of the outer-loop
 - **Default**: 1.0
 
@@ -2417,7 +2417,7 @@ These variables are used to control molecular dynamics calculations. For more in
 
 - **Type**: Real
 - **Description**: The target pressure used in NPT ensemble simulations, the default value of `md_plast` is `md_pfirst`. If `md_plast` is set to be different from `md_pfirst`, ABACUS will automatically change the target pressure from `md_pfirst` to `md_plast`.
-- **Default**: No default
+- **Default**: -1.0
 - **Unit**: kbar
 
 ### md_pfreq
diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp
index 4c6bf9c0eb..abe05909f2 100644
--- a/source/module_cell/read_atoms.cpp
+++ b/source/module_cell/read_atoms.cpp
@@ -1017,26 +1017,32 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int
 				context<<atoms[it].mbl[ia].x<<atoms[it].mbl[ia].y<<atoms[it].mbl[ia].z;
 				ofs<<context.str();
 
-				if(level == 1)
-				{
-					// output velocity
-					ofs <<" v ";
-					context.set_context("vector3d");
-					context<<atoms[it].vel[ia].x<<atoms[it].vel[ia].y<<atoms[it].vel[ia].z;
-					ofs<<context.str()<<std::endl;
-				}
-				else if(level == 2)
-				{
-					// output magnetic information
-				}
-				else if(level == 3)
-				{
-					// output velocity and magnetic information
-				}
-				else
-				{
-					ofs << std::endl;
-				}
+                if (GlobalV::CALCULATION == "md")
+                {
+                    // output velocity
+                    ofs << " v ";
+                    context.set_context("vector3d");
+                    context << atoms[it].vel[ia].x << atoms[it].vel[ia].y << atoms[it].vel[ia].z;
+                    ofs << context.str();
+                }
+
+                if (GlobalV::NSPIN == 2)
+                {
+                    // output magnetic information
+                    ofs << " mag ";
+                    context.set_context("double_w6_f2");
+                    context << atoms[it].mag[ia];
+                    ofs << context.str();
+                }
+                else if (GlobalV::NSPIN == 4)
+                {
+                    // output magnetic information
+                    ofs << " mag ";
+                    context.set_context("vector3d");
+                    context << atoms[it].m_loc_[ia].x << " " << atoms[it].m_loc_[ia].y << " " << atoms[it].m_loc_[ia].z;
+                    ofs << context.str();
+                }
+                ofs << std::endl;
 			}
 		}
 	}
@@ -1058,25 +1064,32 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int
 				context<<atoms[it].mbl[ia].x<<atoms[it].mbl[ia].y<<atoms[it].mbl[ia].z;
 				ofs<<context.str();
 
-				if(level == 1)
-				{
-					// output velocity
-					context.set_context("vector3d");
-					context<<atoms[it].vel[ia].x<<atoms[it].vel[ia].y<<atoms[it].vel[ia].z;
-					ofs <<" v "<<context.str()<<std::endl;
-				}
-				else if(level == 2)
-				{
-					// output magnetic information
-				}
-				else if(level == 3)
-				{
-					// output velocity and magnetic information
-				}
-				else
-				{
-					ofs << std::endl;
-				}
+                if (GlobalV::CALCULATION == "md")
+                {
+                    // output velocity
+                    ofs << " v ";
+                    context.set_context("vector3d");
+                    context << atoms[it].vel[ia].x << atoms[it].vel[ia].y << atoms[it].vel[ia].z;
+                    ofs << context.str();
+                }
+
+                if (GlobalV::NSPIN == 2)
+                {
+                    // output magnetic information
+                    ofs << " mag ";
+                    context.set_context("double_w6_f2");
+                    context << atoms[it].mag[ia];
+                    ofs << context.str();
+                }
+                else if (GlobalV::NSPIN == 4)
+                {
+                    // output magnetic information
+                    ofs << " mag ";
+                    context.set_context("vector3d");
+                    context << atoms[it].m_loc_[ia].x << " " << atoms[it].m_loc_[ia].y << " " << atoms[it].m_loc_[ia].z;
+                    ofs << context.str();
+                }
+                ofs << std::endl;
 			}
 		}
 	}
diff --git a/source/module_cell/test/unitcell_test.cpp b/source/module_cell/test/unitcell_test.cpp
index 1bc4da54a1..b430a943a7 100644
--- a/source/module_cell/test/unitcell_test.cpp
+++ b/source/module_cell/test/unitcell_test.cpp
@@ -744,7 +744,8 @@ TEST_F(UcellTest,PrintSTRU)
 	//Cartesian type of coordinates
 	std::string fn = "C1H2_STRU";
 	int type = 1; // for Cartesian
-	int level = 1; //print velocity in STRU
+	int level = 1;
+    GlobalV::CALCULATION = "md"; // print velocity in STRU
 	ucell->print_stru_file(fn,type,level);
 	std::ifstream ifs;
 	ifs.open("C1H2_STRU");
diff --git a/source/module_esolver/esolver_dp.cpp b/source/module_esolver/esolver_dp.cpp
index d4ce7195a5..8551ead5ff 100644
--- a/source/module_esolver/esolver_dp.cpp
+++ b/source/module_esolver/esolver_dp.cpp
@@ -136,6 +136,15 @@ namespace ModuleESolver
     void ESolver_DP::cal_Stress(ModuleBase::matrix& stress)
     {
         stress = dp_virial;
+
+        // external stress
+        double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8;
+        double external_stress[3] = {GlobalV::PRESS1, GlobalV::PRESS2, GlobalV::PRESS3};
+        for (int i = 0; i < 3; i++)
+        {
+            stress(i, i) -= external_stress[i] / unit_transform;
+        }
+
         ModuleIO::print_stress("TOTAL-STRESS", stress, true, false);
     }
 
diff --git a/source/module_esolver/esolver_lj.cpp b/source/module_esolver/esolver_lj.cpp
index 19e9d2369e..f48ff290d8 100644
--- a/source/module_esolver/esolver_lj.cpp
+++ b/source/module_esolver/esolver_lj.cpp
@@ -107,6 +107,15 @@ namespace ModuleESolver
     void ESolver_LJ::cal_Stress(ModuleBase::matrix& stress)
     {
         stress = lj_virial;
+
+        // external stress
+        double unit_transform = ModuleBase::RYDBERG_SI / pow(ModuleBase::BOHR_RADIUS_SI, 3) * 1.0e-8;
+        double external_stress[3] = {GlobalV::PRESS1, GlobalV::PRESS2, GlobalV::PRESS3};
+        for (int i = 0; i < 3; i++)
+        {
+            stress(i, i) -= external_stress[i] / unit_transform;
+        }
+
         ModuleIO::print_stress("TOTAL-STRESS", stress, true, false);
     }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
index 8edfb5be65..db8b32a13a 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
@@ -51,6 +51,7 @@ void LCAO_Hamilt::calculate_HR_exx_sparse(
     {
 		int is0_b, is1_b;
 		std::tie(is0_b,is1_b) = RI_2D_Comm::split_is_block(is);
+        if (Hexxs.empty())	break;
 		for(const auto &HexxA : Hexxs[is])
 		{
 			const int iat0 = HexxA.first;
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 42ccdde938..580dd203db 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -713,6 +713,8 @@ bool Input::Read(const std::string& fn)
         return false; // return error : false
     }
 
+    bool plast_find = false; // whether the parameter md_plast is found liuyu 2024-01-28
+
     ifs.rdstate();
     while (ifs.good())
     {
@@ -1609,10 +1611,15 @@ bool Input::Read(const std::string& fn)
         else if (strcmp("md_pfirst", word) == 0)
         {
             read_value(ifs, mdp.md_pfirst);
+            if (!plast_find)
+            {
+                mdp.md_plast = mdp.md_pfirst;
+            }
         }
         else if (strcmp("md_plast", word) == 0)
         {
             read_value(ifs, mdp.md_plast);
+            plast_find = true;
         }
         else if (strcmp("md_pfreq", word) == 0)
         {
@@ -3036,8 +3043,6 @@ void Input::Default_2(void) // jiyy add 2019-08-04
         }
         if (!out_md_control)
             out_level = "m"; // zhengdy add 2019-04-07
-        if (mdp.md_plast < 0.0)
-            mdp.md_plast = mdp.md_pfirst;
 
         if (mdp.md_tfreq == 0)
         {
@@ -3955,8 +3960,6 @@ void Input::Check(void)
         // deal with input parameters , 2019-04-30
         if (mdp.md_dt < 0)
             ModuleBase::WARNING_QUIT("Input::Check", "time interval of MD calculation should be set!");
-        if (mdp.md_type == "npt" && mdp.md_pfirst < 0)
-            ModuleBase::WARNING_QUIT("Input::Check", "pressure of MD calculation should be set!");
         if (mdp.md_type == "msst")
         {
             if (mdp.msst_qmass <= 0)
diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp
index 11bce873ab..decb59fdec 100644
--- a/source/module_io/test/input_test.cpp
+++ b/source/module_io/test/input_test.cpp
@@ -1140,15 +1140,6 @@ TEST_F(InputTest, Check)
 	output = testing::internal::GetCapturedStdout();
 	EXPECT_THAT(output,testing::HasSubstr("time interval of MD calculation should be set!"));
 	INPUT.mdp.md_dt = 1.0;
-    //
-    INPUT.mdp.md_type = "npt";
-	INPUT.mdp.md_pmode = "iso";
-	INPUT.mdp.md_pfirst = -1.0;
-	testing::internal::CaptureStdout();
-	EXPECT_EXIT(INPUT.Check(),::testing::ExitedWithCode(0), "");
-	output = testing::internal::GetCapturedStdout();
-	EXPECT_THAT(output,testing::HasSubstr("pressure of MD calculation should be set!"));
-	INPUT.mdp.md_pfirst = 1.0;
 	//
 	INPUT.mdp.md_type = "msst";
 	INPUT.mdp.msst_qmass = -1.0;

From 4857553ba450fd675557a8dd782ed9ca9fb90cfb Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Thu, 1 Feb 2024 13:50:44 +0800
Subject: [PATCH 21/44] Resolve Pull Request Reviews

---
 source/module_base/global_variable.cpp        |  29 -----
 source/module_base/global_variable.h          |  26 -----
 .../module_elecstate/elecstate_lcao_tddft.cpp |   2 -
 source/module_hsolver/diago_pexsi.cpp         | 102 ++++++++++++++++++
 source/module_hsolver/diago_pexsi.h           |  31 ++++++
 .../module_pexsi/dist_bcd_matrix.cpp          |   9 +-
 .../module_pexsi/dist_bcd_matrix.h            |  10 +-
 .../module_pexsi/dist_matrix_transformer.cpp  |  22 ++--
 .../module_pexsi/dist_matrix_transformer.h    |   2 +-
 .../module_pexsi/pexsi_solver.cpp             |   7 +-
 .../module_pexsi/simple_pexsi.cpp             |  55 +++++-----
 .../module_pexsi/simple_pexsi.h               |   2 +-
 source/module_io/input_conv.cpp               |  81 +++++++++-----
 13 files changed, 242 insertions(+), 136 deletions(-)

diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 6b7015dc25..eb0dc636e2 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -301,33 +301,4 @@ std::string qo_basis = "hydrogen";
 std::vector<std::string> qo_strategy = {};
 double qo_thr = 1.0e-6;
 std::vector<double> qo_screening_coeff = {};
-
-//==========================================================
-// PEXSI related
-//==========================================================
-int pexsi_npole = 54;
-int pexsi_inertia = 1;
-int pexsi_nmax = 80;
-// int pexsi_symbolic = 1;
-int pexsi_comm = 1;
-int pexsi_storage = 1;
-int pexsi_ordering = 0;
-int pexsi_row_ordering = 1;
-int pexsi_nproc = 1;
-int pexsi_symm = 1;
-int pexsi_trans = 0;
-int pexsi_method = 1;
-int pexsi_nproc_pole = 1;
-// double pexsi_spin = 2;
-double pexsi_temp = 0.0001;
-double pexsi_gap = 0;
-double pexsi_delta_e = 20.0;
-double pexsi_mu_lower = -10;
-double pexsi_mu_upper = 10;
-double pexsi_mu = 0.0;
-double pexsi_mu_thr = 0.05;
-double pexsi_mu_expand = 0.3;
-double pexsi_mu_guard = 0.2;
-double pexsi_elec_thr = 0.001;
-double pexsi_zero_thr = 1e-10;
 } // namespace GlobalV
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index 9808ca080b..3b75f1e77c 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -330,31 +330,5 @@ extern std::vector<std::string> qo_strategy;
 extern double qo_thr;
 extern std::vector<double> qo_screening_coeff;
 
-// PEXSI related
-extern int pexsi_npole;
-extern int pexsi_inertia;
-extern int pexsi_nmax;
-// extern int pexsi_symbolic;
-extern int pexsi_comm;
-extern int pexsi_storage;
-extern int pexsi_ordering;
-extern int pexsi_row_ordering;
-extern int pexsi_nproc;
-extern int pexsi_symm;
-extern int pexsi_trans;
-extern int pexsi_method;
-extern int pexsi_nproc_pole;
-// extern double pexsi_spin;
-extern double pexsi_temp;
-extern double pexsi_gap;
-extern double pexsi_delta_e;
-extern double pexsi_mu_lower;
-extern double pexsi_mu_upper;
-extern double pexsi_mu;
-extern double pexsi_mu_thr;
-extern double pexsi_mu_expand;
-extern double pexsi_mu_guard;
-extern double pexsi_elec_thr;
-extern double pexsi_zero_thr;
 } // namespace GlobalV
 #endif
diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp
index d6bec95f99..3d3539fd1a 100644
--- a/source/module_elecstate/elecstate_lcao_tddft.cpp
+++ b/source/module_elecstate/elecstate_lcao_tddft.cpp
@@ -67,9 +67,7 @@ void ElecStateLCAO_TDDFT::psiToRho_td(const psi::Psi<std::complex<double>>& psi)
     Gint_inout inout(this->loc->DM_R, this->charge->rho, Gint_Tools::job_type::rho); // rho calculation
     this->uhm->GK.cal_gint(&inout);
 
-    std::cout << "this->charge->renormalize_rho(); 1" << std::endl; 
     this->charge->renormalize_rho();
-    std::cout << "this->charge->renormalize_rho(); 1 done" << std::endl;
 
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
     return;
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 8e4ee5b15b..3b059661dc 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,3 +1,4 @@
+#include <mpi.h>
 #include <complex>
 #ifdef __PEXSI
 #include "c_pexsi_interface.h"
@@ -14,6 +15,107 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 
 namespace hsolver
 {
+template <>
+int DiagoPexsi<double>::pexsi_npole = 0;
+template <>
+int DiagoPexsi<double>::pexsi_inertia = 0;
+template <>
+int DiagoPexsi<double>::pexsi_nmax = 0;
+// template <>
+// int DiagoPexsi<double>::pexsi_symbolic = 0;
+template <>
+int DiagoPexsi<double>::pexsi_comm = 0;
+template <>
+int DiagoPexsi<double>::pexsi_storage = 0;
+template <>
+int DiagoPexsi<double>::pexsi_ordering = 0;
+template <>
+int DiagoPexsi<double>::pexsi_row_ordering = 0;
+template <>
+int DiagoPexsi<double>::pexsi_nproc = 0;
+template <>
+int DiagoPexsi<double>::pexsi_symm = 0;
+template <>
+int DiagoPexsi<double>::pexsi_trans = 0;
+template <>
+int DiagoPexsi<double>::pexsi_method = 0;
+template <>
+int DiagoPexsi<double>::pexsi_nproc_pole = 0;
+// template <>
+// double DiagoPexsi<double>::pexsi_spin = 2;
+template <>
+double DiagoPexsi<double>::pexsi_temp = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_gap = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_delta_e = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu_lower = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu_upper = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu_thr = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu_expand = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_mu_guard = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_elec_thr = 0.0;
+template <>
+double DiagoPexsi<double>::pexsi_zero_thr = 0.0;
+
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_npole = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_inertia = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_nmax = 0;
+// template <>
+// int DiagoPexsi<std::complex<double>>::pexsi_symbolic = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_comm = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_storage = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_ordering = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_row_ordering = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_nproc = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_symm = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_trans = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_method = 0;
+template <>
+int DiagoPexsi<std::complex<double>>::pexsi_nproc_pole = 0;
+// template <>
+// double DiagoPexsi<std::complex<double>>::pexsi_spin = 2;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_temp = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_gap = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_delta_e = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu_lower = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu_upper = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu_thr = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu_expand = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_mu_guard = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_elec_thr = 0.0;
+template <>
+double DiagoPexsi<std::complex<double>>::pexsi_zero_thr = 0.0;
 
 template <>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index c212d7795a..ffc89d6b4d 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -27,6 +27,37 @@ class DiagoPexsi : public DiagH<T>
     double totalEnergyS;
     double totalFreeEnergy;
     pexsi::PEXSI_Solver* ps;
+
+    //==========================================================
+    // PEXSI related variables
+    //==========================================================
+    static int pexsi_npole;
+    static int pexsi_inertia;
+    static int pexsi_nmax;
+    // static int pexsi_symbolic;
+    static int pexsi_comm;
+    static int pexsi_storage;
+    static int pexsi_ordering;
+    static int pexsi_row_ordering;
+    static int pexsi_nproc;
+    static int pexsi_symm;
+    static int pexsi_trans;
+    static int pexsi_method;
+    static int pexsi_nproc_pole;
+    // static double pexsi_spin = 2;
+    static double pexsi_temp;
+    static double pexsi_gap;
+    static double pexsi_delta_e;
+    static double pexsi_mu_lower;
+    static double pexsi_mu_upper;
+    static double pexsi_mu;
+    static double pexsi_mu_thr;
+    static double pexsi_mu_expand;
+    static double pexsi_mu_guard;
+    static double pexsi_elec_thr;
+    static double pexsi_zero_thr;
+
+    static MPI_Group grid_group;
 };
 } // namespace hsolver
 
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
index e498b83a2e..8e4c8e7ac7 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -17,7 +17,7 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm,
                              int nblk,
                              int nrow,
                              int ncol,
-                             char LAYOUT)
+                             char layout)
 {
     this->comm = comm;
     this->group = group;
@@ -26,13 +26,13 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm,
     this->nblk = nblk;
     this->nrow = nrow;
     this->ncol = ncol;
-    if (LAYOUT == 'R' || LAYOUT == 'r' || LAYOUT == 'C' || LAYOUT == 'c')
+    if (layout == 'R' || layout == 'r' || layout == 'C' || layout == 'c')
     {
-        this->LAYOUT = LAYOUT;
+        this->layout = layout;
     }
     else
     {
-        throw("The LAYOUT must be 'R', 'r', 'C', or 'c'");
+        throw("The layout must be 'R', 'r', 'C', or 'c'");
     }
 
     if (comm != MPI_COMM_NULL)
@@ -91,7 +91,6 @@ int DistBCDMatrix::globalRow(const int localRow)
 
 int DistBCDMatrix::globalCol(const int localCol)
 {
-
     return (localCol / nblk * npcols + mypcol) * nblk + localCol % nblk;
 }
 
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
index 98b8512893..0964b9787c 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -16,10 +16,10 @@ class DistBCDMatrix
   public:
     // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol);
     // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char
-    // LAYOUT);
+    // layout);
 
     // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol);
-    DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char LAYOUT);
+    DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout);
     ~DistBCDMatrix();
 
     int globalRow(const int localRow);
@@ -45,9 +45,9 @@ class DistBCDMatrix
     {
         return ncol;
     };
-    const char get_LAYOUT() const
+    const char get_layout() const
     {
-        return LAYOUT;
+        return layout;
     };
 
   private:
@@ -90,7 +90,7 @@ class DistBCDMatrix
     // the local data layout
     // 'R' or 'r' for row-major, which is used in C/C++
     // 'C' or 'c' for column-major, which is used in Fortran
-    char LAYOUT;
+    char layout;
 };
 } // namespace pexsi
 #endif // DISTBCDMATRIX_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index ef6c6fec72..eadd991217 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -167,7 +167,7 @@ inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std
 }
 
 // find out the index of non-zero elements
-inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
+inline int DistMatrixTransformer::getNonZeroIndex(char layout,
                                                   const int nrow,
                                                   const int ncol,
                                                   double* H_2d,
@@ -231,7 +231,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
     if (myproc < 100)
         log << "rowidx and colidx cleared" << std::endl;
 #endif
-    if (LAYOUT == 'C' || LAYOUT == 'c')
+    if (layout == 'C' || layout == 'c')
     {
         for (int i = 0; i < ncol; ++i)
         {
@@ -247,7 +247,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
             }
         }
     }
-    else if (LAYOUT == 'R' || LAYOUT == 'r')
+    else if (layout == 'R' || layout == 'r')
     {
         for (int i = 0; i < ncol; ++i)
         {
@@ -267,7 +267,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char LAYOUT,
     {
 #ifdef _DEBUG
         if (myproc < 100)
-            log << "unknown LAYOUT: " << LAYOUT << std::endl;
+            log << "unknown layout: " << layout << std::endl;
 #endif
         return 1;
     }
@@ -622,7 +622,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
             log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol()
                 << std::endl;
-            log << "layout:" << SRC_Matrix.get_LAYOUT() << std::endl;
+            log << "layout:" << SRC_Matrix.get_layout() << std::endl;
             log << "ZERO = " << ZERO_Limit << std::endl;
             log << "DST_Matrix parameters:" << std::endl;
             log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
@@ -641,7 +641,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 #endif
         if (SRC_Matrix.get_comm() != MPI_COMM_NULL)
         {
-            getNonZeroIndex(SRC_Matrix.get_LAYOUT(),
+            getNonZeroIndex(SRC_Matrix.get_layout(),
                             SRC_Matrix.get_nrow(),
                             SRC_Matrix.get_ncol(),
                             H_2d,
@@ -660,7 +660,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
             if(SRC_Matrix.comm != MPI_COMM_NULL)
             {
                 log<<"NonZeroIndex :"<<std::endl;
-                if(SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+                if(SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r')
                 {
                     for(int i=0; i<nnz; ++i)
                     {
@@ -713,7 +713,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
-        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+        if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
@@ -755,7 +755,7 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
 #endif
 
         // put S to sender buffer
-        if (SRC_Matrix.get_LAYOUT() == 'R' || SRC_Matrix.get_LAYOUT() == 'r')
+        if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r')
         {
             for (int i = 0; i < sender_size; ++i)
             {
@@ -1434,7 +1434,7 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
 #endif
         // transform receiver_buffer to DM
-        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
+        if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r')
         {
             int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
@@ -1518,7 +1518,7 @@ MPI_Barrier(COMM_TRANS);
 // OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
 #endif
         // transform receiver_buffer to EDM
-        if (DST_Matrix.get_LAYOUT() == 'R' || DST_Matrix.get_LAYOUT() == 'r')
+        if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r')
         {
             int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol();
             for (int i = 0; i < receiver_size; ++i)
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index e3e27e995a..c81128f9db 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -39,7 +39,7 @@ void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double*
 
 void countMatrixDistribution(int N, double* A, std::map<int, int>& P);
 
-int getNonZeroIndex(char LAYOUT,
+int getNonZeroIndex(char layout,
                     const int nrow,
                     const int ncol,
                     double* H_2d,
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 1be66abf59..8d55c15707 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -10,8 +10,6 @@
 
 extern MPI_Comm DIAG_WORLD;
 extern MPI_Comm GRID_WORLD;
-extern MPI_Group GRID_GROUP;
-
 namespace pexsi
 {
 PEXSI_Solver::PEXSI_Solver(const int blacs_text,
@@ -43,10 +41,11 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
 
 int PEXSI_Solver::solve()
 {
-
+    MPI_Group grid_group;
+    MPI_Comm_group(DIAG_WORLD, &grid_group);
     simplePEXSI(DIAG_WORLD,
                 GRID_WORLD,
-                GRID_GROUP,
+                grid_group,
                 this->blacs_text,
                 GlobalV::NLOCAL,
                 this->nb,
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index df72a061c5..b59ed233ea 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -20,6 +20,7 @@
 #include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 #include "module_base/global_variable.h"
+#include "module_hsolver/diago_pexsi.h"
 
 namespace pexsi
 {
@@ -106,36 +107,36 @@ int loadPEXSIOption(MPI_Comm comm,
     double double_para[12];
 
     // read in PEXSI options from GlobalV
-    int_para[0] = GlobalV::pexsi_npole;
-    int_para[1] = GlobalV::pexsi_inertia;
-    int_para[2] = GlobalV::pexsi_nmax;
+    int_para[0] = hsolver::DiagoPexsi<double>::pexsi_npole;
+    int_para[1] = hsolver::DiagoPexsi<double>::pexsi_inertia;
+    int_para[2] = hsolver::DiagoPexsi<double>::pexsi_nmax;
     int_para[3] = 0;
-    int_para[4] = 1; // GlobalV::pexsi_symbolic;
-    int_para[5] = GlobalV::pexsi_comm;
+    int_para[4] = 1; // hsolver::DiagoPexsi<double>::pexsi_symbolic;
+    int_para[5] = hsolver::DiagoPexsi<double>::pexsi_comm;
     int_para[6] = 0;
-    int_para[7] = GlobalV::pexsi_storage;
-    int_para[8] = GlobalV::pexsi_ordering;
-    int_para[9] = GlobalV::pexsi_row_ordering;
-    int_para[10] = GlobalV::pexsi_nproc;
-    int_para[11] = GlobalV::pexsi_symm;
-    int_para[12] = GlobalV::pexsi_trans;
-    int_para[13] = GlobalV::pexsi_method;
+    int_para[7] = hsolver::DiagoPexsi<double>::pexsi_storage;
+    int_para[8] = hsolver::DiagoPexsi<double>::pexsi_ordering;
+    int_para[9] = hsolver::DiagoPexsi<double>::pexsi_row_ordering;
+    int_para[10] = hsolver::DiagoPexsi<double>::pexsi_nproc;
+    int_para[11] = hsolver::DiagoPexsi<double>::pexsi_symm;
+    int_para[12] = hsolver::DiagoPexsi<double>::pexsi_trans;
+    int_para[13] = hsolver::DiagoPexsi<double>::pexsi_method;
     int_para[14] = 2;
     int_para[15] = 0;
-    int_para[16] = GlobalV::pexsi_nproc_pole;
+    int_para[16] = hsolver::DiagoPexsi<double>::pexsi_nproc_pole;
 
-    double_para[0] = GlobalV::NSPIN; // GlobalV::pexsi_spin;
-    double_para[1] = GlobalV::pexsi_temp;
-    double_para[2] = GlobalV::pexsi_gap;
-    double_para[3] = GlobalV::pexsi_delta_e;
-    double_para[4] = GlobalV::pexsi_mu_lower;
-    double_para[5] = GlobalV::pexsi_mu_upper;
-    double_para[6] = GlobalV::pexsi_mu;
-    double_para[7] = GlobalV::pexsi_mu_thr;
-    double_para[8] = GlobalV::pexsi_mu_expand;
-    double_para[9] = GlobalV::pexsi_mu_guard;
-    double_para[10] = GlobalV::pexsi_elec_thr;
-    double_para[11] = GlobalV::pexsi_zero_thr;
+    double_para[0] = GlobalV::NSPIN; // hsolver::DiagoPexsi<double>::pexsi_spin;
+    double_para[1] = hsolver::DiagoPexsi<double>::pexsi_temp;
+    double_para[2] = hsolver::DiagoPexsi<double>::pexsi_gap;
+    double_para[3] = hsolver::DiagoPexsi<double>::pexsi_delta_e;
+    double_para[4] = hsolver::DiagoPexsi<double>::pexsi_mu_lower;
+    double_para[5] = hsolver::DiagoPexsi<double>::pexsi_mu_upper;
+    double_para[6] = hsolver::DiagoPexsi<double>::pexsi_mu;
+    double_para[7] = hsolver::DiagoPexsi<double>::pexsi_mu_thr;
+    double_para[8] = hsolver::DiagoPexsi<double>::pexsi_mu_expand;
+    double_para[9] = hsolver::DiagoPexsi<double>::pexsi_mu_guard;
+    double_para[10] = hsolver::DiagoPexsi<double>::pexsi_elec_thr;
+    double_para[11] = hsolver::DiagoPexsi<double>::pexsi_zero_thr;
     // int myid;
     // MPI_Comm_rank(comm, &myid);
     // if (myid == 0)
@@ -426,7 +427,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                 const int nblk,
                 const int nrow,
                 const int ncol,
-                char LAYOUT, // matrix parameters
+                char layout, // matrix parameters
                 double* H,
                 double* S, // input matrices
                 const double numElectronExact,
@@ -531,7 +532,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // DONE(ofs_running,"create block cyclic distribution matrix parameter, begin");
     // OUT(ofs_running, "checkpoint10");
-    DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, LAYOUT);
+    DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout);
 // OUT(ofs_running, "checkpoint11");
 #ifdef _DEBUG
     if (comm_PEXSI != MPI_COMM_NULL)
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h
index 6d569154e9..fded81fc59 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.h
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.h
@@ -13,7 +13,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                 const int nblk,
                 const int nrow,
                 const int ncol,
-                char LAYOUT, // input matrix parameters
+                char layout, // input matrix parameters
                 double* H,
                 double* S, // input matrices
                 const double nElectronExact,
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index d6e3371111..9d96b9f993 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -24,6 +24,9 @@
 #include "module_hamilt_lcao/module_dftu/dftu.h"
 #include "module_hamilt_lcao/module_tddft/evolve_elec.h"
 #endif
+#ifdef __PEXSI
+#include "module_hsolver/diago_pexsi.h"
+#endif
 
 #include "module_base/timer.h"
 #include "module_elecstate/elecstate_lcao.h"
@@ -770,31 +773,59 @@ void Input_Conv::Convert(void)
     //-----------------------------------------------
     // PEXSI related parameters
     //-----------------------------------------------
-    GlobalV::pexsi_npole = INPUT.pexsi_npole;
-    GlobalV::pexsi_inertia = INPUT.pexsi_inertia;
-    GlobalV::pexsi_nmax = INPUT.pexsi_nmax;
-    // GlobalV::pexsi_symbolic = INPUT.pexsi_symbolic;
-    GlobalV::pexsi_comm = INPUT.pexsi_comm;
-    GlobalV::pexsi_storage = INPUT.pexsi_storage;
-    GlobalV::pexsi_ordering = INPUT.pexsi_ordering;
-    GlobalV::pexsi_row_ordering = INPUT.pexsi_row_ordering;
-    GlobalV::pexsi_nproc = INPUT.pexsi_nproc;
-    GlobalV::pexsi_symm = INPUT.pexsi_symm;
-    GlobalV::pexsi_trans = INPUT.pexsi_trans;
-    GlobalV::pexsi_method = INPUT.pexsi_method;
-    GlobalV::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
-    // GlobalV::pexsi_spin = INPUT.pexsi_spin;
-    GlobalV::pexsi_temp = INPUT.pexsi_temp;
-    GlobalV::pexsi_gap = INPUT.pexsi_gap;
-    GlobalV::pexsi_delta_e = INPUT.pexsi_delta_e;
-    GlobalV::pexsi_mu_lower = INPUT.pexsi_mu_lower;
-    GlobalV::pexsi_mu_upper = INPUT.pexsi_mu_upper;
-    GlobalV::pexsi_mu = INPUT.pexsi_mu;
-    GlobalV::pexsi_mu_thr = INPUT.pexsi_mu_thr;
-    GlobalV::pexsi_mu_expand = INPUT.pexsi_mu_expand;
-    GlobalV::pexsi_mu_guard = INPUT.pexsi_mu_guard;
-    GlobalV::pexsi_elec_thr = INPUT.pexsi_elec_thr;
-    GlobalV::pexsi_zero_thr = INPUT.pexsi_zero_thr;
+#ifdef __PEXSI
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_npole = INPUT.pexsi_npole;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_inertia = INPUT.pexsi_inertia;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nmax = INPUT.pexsi_nmax;
+    // hsolver::DiagoPexsi<std::complex<double>>::pexsi_symbolic = INPUT.pexsi_symbolic;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_comm = INPUT.pexsi_comm;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_storage = INPUT.pexsi_storage;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_ordering = INPUT.pexsi_ordering;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_row_ordering = INPUT.pexsi_row_ordering;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc = INPUT.pexsi_nproc;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_symm = INPUT.pexsi_symm;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_trans = INPUT.pexsi_trans;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_method = INPUT.pexsi_method;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
+    // hsolver::DiagoPexsi<std::complex<double>>::pexsi_spin = INPUT.pexsi_spin;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp = INPUT.pexsi_temp;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_gap = INPUT.pexsi_gap;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_delta_e = INPUT.pexsi_delta_e;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_lower = INPUT.pexsi_mu_lower;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_upper = INPUT.pexsi_mu_upper;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu = INPUT.pexsi_mu;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_thr = INPUT.pexsi_mu_thr;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_expand = INPUT.pexsi_mu_expand;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_guard = INPUT.pexsi_mu_guard;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_elec_thr = INPUT.pexsi_elec_thr;
+    hsolver::DiagoPexsi<std::complex<double>>::pexsi_zero_thr = INPUT.pexsi_zero_thr;
+
+    hsolver::DiagoPexsi<double>::pexsi_npole = INPUT.pexsi_npole;
+    hsolver::DiagoPexsi<double>::pexsi_inertia = INPUT.pexsi_inertia;
+    hsolver::DiagoPexsi<double>::pexsi_nmax = INPUT.pexsi_nmax;
+    // hsolver::DiagoPexsi<double>::pexsi_symbolic = INPUT.pexsi_symbolic;
+    hsolver::DiagoPexsi<double>::pexsi_comm = INPUT.pexsi_comm;
+    hsolver::DiagoPexsi<double>::pexsi_storage = INPUT.pexsi_storage;
+    hsolver::DiagoPexsi<double>::pexsi_ordering = INPUT.pexsi_ordering;
+    hsolver::DiagoPexsi<double>::pexsi_row_ordering = INPUT.pexsi_row_ordering;
+    hsolver::DiagoPexsi<double>::pexsi_nproc = INPUT.pexsi_nproc;
+    hsolver::DiagoPexsi<double>::pexsi_symm = INPUT.pexsi_symm;
+    hsolver::DiagoPexsi<double>::pexsi_trans = INPUT.pexsi_trans;
+    hsolver::DiagoPexsi<double>::pexsi_method = INPUT.pexsi_method;
+    hsolver::DiagoPexsi<double>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
+    // hsolver::DiagoPexsi<double>::pexsi_spin = INPUT.pexsi_spin;
+    hsolver::DiagoPexsi<double>::pexsi_temp = INPUT.pexsi_temp;
+    hsolver::DiagoPexsi<double>::pexsi_gap = INPUT.pexsi_gap;
+    hsolver::DiagoPexsi<double>::pexsi_delta_e = INPUT.pexsi_delta_e;
+    hsolver::DiagoPexsi<double>::pexsi_mu_lower = INPUT.pexsi_mu_lower;
+    hsolver::DiagoPexsi<double>::pexsi_mu_upper = INPUT.pexsi_mu_upper;
+    hsolver::DiagoPexsi<double>::pexsi_mu = INPUT.pexsi_mu;
+    hsolver::DiagoPexsi<double>::pexsi_mu_thr = INPUT.pexsi_mu_thr;
+    hsolver::DiagoPexsi<double>::pexsi_mu_expand = INPUT.pexsi_mu_expand;
+    hsolver::DiagoPexsi<double>::pexsi_mu_guard = INPUT.pexsi_mu_guard;
+    hsolver::DiagoPexsi<double>::pexsi_elec_thr = INPUT.pexsi_elec_thr;
+    hsolver::DiagoPexsi<double>::pexsi_zero_thr = INPUT.pexsi_zero_thr;
+#endif
     ModuleBase::timer::tick("Input_Conv", "Convert");
     return;
 }

From 70d68d90c735996c392705f033cc182d15fe99df Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Thu, 1 Feb 2024 17:49:06 +0800
Subject: [PATCH 22/44] Fix parallel communication related issue

---
 source/module_hsolver/module_pexsi/pexsi_solver.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 8d55c15707..ebbf253b20 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -1,3 +1,4 @@
+#include "module_base/parallel_global.h"
 #ifdef __PEXSI
 #include "pexsi_solver.h"
 
@@ -42,9 +43,17 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
 int PEXSI_Solver::solve()
 {
     MPI_Group grid_group;
-    MPI_Comm_group(DIAG_WORLD, &grid_group);
+    int myid, grid_np;
+    MPI_Group world_group;
+    MPI_Comm_rank(DIAG_WORLD, &myid);
+    MPI_Comm_size(DIAG_WORLD, &grid_np);
+    MPI_Comm_group(DIAG_WORLD, &world_group);
+
+    int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np};
+    MPI_Group_range_incl(world_group, 1, &grid_proc_range, &grid_group);
+
     simplePEXSI(DIAG_WORLD,
-                GRID_WORLD,
+                DIAG_WORLD,
                 grid_group,
                 this->blacs_text,
                 GlobalV::NLOCAL,

From 5b4a6cf8eb51c0861e2e9219f5a5a1a6492421f3 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Sat, 3 Feb 2024 17:10:29 +0800
Subject: [PATCH 23/44] Fix vars in Makefile.vars, add input tests and comments
 for pexsi vars

---
 source/Makefile.vars                          | 14 +--
 source/module_base/global_variable.h          |  1 -
 source/module_elecstate/elecstate.cpp         | 28 +++---
 source/module_elecstate/elecstate_lcao.cpp    | 26 +-----
 source/module_elecstate/elecstate_lcao.h      |  2 +-
 source/module_hsolver/diago_pexsi.cpp         | 20 ++---
 source/module_hsolver/diago_pexsi.h           | 90 +++++++++++++++++--
 source/module_hsolver/test/CMakeLists.txt     |  6 --
 .../module_hsolver/test/diago_lcao_test.cpp   |  7 --
 source/module_io/input.cpp                    | 21 +++--
 source/module_io/input.h                      | 10 +--
 source/module_io/test/input_conv_test.cpp     | 52 +++++++++++
 source/module_io/test/input_test_para.cpp     | 24 +++++
 source/module_io/test/write_input_test.cpp    | 38 ++++++++
 source/module_io/write_input.cpp              | 10 +--
 15 files changed, 247 insertions(+), 102 deletions(-)

diff --git a/source/Makefile.vars b/source/Makefile.vars
index 477b0a251d..f00eca8ea3 100644
--- a/source/Makefile.vars
+++ b/source/Makefile.vars
@@ -29,10 +29,10 @@ OPENMP = OFF
 ## CEREAL_DIR        should contain an include folder.
 #----------------------------------------------------------------------
 
-ELPA_DIR      = /root/lib/ELPA
-ELPA_INCLUDE_DIR = ${ELPA_DIR}/include/
+ELPA_DIR      = /usr/local/include/elpa-2021.05.002
+ELPA_INCLUDE_DIR = ${ELPA_DIR}/elpa
 
-CEREAL_DIR    = /root/lib/cereal
+CEREAL_DIR    = /usr/local/include/cereal
 
 
 ##-------------------  FOR GNU COMPILER  ------------------------------
@@ -44,7 +44,7 @@ CEREAL_DIR    = /root/lib/cereal
 ## CEREAL_DIR        should contain an include folder.
 ##---------------------------------------------------------------------
 
-# FFTW_DIR = /root/lib/FFTW3
+# FFTW_DIR = /public/soft/fftw_3.3.8
 # OPENBLAS_LIB_DIR   = /public/soft/openblas/lib
 # SCALAPACK_LIB_DIR  = /public/soft/openblas/lib
 
@@ -73,9 +73,9 @@ CEREAL_DIR    = /root/lib/cereal
 # LIBRI_DIR     = /public/software/LibRI
 # LIBCOMM_DIR   = /public/software/LibComm
 
-# PEXSI_DIR = /home/rhx/projects/pexsi-build/pexsi
-# DSUPERLU_DIR = /home/rhx/projects/pexsi-build/superlu
-# PARMETIS_DIR    = /home/rhx/projects/pexsi-build/parmetis
+# PEXSI_DIR = /public/software/pexsi
+# DSUPERLU_DIR = /public/software/superlu_dist
+# PARMETIS_DIR    = /public/software/parmetis
 
 ##---------------------------------------------------------------------
 # NP = 14 # It is not supported. use make -j14 or make -j to parallelly compile
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index 5efed29033..1bbe1edb91 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -330,6 +330,5 @@ extern std::string qo_basis;
 extern std::vector<std::string> qo_strategy;
 extern double qo_thr;
 extern std::vector<double> qo_screening_coeff;
-
 } // namespace GlobalV
 #endif
diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp
index e2a4c3eec6..393c2d07d5 100644
--- a/source/module_elecstate/elecstate.cpp
+++ b/source/module_elecstate/elecstate.cpp
@@ -174,28 +174,26 @@ void ElecState::calEBand()
     ModuleBase::TITLE("ElecState", "calEBand");
     // calculate ebands using wg and ekb
     double eband = 0.0;
-    {
 #ifdef _OPENMP
 #pragma omp parallel for collapse(2) reduction(+:eband)
 #endif
-        for (int ik = 0; ik < this->ekb.nr; ++ik)
+    for (int ik = 0; ik < this->ekb.nr; ++ik)
+    {
+        for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++)
         {
-            for (int ibnd = 0; ibnd < this->ekb.nc; ibnd++)
-            {
-                eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd);
-            }
+            eband += this->ekb(ik, ibnd) * this->wg(ik, ibnd);
         }
-        this->f_en.eband = eband;
-        if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft")
-        {
-            //==================================
-            // Reduce all the Energy in each cpu
-            //==================================
-            this->f_en.eband /= GlobalV::NPROC_IN_POOL;
+    }
+    this->f_en.eband = eband;
+    if (GlobalV::KPAR != 1 && GlobalV::ESOLVER_TYPE != "sdft")
+    {
+        //==================================
+        // Reduce all the Energy in each cpu
+        //==================================
+        this->f_en.eband /= GlobalV::NPROC_IN_POOL;
 #ifdef __MPI
-            Parallel_Reduce::reduce_all(this->f_en.eband);
+        Parallel_Reduce::reduce_all(this->f_en.eband);
 #endif
-        }
     }
     return;
 }
diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 3970b98b4b..23d55162f8 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -191,10 +191,6 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
                 this->loc->set_dm_gamma(is, this->DM->get_DMK_pointer(is));
             }
         }
-
-
-        
-
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
 
         for (int ik = 0; ik < psi.get_nk(); ++ik)
@@ -222,23 +218,6 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
     //------------------------------------------------------------
     // calculate the charge density on real space grid.
     //------------------------------------------------------------
-    // print matrix zzh
-    // GlobalV::ofs_running << "dm_gamma print\n";
-    // for(int i=0; i< this->loc->dm_gamma[0].nc; i++)
-    // {
-    //     for(int j=0; j<this->loc->dm_gamma[0].nr; j++)
-    //     {
-    //         if (std::abs(this->loc->dm_gamma[0](i, j)) < 0.00000001)
-    //         {
-    //             GlobalV::ofs_running << "0 ";
-    //         }
-    //         else
-    //         {
-    //             GlobalV::ofs_running << this->loc->dm_gamma[0](i, j) << " ";
-    //         }
-    //     }
-    //     GlobalV::ofs_running << std::endl;
-    // }
     ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!");
     this->uhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint
     Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho);
@@ -285,11 +264,10 @@ void ElecStateLCAO<double>::dmToRho(double* pexsi_DM)
 {
     ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
 
-    this->loc->set_dm_gamma(0, pexsi_DM);
-
     // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25
     if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
     {
+        this->loc->set_dm_gamma(0, pexsi_DM);
         this->loc->cal_dk_gamma_from_2D_pub();
     }
 
@@ -333,6 +311,4 @@ void ElecStateLCAO<std::complex<double>>::dmToRho(std::complex<double>* DM)
 template class ElecStateLCAO<double>; // Gamma_only case
 template class ElecStateLCAO<std::complex<double>>; // multi-k case
 
-
-
 } // namespace elecstate
\ No newline at end of file
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 8c86844486..c1cf231a45 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -60,7 +60,7 @@ class ElecStateLCAO : public ElecState
     double get_spin_constrain_energy() override;
 
 #ifdef __PEXSI
-    //use for pexsi
+    // use for pexsi
     void dmToRho(TK* DM);
 #endif
 
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 3b059661dc..803680fe31 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -18,15 +18,15 @@ namespace hsolver
 template <>
 int DiagoPexsi<double>::pexsi_npole = 0;
 template <>
-int DiagoPexsi<double>::pexsi_inertia = 0;
+bool DiagoPexsi<double>::pexsi_inertia = 0;
 template <>
 int DiagoPexsi<double>::pexsi_nmax = 0;
 // template <>
 // int DiagoPexsi<double>::pexsi_symbolic = 0;
 template <>
-int DiagoPexsi<double>::pexsi_comm = 0;
+bool DiagoPexsi<double>::pexsi_comm = 0;
 template <>
-int DiagoPexsi<double>::pexsi_storage = 0;
+bool DiagoPexsi<double>::pexsi_storage = 0;
 template <>
 int DiagoPexsi<double>::pexsi_ordering = 0;
 template <>
@@ -34,9 +34,9 @@ int DiagoPexsi<double>::pexsi_row_ordering = 0;
 template <>
 int DiagoPexsi<double>::pexsi_nproc = 0;
 template <>
-int DiagoPexsi<double>::pexsi_symm = 0;
+bool DiagoPexsi<double>::pexsi_symm = 0;
 template <>
-int DiagoPexsi<double>::pexsi_trans = 0;
+bool DiagoPexsi<double>::pexsi_trans = 0;
 template <>
 int DiagoPexsi<double>::pexsi_method = 0;
 template <>
@@ -69,15 +69,15 @@ double DiagoPexsi<double>::pexsi_zero_thr = 0.0;
 template <>
 int DiagoPexsi<std::complex<double>>::pexsi_npole = 0;
 template <>
-int DiagoPexsi<std::complex<double>>::pexsi_inertia = 0;
+bool DiagoPexsi<std::complex<double>>::pexsi_inertia = 0;
 template <>
 int DiagoPexsi<std::complex<double>>::pexsi_nmax = 0;
 // template <>
 // int DiagoPexsi<std::complex<double>>::pexsi_symbolic = 0;
 template <>
-int DiagoPexsi<std::complex<double>>::pexsi_comm = 0;
+bool DiagoPexsi<std::complex<double>>::pexsi_comm = 0;
 template <>
-int DiagoPexsi<std::complex<double>>::pexsi_storage = 0;
+bool DiagoPexsi<std::complex<double>>::pexsi_storage = 0;
 template <>
 int DiagoPexsi<std::complex<double>>::pexsi_ordering = 0;
 template <>
@@ -85,9 +85,9 @@ int DiagoPexsi<std::complex<double>>::pexsi_row_ordering = 0;
 template <>
 int DiagoPexsi<std::complex<double>>::pexsi_nproc = 0;
 template <>
-int DiagoPexsi<std::complex<double>>::pexsi_symm = 0;
+bool DiagoPexsi<std::complex<double>>::pexsi_symm = 0;
 template <>
-int DiagoPexsi<std::complex<double>>::pexsi_trans = 0;
+bool DiagoPexsi<std::complex<double>>::pexsi_trans = 0;
 template <>
 int DiagoPexsi<std::complex<double>>::pexsi_method = 0;
 template <>
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index ffc89d6b4d..34076a1080 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -31,33 +31,105 @@ class DiagoPexsi : public DiagH<T>
     //==========================================================
     // PEXSI related variables
     //==========================================================
+    /** 
+     * @brief  Number of terms in the pole expansion.
+     */ 
     static int pexsi_npole;
-    static int pexsi_inertia;
+    /** 
+     * @brief  Whether inertia counting is used at the very beginning.
+     */ 
+    static bool pexsi_inertia;
+    /** 
+     * @brief  Maximum number of PEXSI iterations after each inertia counting procedure.
+     */ 
     static int pexsi_nmax;
-    // static int pexsi_symbolic;
-    static int pexsi_comm;
-    static int pexsi_storage;
+    /** 
+     * @brief  Whether to construct PSelInv communication pattern.
+     */ 
+    static bool pexsi_comm;
+    /** 
+     * @brief  Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices.  
+     */ 
+    static bool pexsi_storage;
+    /** 
+     * @brief  Ordering strategy for factorization and selected inversion. 
+     */ 
     static int pexsi_ordering;
+    /** 
+     * @brief  row permutation strategy for factorization and selected inversion.  
+     */ 
     static int pexsi_row_ordering;
+    /** 
+     * @brief  Number of processors for PARMETIS/PT-SCOTCH.  Only used if the ordering == 0.
+     */ 
     static int pexsi_nproc;
-    static int pexsi_symm;
-    static int pexsi_trans;
+    /** 
+     * @brief  Matrix structure.
+     * - = 0   : Unsymmetric matrix
+     * - = 1   : Symmetric matrix (default).
+     */ 
+    static bool pexsi_symm;
+    /** 
+     * @brief  Transpose.
+     * - = 0   : Factor non transposed matrix (default).
+     * - = 1   : Factor transposed matrix.
+     */ 
+    static bool pexsi_trans;
+    /** 
+     * @brief  The pole expansion method to be used.
+     * - = 1   : Cauchy Contour Integral method used.
+     * - = 2   : Moussa optimized method.
+     */ 
     static int pexsi_method;
+    /** 
+     * @brief  The point parallelizaion of PEXSI.
+     * - = 2  : Recommend two points parallelization
+     */ 
     static int pexsi_nproc_pole;
-    // static double pexsi_spin = 2;
+    /** 
+     * @brief  Temperature, in the same unit as H 
+     */ 
     static double pexsi_temp;
+    /** 
+     * @brief  Spectral gap. **Note** This can be set to be 0 in most cases.
+     */ 
     static double pexsi_gap;
+    /** 
+     * @brief  An upper bound for the spectral radius of \f$S^{-1} H\f$.
+     */ 
     static double pexsi_delta_e;
+    /** 
+     * @brief  Initial guess of lower bound for mu.
+     */ 
     static double pexsi_mu_lower;
+    /** 
+     * @brief  Initial guess of upper bound for mu.
+     */ 
     static double pexsi_mu_upper;
+    /** 
+     * @brief  Initial guess for mu (for the solver) (AG)
+     */ 
     static double pexsi_mu;
+    /** 
+     * @brief  Stopping criterion in terms of the chemical potential for the inertia counting procedure.
+     */ 
     static double pexsi_mu_thr;
+    /** 
+     * @brief  If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion.
+     */ 
     static double pexsi_mu_expand;
+    /** 
+     * @brief  Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure.
+     */ 
     static double pexsi_mu_guard;
+    /** 
+     * @brief  Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact.
+     */ 
     static double pexsi_elec_thr;
+    /** 
+     * @brief  Stopping criterion for the zero threshold.
+     */ 
     static double pexsi_zero_thr;
-
-    static MPI_Group grid_group;
 };
 } // namespace hsolver
 
diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt
index bf11e8ce8c..c76e223e18 100644
--- a/source/module_hsolver/test/CMakeLists.txt
+++ b/source/module_hsolver/test/CMakeLists.txt
@@ -80,12 +80,6 @@ AddTest(
 )
 
 if(ENABLE_LCAO)
-  # if(USE_ELPA and USE_PEXSI)
-  # AddTest(
-  #   TARGET HSolver_LCAO
-  #   LIBS ${math_libs} ELPA::ELPA base genelpa psi device
-  #   SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp 
-  # )
   if(USE_ELPA)
   AddTest(
     TARGET HSolver_LCAO
diff --git a/source/module_hsolver/test/diago_lcao_test.cpp b/source/module_hsolver/test/diago_lcao_test.cpp
index 7e8d499526..487820a9d5 100644
--- a/source/module_hsolver/test/diago_lcao_test.cpp
+++ b/source/module_hsolver/test/diago_lcao_test.cpp
@@ -7,9 +7,6 @@
 #ifdef __ELPA
 #include "module_hsolver/diago_elpa.h"
 #endif
-#ifdef __PEXSI
-#include "module_hsolver/diago_pexsi.h"
-#endif
 
 #define PASSTHRESHOLD 1e-10
 #define DETAILINFO    false
@@ -67,10 +64,6 @@ template<class T> class DiagoPrepare
 #ifdef __ELPA
         else if(ks_solver == "genelpa")
             dh = new hsolver::DiagoElpa<T>;
-#endif
-#ifdef __PEXSI
-        else if(ks_solver == "pexsi")
-            dh = new hsolver::DiagoPexsi;
 #endif
         else
         {
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 5e90ef7b71..fb5bc43551 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -22,7 +22,6 @@
 #include "module_base/global_variable.h"
 #include "module_base/parallel_common.h"
 #include "module_base/timer.h"
-#include "module_base/tool_quit.h"
 #include "version.h"
 Input INPUT;
 
@@ -645,16 +644,16 @@ void Input::Default(void)
     // variables for PEXSI
     //==========================================================
     pexsi_npole = 54;
-    pexsi_inertia = 1;
+    pexsi_inertia = true;
     pexsi_nmax = 80;
     // pexsi_symbolic = 1;
-    pexsi_comm = 1;
-    pexsi_storage = 1;
+    pexsi_comm = true;
+    pexsi_storage = true;
     pexsi_ordering = 0;
     pexsi_row_ordering = 1;
     pexsi_nproc = 1;
-    pexsi_symm = 1;
-    pexsi_trans = 0;
+    pexsi_symm = true;
+    pexsi_trans = false;
     pexsi_method = 1;
     pexsi_nproc_pole = 1;
     // pexsi_spin = 2;
@@ -3824,16 +3823,16 @@ void Input::Bcast()
     // PEXSI
     //==========================================================
     Parallel_Common::bcast_int(pexsi_npole);
-    Parallel_Common::bcast_int(pexsi_inertia);
+    Parallel_Common::bcast_bool(pexsi_inertia);
     Parallel_Common::bcast_int(pexsi_nmax);
     // Parallel_Common::bcast_int(pexsi_symbolic);
-    Parallel_Common::bcast_int(pexsi_comm);
-    Parallel_Common::bcast_int(pexsi_storage);
+    Parallel_Common::bcast_bool(pexsi_comm);
+    Parallel_Common::bcast_bool(pexsi_storage);
     Parallel_Common::bcast_int(pexsi_ordering);
     Parallel_Common::bcast_int(pexsi_row_ordering);
     Parallel_Common::bcast_int(pexsi_nproc);
-    Parallel_Common::bcast_int(pexsi_symm);
-    Parallel_Common::bcast_int(pexsi_trans);
+    Parallel_Common::bcast_bool(pexsi_symm);
+    Parallel_Common::bcast_bool(pexsi_trans);
     Parallel_Common::bcast_int(pexsi_method);
     Parallel_Common::bcast_int(pexsi_nproc_pole);
     // Parallel_Common::bcast_double(pexsi_spin);
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 6d326d6b5d..f166612a4d 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -607,16 +607,16 @@ class Input
     // variables for PEXSI
     //==========================================================
     int pexsi_npole = 54;
-    int pexsi_inertia = 1;
+    bool pexsi_inertia = true;
     int pexsi_nmax = 80;
     // int pexsi_symbolic = 1;
-    int pexsi_comm = 1;
-    int pexsi_storage = 1;
+    bool pexsi_comm = true;
+    bool pexsi_storage = true;
     int pexsi_ordering = 0;
     int pexsi_row_ordering = 1;
     int pexsi_nproc = 1;
-    int pexsi_symm = 1;
-    int pexsi_trans = 0;
+    bool pexsi_symm = true;
+    bool pexsi_trans = false;
     int pexsi_method = 1;
     int pexsi_nproc_pole = 1;
     // double pexsi_spin = 2;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index 5b9c93dd77..60ec6c82dd 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -1,5 +1,6 @@
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
+#include "module_hsolver/diago_pexsi.h"
 #include "module_io/input_conv.h"
 #include "module_base/global_variable.h"
 #include "for_testing_input_conv.h"
@@ -539,6 +540,57 @@ TEST_F(InputConvTest, ReadTdEfieldTest)
     EXPECT_EQ(elecstate::H_TDDFT_pw::heavi_t0[0], 100);
     EXPECT_NEAR(elecstate::H_TDDFT_pw::heavi_amp[0], 1.00 * ModuleBase::BOHR_TO_A / ModuleBase::Ry_to_eV, 1e-8);
 }
+
+#ifdef __PEXSI
+TEST_F(InputConvTest, PEXSI)
+{
+	INPUT.Default();
+	std::string input_file = "./support/INPUT";
+	INPUT.Read(input_file);
+	Input_Conv::Convert();
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_npole, 54);
+	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_inertia);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nmax, 80);
+	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_comm);
+	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_storage);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_ordering, 0);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_row_ordering, 1);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nproc, 1);
+	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_symm);
+	EXPECT_FALSE(hsolver::DiagoPexsi<double>::pexsi_trans);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_method, 1);
+	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nproc_pole, 1);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_temp, 1e-4);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_gap, 0);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_delta_e, 20);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_lower, -10);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_upper, 10);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu, 0);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_thr, 0.05);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_expand, 0.3);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_guard, 0.2);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_elec_thr, 0.001);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_zero_thr, 1e-10);
+
+	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc, 1);
+	EXPECT_TRUE(hsolver::DiagoPexsi<std::complex<double>>::pexsi_symm);
+	EXPECT_FALSE(hsolver::DiagoPexsi<std::complex<double>>::pexsi_trans);
+	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_method, 1);
+	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc_pole, 1);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp, 0.0001);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_gap, 0);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_delta_e, 20);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_lower, -10);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_upper, 10);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu, 0);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_thr, 0.05);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_expand, 0.3);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_guard, 0.2);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_elec_thr, 0.001);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_zero_thr, 1e-10);
+}
+#endif
+
 #endif
 
 #undef private
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index f74544ecb6..ad40d635ea 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -392,6 +392,30 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0);
     EXPECT_EQ(INPUT.qo_thr, 1e-6);
     EXPECT_EQ(INPUT.qo_basis, "hydrogen");
+
+    EXPECT_EQ(INPUT.pexsi_npole, 54);
+    EXPECT_TRUE(INPUT.pexsi_inertia);
+    EXPECT_EQ(INPUT.pexsi_nmax, 80);
+    EXPECT_TRUE(INPUT.pexsi_comm);
+    EXPECT_TRUE(INPUT.pexsi_storage);
+    EXPECT_EQ(INPUT.pexsi_ordering, 0);
+    EXPECT_EQ(INPUT.pexsi_row_ordering, 1);
+    EXPECT_EQ(INPUT.pexsi_nproc, 1);
+    EXPECT_TRUE(INPUT.pexsi_symm);
+    EXPECT_FALSE(INPUT.pexsi_trans);
+    EXPECT_EQ(INPUT.pexsi_method, 1);
+    EXPECT_EQ(INPUT.pexsi_nproc_pole, 1);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.0001);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 20);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_upper, 10);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu, 0);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_thr, 0.05);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_expand, 0.3);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_guard, 0.2);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_elec_thr, 0.001);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_zero_thr, 1e-10);
 }
 
 TEST_F(InputParaTest, Init)
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index 0103c0a14b..e106fc399f 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -922,4 +922,42 @@ TEST_F(write_input, Deltaspin22)
     EXPECT_THAT(output, testing::HasSubstr("sccut                          3 #Maximal step size for lambda in eV/uB"));
     remove("write_input_test.log");
 }
+
+TEST_F (write_input, PEXSI24)
+{
+    INPUT.Default();
+    INPUT.Read("./support/witestfile");
+    std::string output_file = "write_input_test.log";
+    INPUT.Print(output_file);
+    int a = access("write_input_test.log", 00);
+    EXPECT_EQ(a, 0);
+    std::ifstream ifs ("write_input_test.log");
+    std::string output ((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
+    EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_npole                    54 #Number of poles in expansion"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia                  1 #Whether inertia counting is used at the very beginning of PEXSI process"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax                     80 #Maximum number of PEXSI iterations after each inertia counting procedure."));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_comm                     1 #Whether to construct PSelInv communication pattern"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_storage                  1 #Storage space used by the Selected Inversion algorithm for symmetric matrices."));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_ordering                 0 #Ordering strategy for factorization and selected inversion"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_row_ordering             1 #Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc                    1 #Number of processors for parmetis"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_symm                     1 #Matrix symmetry"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_trans                    0 #Whether to transpose"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_method                   1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole               1 #Number of processes used by each pole"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_temp                     0.0001 #Temperature, in the same unit as H"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_gap                      0 #Spectral gap"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_delta_e                  20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower                 -10 #Initial guess of lower bound for mu"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_upper                 10 #Initial guess of upper bound for mu"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu                       0 #Initial guess for mu (for the solver)"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_thr                   0.05 #Stopping criterion in terms of the chemical potential for the inertia counting procedure"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_expand                0.3 #If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_guard                 0.2 #Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_elec_thr                 0.001 #Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_zero_thr                 1e-10 #if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"));
+    ifs.close();
+    remove("write_input_test.log");
+}
 #undef private
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index 63488dd131..9b3edf8cc2 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -502,17 +502,17 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure.");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices, 0: non-symmetric, 1: symmetric");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices.");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "matrix symmetry, 0: non-symmetric, 1: symmetric");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "transpose, 0: no transpose, 1: transpose");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "Matrix symmetry");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "Whether to transpose");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap");
-    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \f$S^{-1} H\f$");
+    ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \\f$S^{-1} H\\f$");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu");
     ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)");

From 493f71345db90f84ab175ba672acd7d337144a23 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Sat, 3 Feb 2024 21:11:24 +0800
Subject: [PATCH 24/44] Fix nspin > 1 cases

---
 source/module_elecstate/elecstate_lcao.cpp    |  14 +-
 source/module_elecstate/elecstate_lcao.h      |   2 +-
 source/module_hsolver/diago_pexsi.cpp         |   6 +-
 source/module_hsolver/diago_pexsi.h           |   5 +-
 source/module_hsolver/hsolver_lcao.cpp        |   3 +
 .../module_pexsi/dist_bcd_matrix.h            |  11 -
 .../module_pexsi/dist_matrix_transformer.cpp  | 864 +-----------------
 .../module_pexsi/dist_matrix_transformer.h    |   3 -
 .../module_pexsi/pexsi_solver.cpp             |   4 +-
 .../module_pexsi/pexsi_solver.h               |   4 +-
 .../module_pexsi/simple_pexsi.cpp             | 377 +-------
 source/module_hsolver/test/CMakeLists.txt     |   8 -
 12 files changed, 37 insertions(+), 1264 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 23d55162f8..267d50ddcb 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -260,19 +260,25 @@ double ElecStateLCAO<std::complex<double>>::get_spin_constrain_energy()
 
 #ifdef __PEXSI
 template<>
-void ElecStateLCAO<double>::dmToRho(double* pexsi_DM)
+void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM)
 {
     ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
 
     // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25
     if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
     {
-        this->loc->set_dm_gamma(0, pexsi_DM);
+        for (int ik = 0; ik < GlobalV::NSPIN; ++ik)
+        {
+            this->loc->set_dm_gamma(ik, pexsi_DM[ik]);
+        }
         this->loc->cal_dk_gamma_from_2D_pub();
     }
 
     auto DM = this->get_DM();
-    DM->set_DMK_pointer(0, pexsi_DM);
+    for (int is = 0; is < GlobalV::NSPIN; is++)
+    {
+        this->DM->set_DMK_pointer(is, pexsi_DM[is]);
+    }
     DM->cal_DMR();
     
     for (int is = 0; is < GlobalV::NSPIN; is++)
@@ -301,7 +307,7 @@ void ElecStateLCAO<double>::dmToRho(double* pexsi_DM)
 }
 
 template<>
-void ElecStateLCAO<std::complex<double>>::dmToRho(std::complex<double>* DM)
+void ElecStateLCAO<std::complex<double>>::dmToRho(std::vector<std::complex<double>*> DM)
 {
     ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case");
 }
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index c1cf231a45..0a942a3bc6 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -61,7 +61,7 @@ class ElecStateLCAO : public ElecState
 
 #ifdef __PEXSI
     // use for pexsi
-    void dmToRho(TK* DM);
+    void dmToRho(std::vector<TK*> pexsi_DM);
 #endif
 
   protected:
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 803680fe31..1cfc765c2a 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -131,14 +131,12 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
                                        this->ParaV->ncol,
                                        h_mat.p,
                                        s_mat.p,
-                                       this->DM,
-                                       this->EDM,
                                        this->totalEnergyH,
                                        this->totalEnergyS,
                                        this->totalFreeEnergy);
     this->ps->solve();
-    this->EDM = this->ps->get_EDM();
-    this->DM = this->ps->get_DM(); // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->EDM.push_back(this->ps->get_EDM());
+    this->DM.push_back(this->ps->get_DM()); // loc.dm_gamma[ik] loc.dm_gamma[0]?
     this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
     this->totalEnergyH = this->ps->get_totalEnergyH();
     this->totalEnergyS = this->ps->get_totalEnergyS();
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 34076a1080..8e2aa98da1 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -1,6 +1,7 @@
 #ifndef DIGAOPEXSI_H
 #define DIGAOPEXSI_H
 
+#include <vector>
 #include "diagh.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
 #include "module_pexsi/pexsi_solver.h"
@@ -21,8 +22,8 @@ class DiagoPexsi : public DiagH<T>
     }
     void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
     const Parallel_Orbitals* ParaV;
-    T* DM;
-    double* EDM;
+    std::vector<T*> DM;
+    std::vector<T*> EDM;
     double totalEnergyH;
     double totalEnergyS;
     double totalFreeEnergy;
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 7adea8b91d..743a02d51f 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -153,6 +153,9 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
                 delete[] this->pdiagh;
                 this->pdiagh = nullptr;
             }
+            auto tem = dynamic_cast<DiagoPexsi<T>*>(this->pdiagh);
+            tem->DM.clear();
+            tem->EDM.clear();
         }
         if (this->pdiagh == nullptr)
         {
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
index 0964b9787c..94b61277d2 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h
@@ -12,13 +12,7 @@ namespace pexsi
 {
 class DistBCDMatrix
 {
-
   public:
-    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol);
-    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int nprow, int npcol, int size, int nblk, int nrow, int ncol, char
-    // layout);
-
-    // DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol);
     DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout);
     ~DistBCDMatrix();
 
@@ -27,7 +21,6 @@ class DistBCDMatrix
     int localRow(const int globalRow, int& myprow);
     int localCol(const int globalCol, int& mypcol);
     int pnum(const int prow, const int pcol);
-    //~DistBCDMatrix();
 
     const MPI_Comm get_comm() const
     {
@@ -75,10 +68,6 @@ class DistBCDMatrix
     int nrow;
     int ncol;
 
-    // protected:
-
-    // private:
-
     // current process row and column
     int myprow;
     int mypcol;
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index eadd991217..4b0fc23cfb 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -13,17 +13,6 @@
 #include "dist_bcd_matrix.h"
 #include "dist_ccs_matrix.h"
 
-// for debug
-#ifdef _DEBUG
-#include <unistd.h>
-
-#include <cstring>
-#include <fstream>
-
-#include "src_pw/global.h"
-#endif
-// end debug
-
 namespace pexsi
 {
 // find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a
@@ -177,60 +166,10 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout,
                                                   std::vector<int>& rowidx,
                                                   std::vector<int>& colidx)
 {
-#ifdef _DEBUG
-    char f_log[80];
-    int myproc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-    std::ofstream log;
-    if (myproc < 100)
-    {
-        sprintf(f_log, "transformer_%2.2d.log", myproc);
-        log.open(f_log, std::ios::app);
-        log << "start count nnz" << std::endl;
-    }
-    // count nonzeros value distribution of H and S
-    static bool isCOUNTNONZERO = true;
-    if (!isCOUNTNONZERO)
-    {
-        isCOUNTNONZERO = true;
-        char plog_name[80];
-        sprintf(plog_name, "HS_Distribution_%d.log", myproc);
-        std::ofstream plog;
-        plog.open(plog_name, std::ios::app);
-        std::map<int, int> pH;
-        countMatrixDistribution(nrow * ncol, H_2d, pH);
-        std::map<int, int> pS;
-        countMatrixDistribution(nrow * ncol, H_2d, pS);
-        plog << "Element in H distribution:\n";
-        // std::stringstream ss;
-        // ss.str("");
-        for (auto iter = pH.begin(); iter != pH.end(); ++iter)
-        {
-            // ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
-            plog << "p[" << iter->first << "] : " << iter->second << std::endl;
-        }
-        // OUT(ofs_running,ss.str());
-        // OUT(ofs_running, "Element in S distribution:");
-        plog << "Element in S distribution:\n";
-        // ss.str("");
-        for (auto iter = pS.begin(); iter != pS.end(); ++iter)
-        {
-            // ss<<"p["<<iter->first<<"] : "<<iter->second<<std::endl;
-            plog << "p[" << iter->first << "] : " << iter->second << std::endl;
-        }
-        // OUT(ofs_running,ss.str());
-        plog.close();
-    }
-#endif
-
     int idx = 0;
     nnz = 0;
     colidx.clear();
     rowidx.clear();
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "rowidx and colidx cleared" << std::endl;
-#endif
     if (layout == 'C' || layout == 'c')
     {
         for (int i = 0; i < ncol; ++i)
@@ -265,19 +204,8 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout,
     }
     else
     {
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "unknown layout: " << layout << std::endl;
-#endif
         return 1;
     }
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "nnz is counted: " << nnz << std::endl;
-        log.close();
-    }
-#endif
     return 0;
 }
 
@@ -297,32 +225,10 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
                                                    std::vector<int>& receiver_displacement_process,
                                                    std::vector<int>& buffer2ccsIndex)
 {
-    // debug
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-#ifdef _DEBUG
-    std::ofstream log;
-    if (myproc < 100)
-    {
-        char f_log[80];
-        sprintf(f_log, "transformer_%2.2d.log", myproc);
-        log.open(f_log, std::ios::app);
-        log << "enter buildTransformParameter" << std::endl;
-    }
-#endif
-    // end debug
-    // count sender non-zeros elements
     sender_size = nnz;
     std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "start translate ranks between group_data and group_trans" << std::endl;
-        log << "sender_size (in BCD) = " << sender_size << std::endl;
-    }
-#endif
-    // end debug
     // create process id map from group_data to group_trans
     int nproc_data;
     std::vector<int> proc_map_data_trans;
@@ -344,17 +250,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS);
     }
 
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "rank_data        rank_trans" << std::endl;
-        for (int i = 0; i < nproc_data; ++i)
-            log << i << "\t\t\t" << proc_map_data_trans[i] << std::endl;
-    }
-#endif
-    // end debug
-
     for (int i = 0; i < nnz; ++i)
     {
         int l_col = colidx[i];
@@ -362,50 +257,16 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         int dst_process;
         int dst_col = DST_Matrix.localCol(g_col, dst_process);
         int dst_process_trans = proc_map_data_trans[dst_process];
-        /*
-        // debug
-        #ifdef _DEBUG
-        log<<dst_process<<"\t\t";
-        #endif
-        // end debug
-         MPI_Group_translate_ranks(DST_Matrix.group_data, 1, &dst_process,
-                                   GROUP_TRANS, &dst_process_trans);
-        // debug
-        #ifdef _DEBUG
-        log<<dst_process_trans<<std::endl;
-        #endif
-        // end debug
-        */
         ++sender_size_process[dst_process_trans];
     }
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "sender_size_process is creaated" << std::endl;
-#endif
-    // end debug
-
     // transfer sender index size to receiver index size
     MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "receiver_size_process is got" << std::endl;
-#endif
-    // end debug
-
     // setup all2all parameters
     sender_displacement_process[0] = 0;
     for (int i = 1; i < NPROC_TRANS; ++i)
     {
         sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1];
     }
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "sender_displacement_process is creaated" << std::endl;
-#endif
-    // end debug
 
     receiver_displacement_process[0] = 0;
     receiver_size = receiver_size_process[0];
@@ -414,15 +275,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1];
         receiver_size += receiver_size_process[i];
     }
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "sender_size and receiver_displacement_process are creaated" << std::endl;
-        log << "receiver_size (in CCS) = " << receiver_size << std::endl;
-    }
-#endif
-    // end debug
 
     // setup receiver index
     // setup sender_index
@@ -437,12 +289,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
         int dst_row = SRC_Matrix.globalRow(l_row);
         sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row;
     }
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "sender_index is got" << std::endl;
-#endif
-    // end debug
 
     // transfer index to receiver
     std::vector<int> receiver_index(receiver_size);
@@ -455,12 +301,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
                   &receiver_displacement_process[0],
                   MPI_INT,
                   COMM_TRANS);
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "receiver_index is got" << std::endl;
-#endif
-    // end debug
 
     // setup buffer2ccsIndex based on receiver_index
     buffer2ccsIndex.resize(receiver_size);
@@ -472,15 +312,6 @@ int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix,
                       &receiver_index[0],
                       DST_Matrix,
                       &buffer2ccsIndex[0]);
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "ccs parameter is built" << std::endl;
-        log.close();
-    }
-#endif
-    // end debug
     return 0;
 }
 
@@ -489,77 +320,10 @@ int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix,
                                              MPI_Group& GROUP_TRANS,
                                              MPI_Comm& COMM_TRANS)
 {
-// debug
-#ifdef _DEBUG
-    char f_log[80];
-    int myproc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-    std::ofstream log;
-    if (myproc < 100)
-    {
-        sprintf(f_log, "transformer_%2.2d.log", myproc);
-        log.open(f_log, std::ios::app);
-        // log<<std::endl<<"LOG of process: "<<myproc<<std::endl;
-        log << "enter newGroupCommTrans" << std::endl;
-    }
-#endif
     // build transfortram communicator which contains both processes of BCD processors and
     // CCS processors with nonzero elements
     MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS);
     MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS);
-// debug
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        int trans_myid, trans_nproc;
-        int trans_gid, trans_gproc;
-        if (COMM_TRANS != MPI_COMM_NULL)
-        {
-            MPI_Comm_rank(COMM_TRANS, &trans_myid);
-            MPI_Comm_size(COMM_TRANS, &trans_nproc);
-        }
-        else
-        {
-            trans_myid = -1;
-            trans_nproc = -1;
-            // trans_gid=-1;
-            // trans_gproc=-1;
-        }
-        MPI_Group_rank(GROUP_TRANS, &trans_gid);
-        MPI_Group_size(GROUP_TRANS, &trans_gproc);
-        int BCD_myid, BCD_nproc;
-        BCD_myid = SRC_Matrix.myproc;
-        BCD_nproc = SRC_Matrix.nprocs;
-        int BCD_gid, BCD_gproc;
-        MPI_Group_rank(SRC_Matrix.group, &BCD_gid);
-        MPI_Group_size(SRC_Matrix.group, &BCD_gproc);
-        int CCS_myid, CCS_nproc;
-        int CCS_gid, CCS_gproc;
-        if (DST_Matrix.comm_data != MPI_COMM_NULL)
-        {
-            MPI_Comm_rank(DST_Matrix.comm_data, &CCS_myid);
-            MPI_Comm_size(DST_Matrix.comm_data, &CCS_nproc);
-        }
-        else
-        {
-            CCS_myid = -1;
-            CCS_nproc = -1;
-            // CCS_gid=-1;
-            // CCS_gproc=-1;
-        }
-        MPI_Group_rank(DST_Matrix.group_data, &CCS_gid);
-        MPI_Group_size(DST_Matrix.group_data, &CCS_gproc);
-        log << "myid in BCD:\t" << BCD_myid << "\tin CCS:\t" << CCS_myid << "\tin TRANS:\t" << trans_myid
-            << "\tBCD_gid:\t" << BCD_gid << "\tCCS_gid:\t" << CCS_gid << "\ttrans_gid:\t" << trans_gid << std::endl;
-        log << "nproc in BCD:\t" << BCD_nproc << "\tin CCS:\t" << CCS_nproc << "\tin TRANS:\t" << trans_nproc
-            << "\tBCD_gproc:\t" << BCD_gproc << "\tCCS_gproc:\t" << CCS_gproc << "\ttrans_gproc:\t" << trans_gproc
-            << std::endl;
-
-        log << "COMM_TRANS is created" << std::endl;
-        log.close();
-    }
-#endif
-    // end debug
     return 0;
 }
 
@@ -585,21 +349,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                                              double*& H_ccs,
                                              double*& S_ccs)
 {
-// debug
-#ifdef _DEBUG
-    char f_log[80];
-    int myproc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-    std::ofstream log;
-    if (myproc < 100)
-    {
-        sprintf(f_log, "transformer_%2.2d.log", myproc);
-        log.open(f_log, std::ios::app);
-        log << std::endl << "LOG of process: " << myproc << std::endl;
-        log << "enter transformBCDtoCCS for H and S" << std::endl;
-    }
-#endif
-    // end debug
     MPI_Group GROUP_TRANS;
     MPI_Comm COMM_TRANS = MPI_COMM_NULL;
     newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS);
@@ -615,30 +364,10 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
         std::vector<int> receiver_size_process(NPROC_TRANS);
         std::vector<int> receiver_displacement_process(NPROC_TRANS);
 
-#ifdef _DEBUG
-        if (myproc < 100)
-        {
-            log << "nprocs: " << SRC_Matrix.nprocs << " ; myprow: " << SRC_Matrix.myprow
-                << " ; mypcol: " << SRC_Matrix.mypcol << std::endl;
-            log << "nblk:" << SRC_Matrix.nblk << " ; nrow: " << SRC_Matrix.get_nrow() << " ; ncol: " << SRC_Matrix.get_ncol()
-                << std::endl;
-            log << "layout:" << SRC_Matrix.get_layout() << std::endl;
-            log << "ZERO = " << ZERO_Limit << std::endl;
-            log << "DST_Matrix parameters:" << std::endl;
-            log << "size: " << DST_Matrix.size << " ;nproc_data: " << DST_Matrix.nproc_data << std::endl;
-            log << "start transforming H and S to CCS format" << std::endl;
-        }
-#endif
-        // end debug
-
         // find out the non-zeros elements' positions
         std::vector<int> rowidx;
         std::vector<int> colidx;
         int nnz = 0;
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "start counting nnz..." << std::endl;
-#endif
         if (SRC_Matrix.get_comm() != MPI_COMM_NULL)
         {
             getNonZeroIndex(SRC_Matrix.get_layout(),
@@ -651,43 +380,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                             rowidx,
                             colidx);
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-        {
-            log << "NonZeroIndex is got, nnz is " << nnz << std::endl;
-            log << "rowidx size: " << rowidx.size() << "; colidx size: " << colidx.size() << std::endl;
-            /*
-            if(SRC_Matrix.comm != MPI_COMM_NULL)
-            {
-                log<<"NonZeroIndex :"<<std::endl;
-                if(SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r')
-                {
-                    for(int i=0; i<nnz; ++i)
-                    {
-                        int HS_idx=rowidx[i]*SRC_Matrix.get_ncol()+colidx[i];
-                        log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
-                        log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
-                    }
-                }
-                else
-                {
-                    for(int i=0; i<nnz; ++i)
-                    {
-                        int HS_idx=colidx[i]*SRC_Matrix.get_nrow()+rowidx[i];
-                        log<<rowidx[i]<<' '<<colidx[i]<<' '<<HS_idx;
-                        log<<' '<<H_2d[HS_idx]<<' '<<S_2d[HS_idx]<<std::endl;
-                    }
-                }
-                log<<"nonzero index is output"<<std::endl;
-            }
-            else
-            {
-                log<<"no src_matrix elements in current process"<<std::endl;
-            }
-            */
-        }
-#endif
-
         // build all2all transformation parameters and the map index of receiver buffer
         std::vector<int> buffer2ccsIndex;
         buildTransformParameter(SRC_Matrix,
@@ -706,10 +398,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                                 receiver_displacement_process,
                                 buffer2ccsIndex);
 // Do transformation
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "Parameters are built" << std::endl;
-#endif
         std::vector<double> sender_buffer(sender_size);
         std::vector<double> receiver_buffer(receiver_size);
         // put H to sender buffer
@@ -727,10 +415,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "H sender_buffer is filled" << std::endl;
-#endif
         // do all2all transformation
         MPI_Alltoallv(&sender_buffer[0],
                       &sender_size_process[0],
@@ -742,17 +426,9 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       MPI_DOUBLE,
                       COMM_TRANS);
 // collect H from receiver buffer
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "H receiver_buffer is received" << std::endl;
-#endif
         delete[] H_ccs;
         H_ccs = new double[receiver_size];
         buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs);
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "H_ccs is received" << std::endl;
-#endif
 
         // put S to sender buffer
         if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r')
@@ -769,10 +445,6 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                 sender_buffer[i] = S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]];
             }
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "S sender_buffer is filled" << std::endl;
-#endif
         // do all2all transformation
         MPI_Alltoallv(&sender_buffer[0],
                       &sender_size_process[0],
@@ -784,27 +456,12 @@ int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       MPI_DOUBLE,
                       COMM_TRANS);
 // collect S from receiver buffer
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "S receiver_buffer is received" << std::endl;
-#endif
         delete[] S_ccs;
         S_ccs = new double[receiver_size];
         buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs);
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "S_ccs is received" << std::endl;
-#endif
     }
     // clear and return
     deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
-#ifdef _DEBUG
-    if (myproc < 100)
-    {
-        log << "COMM_TRANS is deleted" << std::endl;
-        log.close();
-    }
-#endif
     return 0;
 }
 
@@ -817,29 +474,8 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                                              double* DM,
                                              double* EDM)
 {
-// debug
-#ifdef _DEBUG
-    OUT(ofs_running, "transformCCStoBCD: start");
-    MPI_Barrier(MPI_COMM_WORLD);
-#endif
-    // end debug
     int myproc;
     MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
-// debug
-#ifdef _DEBUG
-    std::ofstream log;
-    if (myproc < 100)
-    {
-        char f_log[80];
-        sprintf(f_log, "transformer_%2.2d.log", myproc);
-        // MPI_Barrier(MPI_COMM_WORLD);
-        log.open(f_log, std::ios::app);
-        // MPI_Barrier(MPI_COMM_WORLD);
-        log << "\nstart transform DMnzval to DM" << std::endl;
-    }
-    MPI_Barrier(MPI_COMM_WORLD);
-#endif
-    // end debug
     MPI_Group GROUP_TRANS;
     MPI_Comm COMM_TRANS = MPI_COMM_NULL;
     newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS);
@@ -851,32 +487,13 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             DM[i] = 0;
             EDM[i] = 0;
         }
-#ifdef _DEBUG
-        // MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "DM and EDM filled by 0" << std::endl;
-// OUT(ofs_running, "transformCCStoBCD: DM and EDM filled by 0");
-#endif
         // setup number of local elements to be transfered to each remote processes
         int NPROC_TRANS;
         MPI_Comm_size(COMM_TRANS, &NPROC_TRANS);
-        // std::vector<int> sender_size_process(NPROC_TRANS);
-        // std::vector<int> sender_displacement_process(NPROC_TRANS);
-        // std::vector<int> receiver_size_process(NPROC_TRANS);
-        // std::vector<int> receiver_displacement_process(NPROC_TRANS);
         int sender_size_process[NPROC_TRANS];
         int sender_displacement_process[NPROC_TRANS];
         int receiver_size_process[NPROC_TRANS];
         int receiver_displacement_process[NPROC_TRANS];
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "NPROC_TRANS = " << NPROC_TRANS << std::endl;
-        // MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "build process rank map from BCD to TRANS" << std::endl;
-// OUT(ofs_running, "transformCCStoBCD: build process rank map from BCD to TRANS");
-// MPI_Barrier(COMM_TRANS);
-#endif
         int nproc_bcd;
         std::vector<int> proc_map_bcd_trans;
         int myproc_trans;
@@ -898,56 +515,16 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             proc_map_bcd_trans.resize(nproc_bcd, 0);
             MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS);
         }
-
-#ifdef _DEBUG
-        // check process map from BCD comm to TRANS comm
-        if (myproc < 100)
-        {
-            log << "check process map:\n";
-            log << "pid in bcd\tpid in trans\n";
-            for (int i = 0; i < nproc_bcd; ++i)
-            {
-                log << i << "\t\t" << proc_map_bcd_trans[i] << std::endl;
-            }
-            log << "check pid from prow and pcol int bcd to pid in trans\n";
-            log << "p_row  p_col  p_bcd  p_trans\n";
-            for (int i = 0; i < DST_Matrix.nprows; ++i)
-            {
-                for (int j = 0; j < DST_Matrix.npcols; ++j)
-                {
-                    int pid_bcd = DST_Matrix.pnum(i, j);
-                    int pid_trans = proc_map_bcd_trans[pid_bcd];
-                    log << i << "\t" << j << "\t" << pid_bcd << "\t" << pid_trans << std::endl;
-                }
-            }
-            log << "setup alltoall parameters" << std::endl;
-        }
-        // OUT(ofs_running, "transformCCStoBCD: setup alltoall parameters");
-        MPI_Barrier(COMM_TRANS);
-#endif
         // setup sender_size_process
         // std::fill(sender_size_process.begin(), sender_size_process.end(), 0);
         for (int i = 0; i < NPROC_TRANS; ++i)
             sender_size_process[i] = 0;
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "sender_size_process is inited by 0" << std::endl;
-        // OUT(ofs_running, "transformCCStoBCD: sender_size_process is inited by 0, size ", NPROC_TRANS);
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "display all columns and rows of nonzeros values:\n";
-        int log_nnz = 0;
-#endif
         for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
             int recv_pcol_bcd;
             int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd);
-            // #ifdef _DEBUG
-            // log<<g_col<<"\n ";
-            // #endif
-            // OUT(ofs_running, "transformCCStoBCD: recv_pcol_bcd", recv_pcol_bcd);
+    
             for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx)
             {
                 int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
@@ -956,47 +533,14 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                 int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
                 int recv_proc = proc_map_bcd_trans[recv_proc_bcd];
                 ++sender_size_process[recv_proc];
-                // #ifdef _DEBUG
-                // log<<" "<<g_row;
-                // ++log_nnz;
-                // #endif
             }
             // log<<"\n";
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "sender_size_process is counted, total nonzeros are: " << log_nnz << std::endl;
-            log << "target pid\tsize\n";
-            for (int i = 0; i < NPROC_TRANS; i++)
-            {
-                log << i << "\t\t" << sender_size_process[i] << std::endl;
-            }
-        }
-        // OUT(ofs_running, "transformCCStoBCD: sender_size_process is counted");
-        MPI_Barrier(COMM_TRANS);
-#endif
-
         // setup receiver_size_process
         // std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0);
         for (int i = 0; i < NPROC_TRANS; ++i)
             receiver_size_process[i] = 0;
         MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS);
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "receiver_size_process is got" << std::endl;
-            log << "target pid\tsize\n";
-            for (int i = 0; i < NPROC_TRANS; i++)
-            {
-                log << i << "\t\t" << receiver_size_process[i] << std::endl;
-            }
-        }
-// OUT(ofs_running, "transformCCStoBCD: receiver_size_process is got");
-#endif
-
         // setup sender_displacement and receiver_displacement
         sender_displacement_process[0] = 0;
         receiver_displacement_process[0] = 0;
@@ -1007,23 +551,6 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1];
             receiver_size += receiver_size_process[i];
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "displacements are built" << std::endl;
-            log << "check alltoallv parameters" << std::endl;
-            for (int i = 0; i < NPROC_TRANS; ++i)
-            {
-                log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
-                       "receiver_displacement_process"
-                    << std::endl;
-                log << i << "\t" << sender_size_process[i] << "\t\t\t" << sender_displacement_process[i] << "\t\t\t\t"
-                    << receiver_size_process[i] << "\t\t\t" << receiver_displacement_process[i] << std::endl;
-            }
-        }
-// OUT(ofs_running, "transformCCStoBCD: displacements are built");
-#endif
 
         // setup up sender index and receiver index
         int sender_size = SRC_Matrix.get_nnzlocal();
@@ -1032,15 +559,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         int* dst_index;
         int* receiver_index;
         double* receiver_buffer;
-#ifdef _DEBUG
-        if (myproc < 100)
-        {
-            log << "sender_size = " << sender_size << "; receiver_size = " << receiver_size << std::endl;
-            log.flush();
-            log << "start allocating sender_index, dst_index and receiver_index..." << std::endl;
-            log.flush();
-        }
-#endif
+
         if (sender_size > 0)
         {
             sender_index = new int[sender_size];
@@ -1064,11 +583,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             dst_index[0] = -1;
             dst_index[1] = -1;
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "; receiver_index size: ";
-#endif
+
         if (receiver_size > 0)
         {
             receiver_index = new int[2 * receiver_size];
@@ -1098,33 +613,9 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
         {
             p[i] = sender_displacement_process[i];
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "check BCD pnum" << std::endl;
-            log.flush();
-            for (int i = 0; i < DST_Matrix.nprows; ++i)
-            {
-                for (int j = 0; j < DST_Matrix.npcols; ++j)
-                {
-                    log << i << "\t" << j << "\t" << DST_Matrix.pnum(i, j) << std::endl;
-                }
-            }
-            log << "source CCS matrix parameters:\n";
-            log << "numColLocal: " << SRC_Matrix.numColLocal << std::endl;
-            log << "pointer to beginning of each process is inited by sender_displacement_process" << std::endl;
-            // log<<"icol"<<"\t"<<"g_col"<<"\t"<<"col(bcd)"<<"\t"<<"pcol(bcd)"<<std::endl;
-            // log.flush();
-        }
-// MPI_Barrier(COMM_TRANS);
-#endif
 
         int idx = 0;
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "idx start at " << idx << std::endl;
-#endif
+
         for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol)
         {
             int g_col = SRC_Matrix.globalCol(icol);
@@ -1135,59 +626,13 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                 int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1;
                 int recv_prow_bcd;
                 int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd);
-#ifdef _DEBUG
-                if (myproc < 100)
-                {
-                    if (recv_prow_bcd >= DST_Matrix.nprows || recv_prow_bcd < 0)
-                    {
-                        log << "ERROR: recv_prow_bcd error! recv_prow_bcd is " << recv_prow_bcd << "; max is "
-                            << DST_Matrix.nprows << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
+
                 int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd);
-#ifdef _DEBUG
-                // MPI_Barrier(COMM_TRANS);
-                if (myproc < 100)
-                {
-                    if (recv_proc_bcd > NPROC_TRANS || recv_proc_bcd < 0)
-                    {
-                        log << "ERROR: recv_proc_bcd outbound! recv_proc_bcd is " << recv_proc_bcd << "; max is "
-                            << NPROC_TRANS << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
+
                 int recv_proc = proc_map_bcd_trans[recv_proc_bcd];
-#ifdef _DEBUG
-                // MPI_Barrier(COMM_TRANS);
-                if (myproc < 100)
-                {
-                    if (p[recv_proc] >= sender_size || p[recv_proc] < 0)
-                    {
-                        log << "ERROR: sender_index's index outbound! " << std::endl;
-                        log << recv_prow_bcd << " " << recv_pcol_bcd << recv_proc_bcd << " " << recv_proc << std::endl;
-                        log << p[recv_proc] << " " << sender_size << std::endl;
-                        log.flush();
-                    }
-                }
-// MPI_Barrier(COMM_TRANS);
-#endif
+
                 sender_index[p[recv_proc]] = idx;
-#ifdef _DEBUG
-                // MPI_Barrier(COMM_TRANS);
-                if (myproc < 100)
-                {
-                    if ((p[recv_proc] * 2 + 1) >= (2 * sender_size) || (p[recv_proc] * 2 + 1) < 0)
-                    {
-                        log << "ERROR: dst_index's index outbound! recv_proc:" << recv_proc
-                            << "; p:" << p[recv_proc] * 2 + 1 << "; max is " << 2 * sender_size << std::endl;
-                        log.flush();
-                    }
-                }
-// MPI_Barrier(COMM_TRANS);
-#endif
+
                 dst_index[p[recv_proc] * 2] = recv_row;
                 dst_index[p[recv_proc] * 2 + 1] = recv_col;
                 ++p[recv_proc];
@@ -1195,50 +640,6 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             }
         }
 
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        // check sender_index and dst_index
-        if (myproc < 100)
-        {
-            for (int i = 0; i < sender_size; ++i)
-            {
-                if (sender_index[i] < 0 || sender_index[i] > SRC_Matrix.nnzLocal)
-                {
-                    log << "ERROR! sender_index outbound: " << i << " " << sender_index[i] << std::endl;
-                    log.flush();
-                }
-            }
-            for (int i = 0; i < 2 * sender_size; ++i)
-            {
-                if (dst_index[i] < 0 || dst_index[i] > DST_Matrix.size)
-                {
-                    log << "ERROR! dst_index outbound: " << i << " " << dst_index[i] << " " << DST_Matrix.size
-                        << std::endl;
-                    log.flush();
-                }
-            }
-            log << "sender_index is built" << std::endl;
-            log << "sender_size = " << sender_size << std::endl;
-            // for(int i=0; i<sender_size; i+=sender_size/100)
-            //     log<<i<<"\t"<<dst_index[2*i]<<"\t"<<dst_index[2*i+1]<<std::endl;
-            // OUT(ofs_running, "transformCCStoBCD: sender_index is built");
-
-            // save sender_index to file for debug
-            /*std::ofstream log_sender_index;
-            for(int i=0; i<NPROC_TRANS; ++i)
-            {
-                if(sender_size_process[i] > 0)
-                {
-                    sprintf(f_log, "sender_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
-                    log_sender_index.open(f_log, std::ios::app);
-                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i];
-            ++j) log_sender_index<<sender_index[j]<<std::endl; log_sender_index.close();
-                }
-            }
-            */
-        }
-#endif
-
         for (int i = 0; i < NPROC_TRANS; ++i)
         {
             sender_size_process[i] *= 2;
@@ -1246,38 +647,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
             receiver_size_process[i] *= 2;
             receiver_displacement_process[i] *= 2;
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "Alltoall parameters for index array" << std::endl;
-            log << "dst_index size:" << 2 * sender_size << "\t receiver_index size: " << 2 * receiver_size << std::endl;
-            log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
-                   "receiver_displacement_process"
-                << std::endl;
-            for (int i = 0; i < NPROC_TRANS; ++i)
-            {
-                log << i << "\t" << sender_size_process[i] << "\t\t" << sender_displacement_process[i] << "\t\t"
-                    << receiver_size_process[i] << "\t\t" << receiver_displacement_process[i] << std::endl;
-            }
-            // save dst_index to file for debug
-            /*std::ofstream log_dst_index;
-            for(int i=0; i<NPROC_TRANS; ++i)
-            {
-                if(sender_size_process[i] > 0)
-                {
-                    sprintf(f_log, "dst_index_from_%2.2d_to_%2.2d.log", myproc_trans, i);
-                    log_dst_index.open(f_log, std::ios::app);
-                    for(int j=sender_displacement_process[i]; j<sender_displacement_process[i]+sender_size_process[i];
-            ++j) log_dst_index<<dst_index[j]<<std::endl; log_dst_index.close();
-                }
-            }
-            */
-            log << "start alltoallv for index" << std::endl;
-        }
-        MPI_Barrier(COMM_TRANS);
-// OUT(ofs_running, "transformCCStoBCD: sender_index is built");
-#endif
+
         MPI_Alltoallv(&dst_index[0],
                       &sender_size_process[0],
                       &sender_displacement_process[0],
@@ -1287,62 +657,7 @@ int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                       &receiver_displacement_process[0],
                       MPI_INT,
                       COMM_TRANS);
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-        {
-            log << "receiver_index is got" << std::endl;
-            log << "receiver_size is: " << receiver_size << std::endl;
-            log.flush();
-        }
-/*
-// save receiver_index to file for debug
-std::ofstream log_rcv_index;
-for(int i=0; i<NPROC_TRANS; ++i)
-{
-    log<<"receive index (from proc_trans "<<i<<") is from "<<receiver_displacement_process[i]<<" to
-"<<receiver_displacement_process[i]+receiver_size_process[i]<<std::endl; if(receiver_size_process[i] > 0)
-    {
-        sprintf(f_log, "receiver_index_from_%2.2d_to_%2.2d.log", i, myproc_trans);
-        log_rcv_index.open(f_log, std::ios::app);
-        for(int j=receiver_displacement_process[i]; j<receiver_displacement_process[i]+receiver_size_process[i]; ++j)
-            log_rcv_index<<receiver_index[j]<<std::endl;
-        log_rcv_index.close();
-    }
-}
-log<<"receiver_index values are saved"<<std::endl;
-log.flush();
-// MPI_Barrier(COMM_TRANS);
 
-for(int i=0; i<receiver_size; ++i)
-{
-    if(receiver_index[i*2]<0)
-    {
-        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" < 0"<<std::endl;
-        log.flush();
-    }
-    else if(receiver_index[i*2]>DST_Matrix.get_nrow())
-    {
-        log<<"ERROR! receiver_index(BCD)["<<2*i<<"] = "<<receiver_index[i*2]<<" > "<<DST_Matrix.get_nrow()<<std::endl;
-        log.flush();
-    }
-    if(receiver_index[i*2+1]<0)
-    {
-        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" < 0"<<std::endl;
-        log.flush();
-    }
-    else if(receiver_index[i*2+1]>DST_Matrix.get_ncol())
-    {
-        log<<"ERROR! receiver_index(BCD)["<<2*i+1<<"] = "<<receiver_index[i*2+1]<<" > "<<DST_Matrix.get_ncol()<<std::endl;
-        log.flush();
-    }
-}
-log<<"receiver_index values are checked"<<std::endl;
-log.flush();
-MPI_Barrier(COMM_TRANS);
-// OUT(ofs_running, "transformCCStoBCD: receiver_index is got");
-*/
-#endif
         // reset size and displacement for transfering matrix value by alltoall
         for (int i = 0; i < NPROC_TRANS; ++i)
         {
@@ -1351,14 +666,6 @@ MPI_Barrier(COMM_TRANS);
             receiver_size_process[i] /= 2;
             receiver_displacement_process[i] /= 2;
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-        {
-            log << "size_process and displacement_process are reset for buffer transform" << std::endl;
-            log.flush();
-        }
-        MPI_Barrier(COMM_TRANS);
-#endif
 
         // transfer DM
         // set up DM sender buffer
@@ -1366,56 +673,7 @@ MPI_Barrier(COMM_TRANS);
         {
             sender_buffer[i] = DMnzvalLocal[sender_index[i]];
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-        {
-            log << "DM(CCS) is put to sender_buffer" << std::endl;
-            log.flush();
-            // OUT(ofs_running, "transformCCStoBCD: DM(CCS) is put to sender_buffer");
 
-            // check receiver_index, which may be changed after alltoall for buffer
-            for (int i = 0; i < receiver_size; ++i)
-            {
-                if (receiver_index[i * 2] < 0)
-                {
-                    log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " < 0"
-                        << std::endl;
-                    log.flush();
-                }
-                else if (receiver_index[i * 2] > DST_Matrix.get_nrow())
-                {
-                    log << "ERROR! receiver_index(BCD)[" << 2 * i << "] = " << receiver_index[i * 2] << " > "
-                        << DST_Matrix.get_nrow() << std::endl;
-                    log.flush();
-                }
-                if (receiver_index[i * 2 + 1] < 0)
-                {
-                    log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " < 0"
-                        << std::endl;
-                    log.flush();
-                }
-                else if (receiver_index[i * 2 + 1] > DST_Matrix.get_ncol())
-                {
-                    log << "ERROR! receiver_index(BCD)[" << 2 * i + 1 << "] = " << receiver_index[i * 2 + 1] << " > "
-                        << DST_Matrix.get_ncol() << std::endl;
-                    log.flush();
-                }
-            }
-            log << "receiver_index values are checked" << std::endl;
-            log.flush();
-            // check parameters for alltoall for buffer
-            log << "pid_trans  sender_size_process  sender_displacement_process  receiver_size_process  "
-                   "receiver_displacement_process"
-                << std::endl;
-            for (int i = 0; i < NPROC_TRANS; ++i)
-            {
-                log << i << "\t" << sender_size_process[i] << "\t\t" << sender_displacement_process[i] << "\t\t"
-                    << receiver_size_process[i] << "\t\t" << receiver_displacement_process[i] << std::endl;
-            }
-            log.flush();
-        }
-        MPI_Barrier(COMM_TRANS);
-#endif
         // transfer sender buffer to receiver buffer
         MPI_Alltoallv(&sender_buffer[0],
                       &sender_size_process[0],
@@ -1427,12 +685,6 @@ MPI_Barrier(COMM_TRANS);
                       MPI_DOUBLE,
                       COMM_TRANS);
 
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "receiver_buffer is got from DM" << std::endl;
-// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from DM");
-#endif
         // transform receiver_buffer to DM
         if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r')
         {
@@ -1442,19 +694,6 @@ MPI_Barrier(COMM_TRANS);
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
                 int idx = ix * DST_Matrix.get_ncol() + iy;
-#ifdef _DEBUG
-                if (myproc < 100)
-                {
-                    if (idx < 0 || idx >= DST_Matrix_elem)
-                    {
-                        log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
-                            << std::endl;
-                        log << "index number is " << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
                 DM[idx] = receiver_buffer[i];
             }
         }
@@ -1466,40 +705,15 @@ MPI_Barrier(COMM_TRANS);
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
                 int idx = iy * DST_Matrix.get_nrow() + ix;
-#ifdef _DEBUG
-                if (myproc < 100)
-                {
-                    if (idx < 0 || idx >= DST_Matrix_elem)
-                    {
-                        log << "idx for DM ERROR: idx is " << idx << "; DM total size is " << DST_Matrix_elem
-                            << std::endl;
-                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
                 DM[idx] = receiver_buffer[i];
             }
         }
 
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "DM(BCD) is got from receiver_buffer" << std::endl;
-        MPI_Barrier(COMM_TRANS);
-// OUT(ofs_running, "transformCCStoBCD: DM(BCD) is got from receiver_buffer");
-#endif
         // setup up sender buffer of EDM
         for (int i = 0; i < sender_size; ++i)
         {
             sender_buffer[i] = EDMnzvalLocal[sender_index[i]];
         }
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "EDM(CCS) is put to sender_buffer" << std::endl;
-// OUT(ofs_running, "transformCCStoBCD: EDM(CCS) is put to sender_buffer");
-#endif
 
         // transfer sender buffer to receiver buffer
         MPI_Alltoallv(&sender_buffer[0],
@@ -1511,12 +725,7 @@ MPI_Barrier(COMM_TRANS);
                       &receiver_displacement_process[0],
                       MPI_DOUBLE,
                       COMM_TRANS);
-#ifdef _DEBUG
-        MPI_Barrier(COMM_TRANS);
-        if (myproc < 100)
-            log << "receiver_buffer is got from EDM" << std::endl;
-// OUT(ofs_running, "transformCCStoBCD: receiver_buffer is got from EDM");
-#endif
+
         // transform receiver_buffer to EDM
         if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r')
         {
@@ -1526,19 +735,6 @@ MPI_Barrier(COMM_TRANS);
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
                 int idx = ix * DST_Matrix.get_ncol() + iy;
-#ifdef _DEBUG
-                if (myproc < 100)
-                {
-                    if (idx < 0 || idx >= DST_Matrix_elem)
-                    {
-                        log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
-                            << std::endl;
-                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " ncol = " << DST_Matrix.get_ncol() << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
                 EDM[idx] = receiver_buffer[i];
             }
         }
@@ -1550,54 +746,18 @@ MPI_Barrier(COMM_TRANS);
                 int ix = receiver_index[2 * i];
                 int iy = receiver_index[2 * i + 1];
                 int idx = iy * DST_Matrix.get_nrow() + ix;
-#ifdef _DEBUG
-                if (myproc < 100)
-                {
-                    if (idx < 0 || idx >= DST_Matrix_elem)
-                    {
-                        log << "idx for EDM ERROR: idx is " << idx << "; EDM total size is " << DST_Matrix_elem
-                            << std::endl;
-                        log << "index number is" << 2 * i << " ix = " << ix << " iy = " << iy
-                            << " nrow = " << DST_Matrix.get_nrow() << std::endl;
-                        log.flush();
-                    }
-                }
-#endif
                 EDM[idx] = receiver_buffer[i];
             }
         }
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "EDM(BCD) is got from receiver_buffer" << std::endl;
-        MPI_Barrier(COMM_TRANS);
-#endif
+
         delete[] sender_index;
         delete[] sender_buffer;
         delete[] dst_index;
         delete[] receiver_index;
         delete[] receiver_buffer;
-#ifdef _DEBUG
-        if (myproc < 100)
-            log << "work arrays are deleted" << std::endl;
-#endif
+
     }
-#ifdef _DEBUG
-    if (myproc < 100)
-        log << "OUT COMM_TRANS" << std::endl;
-    if (myproc < 100)
-        log << "before deleteGroupCommTrans" << std::endl;
-#endif
     deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS);
-#ifdef _DEBUG
-    MPI_Barrier(MPI_COMM_WORLD);
-    if (myproc < 100)
-    {
-        log << "COMM_TRANS is deleted" << std::endl;
-        log.close();
-    }
-    MPI_Barrier(MPI_COMM_WORLD);
-    OUT(ofs_running, "transformCCStoBCD: finish job, COMM_TRANS is deleted");
-#endif
     return 0;
 }
 
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index c81128f9db..e261d31f3e 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -80,9 +80,6 @@ int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix,
                       double*& H_ccs,
                       double*& S_ccs);
 
-// int transformCCStoBCD(DistCCSMatrix& SRC_Matrix, double* DMnzvalLocal,
-// DistBCDMatrix& DST_Matrix, double* DM_2d);
-
 int transformCCStoBCD(DistCCSMatrix& SRC_Matrix,
                       double* DMnzvalLocal,
                       double* ENDnzvalLocal,
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index ebbf253b20..b5f15b40d4 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -3,8 +3,8 @@
 #include "pexsi_solver.h"
 
 #include <mpi.h>
-
 #include <cstring>
+#include <vector>
 
 #include "module_base/global_variable.h"
 #include "simple_pexsi.h"
@@ -19,8 +19,6 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
                            const int ncol,
                            const double* h,
                            const double* s,
-                           double* DM,
-                           double* EDM,
                            double& totalEnergyH,
                            double& totalEnergyS,
                            double& totalFreeEnergy)
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index b3d7aed152..6289ccfd55 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -1,6 +1,8 @@
 #ifndef PEXSI_Solver_H
 #define PEXSI_Solver_H
 
+#include <vector>
+
 namespace pexsi
 {
 class PEXSI_Solver
@@ -12,8 +14,6 @@ class PEXSI_Solver
                  const int ncol,
                  const double* h,
                  const double* s,
-                 double* DM,
-                 double* EDM,
                  double& totalEnergyH,
                  double& totalEnergyS,
                  double& totalFreeEnergy);
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index b59ed233ea..7169e0ab85 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -38,27 +38,16 @@ inline void strtolower(char* sa, char* sb)
 
 inline void setDefaultOption(int* int_para, double* double_para)
 {
-    // options.spin=2;
     double_para[0] = 2;
-    // options.gap=0;
     double_para[2] = 0;
-    // ZERO_Limit=DBL_MIN;
     double_para[11] = DBL_MIN;
-    // options.matrixType=0;
     int_para[3] = 0;
-    // options.solver=1;
     int_para[6] = 1;
-    // options.ordering=0;
     int_para[8] = 0;
-    // options.rowOrdering=0;
     int_para[9] = 0;
-    // options.symmetric=0;
     int_para[11] = 0;
-    // options.transpose=0;
     int_para[12] = 0;
-    // options.nPoints=2;
     int_para[14] = 2;
-    // options.verbosity=1;
     int_para[15] = 1;
 }
 
@@ -137,222 +126,7 @@ int loadPEXSIOption(MPI_Comm comm,
     double_para[9] = hsolver::DiagoPexsi<double>::pexsi_mu_guard;
     double_para[10] = hsolver::DiagoPexsi<double>::pexsi_elec_thr;
     double_para[11] = hsolver::DiagoPexsi<double>::pexsi_zero_thr;
-    // int myid;
-    // MPI_Comm_rank(comm, &myid);
-    // if (myid == 0)
-    // {
-    //     std::ifstream ifs(PexsiOptionFile.c_str());
-    //     if (!ifs)
-    //     {
-    //         return 1;
-    //     }
-    //     setDefaultOption(int_para, double_para);
 
-    //     ifs.clear();
-    //     ifs.seekg(0);
-
-    //     char key[128];
-    //     char lowercase_key[128];
-    //     const int LINE_LINGTH = 1024;
-    //     char unused_string[LINE_LINGTH];
-
-    //     while (ifs.good())
-    //     {
-    //         ifs >> key;
-    //         //~ cout<<"readin word is: "<<key<<endl;
-    //         strtolower(key, lowercase_key);
-    //         if (strcmp("spin", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.spin;
-    //             ifs >> double_para[0];
-    //             //~ cout<<"double_para[0]: "<<key<<" = "<<double_para[0]<<endl;
-    //         }
-    //         else if (strcmp("temperature", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.temperature;
-    //             ifs >> double_para[1];
-    //             //~ cout<<"double_para[1]: "<<key<<" = "<<double_para[1]<<endl;
-    //         }
-    //         else if (strcmp("gap", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.gap;
-    //             ifs >> double_para[2];
-    //             //~ cout<<"double_para[2]: "<<key<<" = "<<double_para[2]<<endl;
-    //         }
-    //         else if (strcmp("deltae", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.deltaE;
-    //             ifs >> double_para[3];
-    //             //~ cout<<"double_para[3]: "<<key<<" = "<<double_para[3]<<endl;
-    //         }
-    //         else if (strcmp("numpole", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.numPole;
-    //             ifs >> int_para[0];
-    //             //~ cout<<"int_para[0]: "<<key<<" = "<<int_para[0]<<endl;
-    //         }
-    //         else if (strcmp("isinertiacount", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isInertiaCount;
-    //             ifs >> int_para[1];
-    //             //~ cout<<"int_para[1]: "<<key<<" = "<<int_para[1]<<endl;
-    //         }
-    //         else if (strcmp("maxpexsiiter", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.maxPEXSIIter;
-    //             ifs >> int_para[2];
-    //             //~ cout<<"int_para[2]: "<<key<<" = "<<int_para[2]<<endl;
-    //         }
-    //         else if (strcmp("mumin0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muMin0;
-    //             ifs >> double_para[4];
-    //             //~ cout<<"double_para[4]: "<<key<<" = "<<double_para[4]<<endl;
-    //         }
-    //         else if (strcmp("mumax0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muMax0;
-    //             ifs >> double_para[5];
-    //             //~ cout<<"double_para[5]: "<<key<<" = "<<double_para[5]<<endl;
-    //         }
-    //         else if (strcmp("mu0", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.mu0;
-    //             ifs >> double_para[6];
-    //             //~ cout<<"double_para[6]: "<<key<<" = "<<double_para[6]<<endl;
-    //         }
-    //         else if (strcmp("muinertiatolerance", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muInertiaTolerance;
-    //             ifs >> double_para[7];
-    //             //~ cout<<"double_para[7]: "<<key<<" = "<<double_para[7]<<endl;
-    //         }
-    //         else if (strcmp("muinertiaexpansion", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muInertiaExpansion;
-    //             ifs >> double_para[8];
-    //             //~ cout<<"double_para[8]: "<<key<<" = "<<double_para[8]<<endl;
-    //         }
-    //         else if (strcmp("mupexsisafeguard", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.muPEXSISafeGuard;
-    //             ifs >> double_para[9];
-    //             //~ cout<<"double_para[9]: "<<key<<" = "<<double_para[9]<<endl;
-    //         }
-    //         else if (strcmp("numelectronpexsitolerance", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.numElectronPEXSITolerance;
-    //             ifs >> double_para[10];
-    //             //~ cout<<"double_para[10]: "<<key<<" = "<<double_para[10]<<endl;
-    //         }
-    //         else if (strcmp("zero_limit", lowercase_key) == 0)
-    //         {
-    //             ifs >> double_para[11];
-    //         }
-    //         else if (strcmp("matrixtype", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.matrixType;
-    //             ifs >> int_para[3];
-    //             //~ cout<<"int_para[3]: "<<key<<" = "<<int_para[3]<<endl;
-    //         }
-    //         else if (strcmp("issymbolicfactorize", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isSymbolicFactorize;
-    //             ifs >> int_para[4];
-    //             //~ cout<<"int_para[4]: "<<key<<" = "<<int_para[4]<<endl;
-    //         }
-    //         else if (strcmp("isconstructcommpattern", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.isConstructCommPattern;
-    //             ifs >> int_para[5];
-    //             //~ cout<<"int_para[5]: "<<key<<" = "<<int_para[5]<<endl;
-    //         }
-    //         else if (strcmp("solver", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.solver;
-    //             ifs >> int_para[6];
-    //             //~ cout<<"int_para[6]: "<<key<<" = "<<int_para[6]<<endl;
-    //         }
-    //         else if (strcmp("symmetricstorage", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.symmetricStorage;
-    //             ifs >> int_para[7];
-    //             //~ cout<<"int_para[7]: "<<key<<" = "<<int_para[7]<<endl;
-    //         }
-    //         else if (strcmp("ordering", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.ordering;
-    //             ifs >> int_para[8];
-    //             //~ cout<<"int_para[8]: "<<key<<" = "<<int_para[8]<<endl;
-    //         }
-    //         else if (strcmp("rowordering", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.rowOrdering;
-    //             ifs >> int_para[9];
-    //             //~ cout<<"int_para[9]: "<<key<<" = "<<int_para[9]<<endl;
-    //         }
-    //         else if (strcmp("npsymbfact", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.npSymbFact;
-    //             ifs >> int_para[10];
-    //             //~ cout<<"int_para[10]: "<<key<<" = "<<int_para[10]<<endl;
-    //         }
-    //         else if (strcmp("symmetric", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.symmetric;
-    //             ifs >> int_para[11];
-    //             //~ cout<<"int_para[11]: "<<key<<" = "<<int_para[11]<<endl;
-    //         }
-    //         else if (strcmp("transpose", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.transpose;
-    //             ifs >> int_para[12];
-    //             //~ cout<<"int_para[12]: "<<key<<" = "<<int_para[12]<<endl;
-    //         }
-    //         else if (strcmp("method", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.method;
-    //             ifs >> int_para[13];
-    //             //~ cout<<"int_para[13]: "<<key<<" = "<<int_para[13]<<endl;
-    //         }
-    //         else if (strcmp("npoints", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.nPoints;
-    //             ifs >> int_para[14];
-    //             //~ cout<<"int_para[14]: "<<key<<" = "<<int_para[14]<<endl;
-    //         }
-    //         else if (strcmp("verbosity", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.verbosity;
-    //             ifs >> int_para[15];
-    //             //~ cout<<"int_para[15]: "<<key<<" = "<<int_para[15]<<endl;
-    //         }
-    //         else if (strcmp("numprocessperpole", lowercase_key) == 0)
-    //         {
-    //             //~ ifs>>options.verbosity;
-    //             ifs >> int_para[16];
-    //             //~ cout<<"int_para[16]: "<<key<<" = "<<int_para[16]<<endl;
-    //         }
-    //         else
-    //         {
-    //             if (key[0] == '#' || key[0] == '/')
-    //             {
-    //                 ifs.getline(unused_string, LINE_LINGTH);
-    //             }
-    //             else
-    //             {
-    //                 std::cout << " THE PARAMETER NAME '" << key << "' IS NOT USED!" << std::endl;
-    //                 return 1;
-    //             }
-    //         }
-    //     }
-    // }
-
-    // // broadcast all options
-    // MPI_Bcast(int_para, 17, MPI_INT, 0, comm);
-    // MPI_Bcast(double_para, 12, MPI_DOUBLE, 0, comm);
-
-    // setup PEXSI options from int_para and double_para
     options.numPole = int_para[0];
     options.isInertiaCount = int_para[1];
     options.maxPEXSIIter = int_para[2];
@@ -446,37 +220,15 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     if (comm_PEXSI != MPI_COMM_NULL)
     {
         MPI_Comm_rank(comm_PEXSI, &myid);
-// for log
-#ifdef _DEBUG
-        if (myid < 100)
-            log_openfile(myid, f_log);
-#endif
     }
 
-    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // DONE(ofs_running,"set up PEXSI parameter, begin");
     //  set up PEXSI parameter
     PPEXSIOptions options;
     PPEXSISetDefaultOptions(&options);
     int numProcessPerPole;
     double ZERO_Limit;
     loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit);
-// OUT(ofs_running, "checkpoint01");
-//  debug
-#ifdef _DEBUG
-    if (comm_PEXSI != MPI_COMM_NULL)
-    {
-        if (myid < 100)
-            log_PEXSIOption(numElectronExact, f_log);
-    }
-#endif
-    // end debug
-    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // DONE(ofs_running,"set up PEXSI parameter, finish");
 
-    // set up PEXSI plan
-    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // OUT(ofs_running, "checkpoint02");
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan");
     PPEXSIPlan plan;
     int info;
@@ -485,62 +237,27 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol");
     splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol);
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol");
-// OUT(ofs_running, "checkpoint03");
-#ifdef _DEBUG
-    // if(comm_PEXSI != MPI_COMM_NULL)
-    //{
-    if (myid < 100)
-        log_PEXSIgrid(pexsi_prow, pexsi_pcol, f_log);
-//}
-#endif
+
     outputFileIndex = -1;
-    // OUT(ofs_running, "checkpoint04");
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
     if (comm_PEXSI != MPI_COMM_NULL)
     {
-        // OUT(ofs_running, "checkpoint05");
         plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info);
-#ifdef _DEBUG
-        // OUT(ofs_running, "checkpoint06");
-        if (myid < 100)
-            log_PEXSIinit(info, f_log);
-// OUT(ofs_running, "checkpoint07");
-#endif
     }
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit");
-    // OUT(ofs_running, "checkpoint08");
-    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
+    
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan");
 
     // create compressed column storage distribution matrix parameter
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin");
     DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size);
-    // OUT(ofs_running, "checkpoint09");
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // DONE(ofs_running,"create compressed column storage distribution matrix parameter, finish");
 
-#ifdef _DEBUG
-    if (comm_PEXSI != MPI_COMM_NULL)
-    {
-        if (myid < 100)
-            log_DSTMatrix(DST_Matrix, f_log);
-    }
-#endif
 
     // create block cyclic distribution matrix parameter
-    // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // DONE(ofs_running,"create block cyclic distribution matrix parameter, begin");
-    // OUT(ofs_running, "checkpoint10");
     DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout);
-// OUT(ofs_running, "checkpoint11");
-#ifdef _DEBUG
-    if (comm_PEXSI != MPI_COMM_NULL)
-    {
-        if (myid < 100)
-            log_SRCMatrix(SRC_Matrix, f_log);
-    }
-#endif
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
     // DONE(ofs_running,"create block cyclic distribution matrix parameter, finish");
     double* HnzvalLocal = nullptr;
@@ -550,23 +267,14 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     double* FDMnzvalLocal = nullptr;
     // transform H and S from 2D block cyclic distribution to compressed column sparse matrix
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // OUT(ofs_running, "checkpoint12");
     DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal);
     // MPI_Barrier(MPI_COMM_WORLD);
     // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test
-    // OUT(ofs_running, "checkpoint13");
     if (comm_PEXSI != MPI_COMM_NULL)
     {
-// debug
-#ifdef _DEBUG
-        if (myid < 100)
-            log_DSTparameter(DST_Matrix, HnzvalLocal, f_log);
-#endif
-        // end debug
 
         // Load H and S to PEXSI
         int isSIdentity = 0;
-        // OUT(ofs_running, "checkpoint14");
         PPEXSILoadRealHSMatrix(plan,
                                options,
                                size,
@@ -579,25 +287,13 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                                isSIdentity,
                                SnzvalLocal,
                                &info);
-// OUT(ofs_running, "checkpoint15");
-#ifdef _DEBUG
-        if (myid < 100)
-            log_HSload(f_log);
-#endif
-        // call PEXSI to solve Kohn-Sham equation
-        // PPEXSIDFTDriver2(plan, &options,
-        // numElectronExact,
-        // &muPEXSI,
-        // &numElectronPEXSI,
-        // &numTotalInertiaIter,
-        // &info);
+
         double mu;
         double nelec;
         double muMinInertia;
         double muMaxInertia;
         int numTotalPEXSIIter;
         int numTotalInertiaIter; // Number of total inertia[out]
-        // OUT(ofs_running, "checkpoint16");
         // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
         ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
         PPEXSIDFTDriver(plan,                 // PEXSI plan[in]
@@ -612,14 +308,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                         &info);               // 0: successful; otherwise: unsuccessful
         // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
         ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
-// OUT(ofs_running, "checkpoint17");
-
-// debug
-#ifdef _DEBUG
-        if (myid < 100)
-            log_PEXSIcalled(mu, nelec, muMinInertia, muMaxInertia, numTotalPEXSIIter, f_log);
-#endif
-        // end debug
 
         // retrieve the results from the plan
         if (DMnzvalLocal != nullptr)
@@ -641,19 +329,10 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                                         &totalEnergyS,
                                         &totalFreeEnergy,
                                         &info);
-#ifdef _DEBUG
-            if (myid < 100)
-                log_DM(DST_Matrix, DMnzvalLocal, f_log);
-#endif
         }
         // clean PEXSI
         PPEXSIPlanFinalize(plan, &info);
-#ifdef _DEBUG
-        if (myid < 100)
-            log_PEXSIFinalized(f_log);
-#endif
     }
-    // OUT(ofs_running, "checkpoint18");
 
     // transform Density Matrix and Energy Density Matrix from compressed column sparse matrix
     // back to 2D block cyclic distribution if neccessary
@@ -664,61 +343,13 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
         EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
     }
-#ifdef _DEBUG
-    // OUT(ofs_running, "checkpoint19");
-    if (myid < 100)
-        log_DMEDM_in_BCD_allocated(f_log);
-    MPI_Barrier(MPI_COMM_WORLD);
-#endif
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM);
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
 
-#ifdef _DEBUG
-    MPI_Barrier(MPI_COMM_WORLD);
-    // OUT(ofs_running, "checkpoint20");
-    if (comm_PEXSI != MPI_COMM_NULL)
-    {
-        if (myid < 100)
-            log_DMtransformed(f_log);
-        if (myid < 100)
-            log_closefile(f_log);
-        // output result
-        // save local data of DMnzvalLocal
-        /*
-        ofstream f_DM;
-        sprintf(fname,"DM_%2.2d.dat", myid);
-        f_DM.open(fname, ios::out);
-        for(int i=0; i<SRC_Matrix.nrow; ++i)
-        {
-            for(int j=0; j<SRC_Matrix.ncol; ++j)
-            {
-                f_DM<<DM[i*SRC_Matrix.ncol+j]<<"\t";
-            }
-            f_DM<<"\n";
-        }
-        f_DM.close();
-
-        // save local data of EDMnzvalLocal
-        ofstream f_EDM;
-        sprintf(fname,"EDM_%2.2d.dat", myid);
-        f_EDM.open(fname, ios::out);
-        for(int i=0; i<SRC_Matrix.nrow; ++i)
-        {
-            for(int j=0; j<SRC_Matrix.ncol; ++j)
-            {
-                f_EDM<<EDM[i*SRC_Matrix.ncol+j]<<"\t";
-            }
-            f_EDM<<"\n";
-        }
-        f_EDM.close();
-        */
-    }
-#endif
     MPI_Barrier(MPI_COMM_WORLD);
-    // OUT(ofs_running, "checkpoint21");
     MPI_Barrier(MPI_COMM_WORLD);
     delete[] DMnzvalLocal;
     delete[] EDMnzvalLocal;
@@ -726,8 +357,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     delete[] HnzvalLocal;
     delete[] SnzvalLocal;
     MPI_Barrier(MPI_COMM_WORLD);
-    // OUT(ofs_running, "checkpoint22");
-    // MPI_Barrier(MPI_COMM_WORLD);
     return 0;
 }
 } // namespace pexsi
diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt
index c76e223e18..5dd16d10ee 100644
--- a/source/module_hsolver/test/CMakeLists.txt
+++ b/source/module_hsolver/test/CMakeLists.txt
@@ -86,14 +86,6 @@ if(ENABLE_LCAO)
     LIBS ${math_libs} ELPA::ELPA base genelpa psi device
     SOURCES diago_lcao_test.cpp ../diago_elpa.cpp ../diago_blas.cpp 
   )
-  # elseif(USE_PEXSI)
-  #   AddTest(
-  #     TARGET HSolver_LCAO
-  #     LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} base psi device
-  #     SOURCES diago_lcao_test.cpp ../diago_pexsi.cpp ../diago_blas.cpp 
-  #   )
-  #   # print out the PEXSI library path
-  #   message(STATUS "PEXSI_LIBRARY: ${PEXSI_LIBRARY}")
   else()
     AddTest(
       TARGET HSolver_LCAO

From 26685cd09dbac5fafa7fb1a740dfb16598916c7c Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Sat, 3 Feb 2024 21:52:23 +0800
Subject: [PATCH 25/44] Improvement: take calculated mu as new initial guess,
 may slightly improve performance

---
 source/module_hsolver/module_pexsi/simple_pexsi.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 7169e0ab85..3ef8e6eeeb 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -309,6 +309,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
         ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
 
+        hsolver::DiagoPexsi<double>::pexsi_mu = mu;
         // retrieve the results from the plan
         if (DMnzvalLocal != nullptr)
             delete[] DMnzvalLocal;

From 2cf6773dfe075a4d558c65239a17085093587f80 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Sun, 4 Feb 2024 01:53:02 +0800
Subject: [PATCH 26/44] Fix mistakes in the last commit

---
 source/module_hsolver/diago_pexsi.cpp               |  6 ++++--
 source/module_hsolver/diago_pexsi.h                 |  7 +++++++
 source/module_hsolver/module_pexsi/pexsi_solver.cpp | 11 +++++++++--
 source/module_hsolver/module_pexsi/pexsi_solver.h   |  4 +++-
 source/module_hsolver/module_pexsi/simple_pexsi.cpp |  7 ++++---
 source/module_hsolver/module_pexsi/simple_pexsi.h   |  4 +++-
 6 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 1cfc765c2a..b0bac5e220 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -125,6 +125,7 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     phm_in->matrix(h_mat, s_mat);
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
+    int ik = psi.get_current_k();
     this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
                                        this->ParaV->nb,
                                        this->ParaV->nrow,
@@ -134,12 +135,13 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
                                        this->totalEnergyH,
                                        this->totalEnergyS,
                                        this->totalFreeEnergy);
-    this->ps->solve();
+    this->ps->solve(mu_buffer[ik]);
     this->EDM.push_back(this->ps->get_EDM());
-    this->DM.push_back(this->ps->get_DM()); // loc.dm_gamma[ik] loc.dm_gamma[0]?
+    this->DM.push_back(this->ps->get_DM());
     this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
     this->totalEnergyH = this->ps->get_totalEnergyH();
     this->totalEnergyS = this->ps->get_totalEnergyS();
+    this->mu_buffer[ik] = this->ps->get_mu();
 }
 
 template <>
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 8e2aa98da1..c749096410 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -3,6 +3,7 @@
 
 #include <vector>
 #include "diagh.h"
+#include "module_base/global_variable.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
 #include "module_pexsi/pexsi_solver.h"
 
@@ -14,10 +15,16 @@ class DiagoPexsi : public DiagH<T>
 {
   private:
     using Real = typename GetTypeReal<T>::type;
+    std::vector<double> mu_buffer;
 
   public:
     DiagoPexsi(const Parallel_Orbitals* ParaV_in)
     {
+        mu_buffer.resize(GlobalV::NSPIN);
+        for (int i = 0; i < GlobalV::NSPIN; i++)
+        {
+            mu_buffer[i] = this->pexsi_mu;
+        }
         this->ParaV = ParaV_in;
     }
     void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index b5f15b40d4..ed1cfa0061 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -38,7 +38,7 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
     this->totalFreeEnergy = 0.0;
 }
 
-int PEXSI_Solver::solve()
+int PEXSI_Solver::solve(double mu0)
 {
     MPI_Group grid_group;
     int myid, grid_np;
@@ -67,7 +67,9 @@ int PEXSI_Solver::solve()
                 this->EDM,
                 this->totalEnergyH,
                 this->totalEnergyS,
-                this->totalFreeEnergy);
+                this->totalFreeEnergy,
+                mu,
+                mu0);
     return 0;
 }
 
@@ -96,5 +98,10 @@ const double PEXSI_Solver::get_totalEnergyS() const
     return totalEnergyS;
 }
 
+const double PEXSI_Solver::get_mu() const
+{
+    return mu;
+}
+
 } // namespace pexsi
 #endif
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 6289ccfd55..1bf7060dde 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -17,12 +17,13 @@ class PEXSI_Solver
                  double& totalEnergyH,
                  double& totalEnergyS,
                  double& totalFreeEnergy);
-    int solve();
+    int solve(double mu0);
     double* get_DM() const;
     double* get_EDM() const;
     const double get_totalFreeEnergy() const;
     const double get_totalEnergyH() const;
     const double get_totalEnergyS() const;
+    const double get_mu() const;
 
   private:
     int blacs_text;
@@ -36,6 +37,7 @@ class PEXSI_Solver
     double totalEnergyH;
     double totalEnergyS;
     double totalFreeEnergy;
+    double mu;
 };
 } // namespace pexsi
 #endif // PEXSI_Solver_H
\ No newline at end of file
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index 3ef8e6eeeb..a5f8e6f804 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -210,7 +210,9 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                 double*& EDM, // output matrices
                 double& totalEnergyH,
                 double& totalEnergyS,
-                double& totalFreeEnergy) // output energy
+                double& totalFreeEnergy, // output energy
+                double& mu,
+                double mu0)
 {
 
     if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL)
@@ -228,6 +230,7 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     int numProcessPerPole;
     double ZERO_Limit;
     loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit);
+    options.mu0 = mu0;
 
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan");
     PPEXSIPlan plan;
@@ -288,7 +291,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                                SnzvalLocal,
                                &info);
 
-        double mu;
         double nelec;
         double muMinInertia;
         double muMaxInertia;
@@ -309,7 +311,6 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
         // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
         ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT");
 
-        hsolver::DiagoPexsi<double>::pexsi_mu = mu;
         // retrieve the results from the plan
         if (DMnzvalLocal != nullptr)
             delete[] DMnzvalLocal;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h
index fded81fc59..db8879e5ac 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.h
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.h
@@ -22,6 +22,8 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
                 double*& EDM, // output matrices
                 double& totalEnergyH,
                 double& totalEnergyS,
-                double& totalFreeEnergy);
+                double& totalFreeEnergy,
+                double& mu,
+                double mu0);
 }
 #endif // SIMPLE_PEXSI_H
\ No newline at end of file

From 7298c4160223e29a6bfd79f7bbd46f1f4f754021 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Tue, 6 Feb 2024 15:19:37 +0800
Subject: [PATCH 27/44] Fix: params and features - set default pexsi_temp - fix
 md in pexsi

---
 source/module_elecstate/elecstate_lcao.cpp    |  9 ++++++---
 source/module_elecstate/elecstate_lcao.h      |  4 +++-
 .../hamilt_lcaodft/FORCE_gamma_edm.cpp        | 19 +++++++++++++++++--
 source/module_hsolver/hsolver_lcao.cpp        |  2 +-
 source/module_io/input.cpp                    |  2 +-
 source/module_io/input.h                      |  2 +-
 source/module_io/test/input_conv_test.cpp     |  4 ++--
 source/module_io/test/input_test_para.cpp     |  2 +-
 source/module_io/test/write_input_test.cpp    |  2 +-
 9 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index a474a3c172..3538b28b82 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -1,4 +1,5 @@
 #include "elecstate_lcao.h"
+#include <vector>
 
 #include "cal_dm.h"
 #include "module_base/timer.h"
@@ -166,7 +167,7 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
     this->calEBand();
 
     if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack"
-        || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao" || GlobalV::KS_SOLVER == "pexsi")
+        || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao")
     {
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
 
@@ -252,7 +253,7 @@ double ElecStateLCAO<std::complex<double>>::get_spin_constrain_energy()
 
 #ifdef __PEXSI
 template<>
-void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM)
+void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<double*> pexsi_EDM)
 {
     ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
 
@@ -267,9 +268,11 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM)
     }
 
     auto DM = this->get_DM();
+    this->pexsi_EDM.clear();
     for (int is = 0; is < GlobalV::NSPIN; is++)
     {
         this->DM->set_DMK_pointer(is, pexsi_DM[is]);
+        this->pexsi_EDM.push_back(pexsi_EDM[is]);
     }
     DM->cal_DMR();
     
@@ -299,7 +302,7 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM)
 }
 
 template<>
-void ElecStateLCAO<std::complex<double>>::dmToRho(std::vector<std::complex<double>*> DM)
+void ElecStateLCAO<std::complex<double>>::dmToRho(std::vector<std::complex<double>*> pexsi_DM, std::vector<std::complex<double>*> pexsi_EDM)
 {
     ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case");
 }
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index c47bd0c8cb..4a7df3d76e 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -1,6 +1,7 @@
 #ifndef ELECSTATELCAO_H
 #define ELECSTATELCAO_H
 
+#include <vector>
 #include "elecstate.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
@@ -61,7 +62,8 @@ class ElecStateLCAO : public ElecState
 
 #ifdef __PEXSI
     // use for pexsi
-    void dmToRho(std::vector<TK*> pexsi_DM);
+    void dmToRho(std::vector<TK*> pexsi_DM, std::vector<TK*> pexsi_EDM);
+    std::vector<TK*> pexsi_EDM;
 #endif
 
   protected:
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
index a69821c50c..1ce37e592e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
@@ -1,4 +1,5 @@
 #include "FORCE_gamma.h"
+#include "module_elecstate/elecstate_lcao.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_base/parallel_reduce.h"
 #include "module_base/timer.h"
@@ -35,8 +36,22 @@ void Force_LCAO_gamma::cal_foverlap(
     // construct a DensityMatrix for Gamma-Only
     const Parallel_Orbitals* pv = this->ParaV;
     elecstate::DensityMatrix<double, double> EDM(pv,GlobalV::NSPIN);
-
-    elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM);
+    
+#ifdef __PEXSI
+    if (GlobalV::KS_SOLVER == "pexsi")
+    {
+        auto pes = dynamic_cast<const elecstate::ElecStateLCAO<double>*>(pelec);
+        for (int ik = 0; ik < GlobalV::NSPIN; ik++)
+        {
+            EDM.set_DMK_pointer(ik, pes->pexsi_EDM[ik]);
+        }
+        
+    }
+    else
+#endif
+    {
+        elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM);
+    }
 
     ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d");
 
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 66725f764f..9f9460eb7c 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -232,7 +232,7 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
         pes->f_en.eband = tem->totalFreeEnergy;
-        _pes->dmToRho(tem->DM);
+        _pes->dmToRho(tem->DM, tem->EDM);
     }
     else
 #endif
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index fb5bc43551..df515d6d10 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -657,7 +657,7 @@ void Input::Default(void)
     pexsi_method = 1;
     pexsi_nproc_pole = 1;
     // pexsi_spin = 2;
-    pexsi_temp = 0.0001;
+    pexsi_temp = 0.015;
     pexsi_gap = 0;
     pexsi_delta_e = 20.0;
     pexsi_mu_lower = -10;
diff --git a/source/module_io/input.h b/source/module_io/input.h
index f166612a4d..e527987b8e 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -620,7 +620,7 @@ class Input
     int pexsi_method = 1;
     int pexsi_nproc_pole = 1;
     // double pexsi_spin = 2;
-    double pexsi_temp = 0.0001;
+    double pexsi_temp = 0.015;
     double pexsi_gap = 0;
     double pexsi_delta_e = 20.0;
     double pexsi_mu_lower = -10;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index 60ec6c82dd..3455e9ec40 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -560,7 +560,7 @@ TEST_F(InputConvTest, PEXSI)
 	EXPECT_FALSE(hsolver::DiagoPexsi<double>::pexsi_trans);
 	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_method, 1);
 	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nproc_pole, 1);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_temp, 1e-4);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_temp, 0.015);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_gap, 0);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_delta_e, 20);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_lower, -10);
@@ -577,7 +577,7 @@ TEST_F(InputConvTest, PEXSI)
 	EXPECT_FALSE(hsolver::DiagoPexsi<std::complex<double>>::pexsi_trans);
 	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_method, 1);
 	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc_pole, 1);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp, 0.0001);
+	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp, 0.015);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_gap, 0);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_delta_e, 20);
 	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_lower, -10);
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index ad40d635ea..1b936f73d5 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -405,7 +405,7 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_FALSE(INPUT.pexsi_trans);
     EXPECT_EQ(INPUT.pexsi_method, 1);
     EXPECT_EQ(INPUT.pexsi_nproc_pole, 1);
-    EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.0001);
+    EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.015);
     EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0);
     EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 20);
     EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10);
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index e106fc399f..6c8a2b32f9 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -946,7 +946,7 @@ TEST_F (write_input, PEXSI24)
     EXPECT_THAT(output, testing::HasSubstr("pexsi_trans                    0 #Whether to transpose"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_method                   1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole               1 #Number of processes used by each pole"));
-    EXPECT_THAT(output, testing::HasSubstr("pexsi_temp                     0.0001 #Temperature, in the same unit as H"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_temp                     0.015 #Temperature, in the same unit as H"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_gap                      0 #Spectral gap"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_delta_e                  20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower                 -10 #Initial guess of lower bound for mu"));

From ce18c08714444b369288cb0a84f3d2326bfc6d9f Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Tue, 6 Feb 2024 15:45:52 +0800
Subject: [PATCH 28/44] fix empty lines

---
 source/module_elecstate/elecstate_lcao.cpp | 3 +--
 source/module_esolver/esolver_ks.cpp       | 3 +++
 source/module_relax/relax_driver.cpp       | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 3538b28b82..3d629f7abe 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -150,6 +150,7 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d
         Gint_inout inout1(this->loc->DM_R, this->charge->kin_r, Gint_Tools::job_type::tau);
         this->uhm->GK.cal_gint(&inout1);
     }
+
     this->charge->renormalize_rho();
 
     ModuleBase::timer::tick("ElecStateLCAO", "psiToRho");
@@ -175,7 +176,6 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
         //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_gamma);
         elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM));
         this->DM->cal_DMR();
-
         if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
         {
             this->loc->dm_gamma.resize(GlobalV::NSPIN);
@@ -185,7 +185,6 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
             }
         }
         ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d");
-
         for (int ik = 0; ik < psi.get_nk(); ++ik)
         {
             // for gamma_only case, no convertion occured, just for print.
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index ead5195085..fab63ce864 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -354,6 +354,7 @@ namespace ModuleESolver
         else
         {
             ModuleBase::timer::tick(this->classname, "Run");
+
             this->beforescf(istep); //Something else to do before the iter loop
             ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF");
             if(this->maxniter > 0)  this->printhead(); //print the headline on the screen.
@@ -471,8 +472,10 @@ namespace ModuleESolver
                 }
             }
             afterscf(istep);
+
             ModuleBase::timer::tick(this->classname, "Run");
         }       
+
         return;
     };
 
diff --git a/source/module_relax/relax_driver.cpp b/source/module_relax/relax_driver.cpp
index 4da9d7a283..78c4710e1a 100644
--- a/source/module_relax/relax_driver.cpp
+++ b/source/module_relax/relax_driver.cpp
@@ -31,6 +31,7 @@ void Relax_Driver<FPTYPE, Device>::relax_driver(ModuleESolver::ESolver *p_esolve
     while (istep <= GlobalV::RELAX_NMAX && !stop)
     {
         time_t estart = time(NULL);
+
         if (GlobalV::OUT_LEVEL == "ie"
             && (GlobalV::CALCULATION == "relax" || GlobalV::CALCULATION == "cell-relax" || GlobalV::CALCULATION == "scf"
                 || GlobalV::CALCULATION == "nscf"))
@@ -39,7 +40,6 @@ void Relax_Driver<FPTYPE, Device>::relax_driver(ModuleESolver::ESolver *p_esolve
         }
 
         // mohan added eiter to count for the electron iteration number, 2021-01-28
-        
         p_esolver->Run(istep - 1, GlobalC::ucell);
 
         time_t eend = time(NULL);
@@ -117,8 +117,10 @@ void Relax_Driver<FPTYPE, Device>::relax_driver(ModuleESolver::ESolver *p_esolve
             }
         }
         time_t fend = time(NULL);
+
         ++istep;
     }
+
     if (GlobalV::OUT_LEVEL == "i")
     {
         std::cout << " ION DYNAMICS FINISHED :)" << std::endl;

From c4d86a44cc77466192ad72869d091dbf247998e1 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Wed, 7 Feb 2024 18:08:30 +0800
Subject: [PATCH 29/44] Fix: move params to pexsi_solver, rename USE_PEXSI to
 ENABLE_PEXSI

---
 CMakeLists.txt                                |   4 +-
 cmake/FindPEXSI.cmake                         |   2 +-
 source/module_hsolver/CMakeLists.txt          |   2 +-
 source/module_hsolver/diago_pexsi.cpp         | 102 -----------------
 source/module_hsolver/diago_pexsi.h           | 104 +-----------------
 .../module_pexsi/pexsi_solver.cpp             |  27 +++++
 .../module_pexsi/pexsi_solver.h               | 103 +++++++++++++++++
 .../module_pexsi/simple_pexsi.cpp             |  50 ++++-----
 source/module_io/input_conv.cpp               |  76 +++++--------
 source/module_io/test/input_conv_test.cpp     |  63 ++++-------
 10 files changed, 208 insertions(+), 325 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c6080e51b..e2ae6dc04f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,7 @@ option(COMMIT_INFO "Print commit information in log" ON)
 option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
 option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
 option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
-option(USE_PEXSI "Enable support for PEXSI." OFF)
+option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
 
 
 
@@ -210,7 +210,7 @@ if(ENABLE_LCAO)
       add_compile_definitions(USE_NEW_TWO_CENTER)
   endif()
   
-  if(USE_PEXSI)
+  if(ENABLE_PEXSI)
     find_package(PEXSI REQUIRED)
     target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake
index b1565d2c06..5adc4c8a6d 100644
--- a/cmake/FindPEXSI.cmake
+++ b/cmake/FindPEXSI.cmake
@@ -41,7 +41,7 @@ find_library(ParMETIS_LIBRARY
 )
 
 find_library(SuperLU_DIST_LIBRARY
-    NAMES libsuperlu_dist.a
+    NAMES superlu_dist
     HINTS ${SuperLU_DIST_DIR}
     PATH_SUFFIXES "lib"
 )
diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt
index 9a023fb5d0..cea200887a 100644
--- a/source/module_hsolver/CMakeLists.txt
+++ b/source/module_hsolver/CMakeLists.txt
@@ -38,7 +38,7 @@ if(ENABLE_LCAO)
     endif()
   endif()
 
-  if(USE_PEXSI)
+  if(ENABLE_PEXSI)
   list(APPEND objects
       diago_pexsi.cpp
     )
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index b0bac5e220..95c4a7433e 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -15,108 +15,6 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 
 namespace hsolver
 {
-template <>
-int DiagoPexsi<double>::pexsi_npole = 0;
-template <>
-bool DiagoPexsi<double>::pexsi_inertia = 0;
-template <>
-int DiagoPexsi<double>::pexsi_nmax = 0;
-// template <>
-// int DiagoPexsi<double>::pexsi_symbolic = 0;
-template <>
-bool DiagoPexsi<double>::pexsi_comm = 0;
-template <>
-bool DiagoPexsi<double>::pexsi_storage = 0;
-template <>
-int DiagoPexsi<double>::pexsi_ordering = 0;
-template <>
-int DiagoPexsi<double>::pexsi_row_ordering = 0;
-template <>
-int DiagoPexsi<double>::pexsi_nproc = 0;
-template <>
-bool DiagoPexsi<double>::pexsi_symm = 0;
-template <>
-bool DiagoPexsi<double>::pexsi_trans = 0;
-template <>
-int DiagoPexsi<double>::pexsi_method = 0;
-template <>
-int DiagoPexsi<double>::pexsi_nproc_pole = 0;
-// template <>
-// double DiagoPexsi<double>::pexsi_spin = 2;
-template <>
-double DiagoPexsi<double>::pexsi_temp = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_gap = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_delta_e = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu_lower = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu_upper = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu_thr = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu_expand = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_mu_guard = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_elec_thr = 0.0;
-template <>
-double DiagoPexsi<double>::pexsi_zero_thr = 0.0;
-
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_npole = 0;
-template <>
-bool DiagoPexsi<std::complex<double>>::pexsi_inertia = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_nmax = 0;
-// template <>
-// int DiagoPexsi<std::complex<double>>::pexsi_symbolic = 0;
-template <>
-bool DiagoPexsi<std::complex<double>>::pexsi_comm = 0;
-template <>
-bool DiagoPexsi<std::complex<double>>::pexsi_storage = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_ordering = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_row_ordering = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_nproc = 0;
-template <>
-bool DiagoPexsi<std::complex<double>>::pexsi_symm = 0;
-template <>
-bool DiagoPexsi<std::complex<double>>::pexsi_trans = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_method = 0;
-template <>
-int DiagoPexsi<std::complex<double>>::pexsi_nproc_pole = 0;
-// template <>
-// double DiagoPexsi<std::complex<double>>::pexsi_spin = 2;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_temp = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_gap = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_delta_e = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu_lower = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu_upper = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu_thr = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu_expand = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_mu_guard = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_elec_thr = 0.0;
-template <>
-double DiagoPexsi<std::complex<double>>::pexsi_zero_thr = 0.0;
-
 template <>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index c749096410..af3a175ff1 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -23,7 +23,7 @@ class DiagoPexsi : public DiagH<T>
         mu_buffer.resize(GlobalV::NSPIN);
         for (int i = 0; i < GlobalV::NSPIN; i++)
         {
-            mu_buffer[i] = this->pexsi_mu;
+            mu_buffer[i] = this->ps->pexsi_mu;
         }
         this->ParaV = ParaV_in;
     }
@@ -36,108 +36,6 @@ class DiagoPexsi : public DiagH<T>
     double totalFreeEnergy;
     pexsi::PEXSI_Solver* ps;
 
-    //==========================================================
-    // PEXSI related variables
-    //==========================================================
-    /** 
-     * @brief  Number of terms in the pole expansion.
-     */ 
-    static int pexsi_npole;
-    /** 
-     * @brief  Whether inertia counting is used at the very beginning.
-     */ 
-    static bool pexsi_inertia;
-    /** 
-     * @brief  Maximum number of PEXSI iterations after each inertia counting procedure.
-     */ 
-    static int pexsi_nmax;
-    /** 
-     * @brief  Whether to construct PSelInv communication pattern.
-     */ 
-    static bool pexsi_comm;
-    /** 
-     * @brief  Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices.  
-     */ 
-    static bool pexsi_storage;
-    /** 
-     * @brief  Ordering strategy for factorization and selected inversion. 
-     */ 
-    static int pexsi_ordering;
-    /** 
-     * @brief  row permutation strategy for factorization and selected inversion.  
-     */ 
-    static int pexsi_row_ordering;
-    /** 
-     * @brief  Number of processors for PARMETIS/PT-SCOTCH.  Only used if the ordering == 0.
-     */ 
-    static int pexsi_nproc;
-    /** 
-     * @brief  Matrix structure.
-     * - = 0   : Unsymmetric matrix
-     * - = 1   : Symmetric matrix (default).
-     */ 
-    static bool pexsi_symm;
-    /** 
-     * @brief  Transpose.
-     * - = 0   : Factor non transposed matrix (default).
-     * - = 1   : Factor transposed matrix.
-     */ 
-    static bool pexsi_trans;
-    /** 
-     * @brief  The pole expansion method to be used.
-     * - = 1   : Cauchy Contour Integral method used.
-     * - = 2   : Moussa optimized method.
-     */ 
-    static int pexsi_method;
-    /** 
-     * @brief  The point parallelizaion of PEXSI.
-     * - = 2  : Recommend two points parallelization
-     */ 
-    static int pexsi_nproc_pole;
-    /** 
-     * @brief  Temperature, in the same unit as H 
-     */ 
-    static double pexsi_temp;
-    /** 
-     * @brief  Spectral gap. **Note** This can be set to be 0 in most cases.
-     */ 
-    static double pexsi_gap;
-    /** 
-     * @brief  An upper bound for the spectral radius of \f$S^{-1} H\f$.
-     */ 
-    static double pexsi_delta_e;
-    /** 
-     * @brief  Initial guess of lower bound for mu.
-     */ 
-    static double pexsi_mu_lower;
-    /** 
-     * @brief  Initial guess of upper bound for mu.
-     */ 
-    static double pexsi_mu_upper;
-    /** 
-     * @brief  Initial guess for mu (for the solver) (AG)
-     */ 
-    static double pexsi_mu;
-    /** 
-     * @brief  Stopping criterion in terms of the chemical potential for the inertia counting procedure.
-     */ 
-    static double pexsi_mu_thr;
-    /** 
-     * @brief  If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion.
-     */ 
-    static double pexsi_mu_expand;
-    /** 
-     * @brief  Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure.
-     */ 
-    static double pexsi_mu_guard;
-    /** 
-     * @brief  Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact.
-     */ 
-    static double pexsi_elec_thr;
-    /** 
-     * @brief  Stopping criterion for the zero threshold.
-     */ 
-    static double pexsi_zero_thr;
 };
 } // namespace hsolver
 
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index ed1cfa0061..ebb2c6d78b 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -13,6 +13,33 @@ extern MPI_Comm DIAG_WORLD;
 extern MPI_Comm GRID_WORLD;
 namespace pexsi
 {
+
+int PEXSI_Solver::pexsi_npole = 0;
+bool PEXSI_Solver::pexsi_inertia = 0;
+int PEXSI_Solver::pexsi_nmax = 0;
+// int PEXSI_Solver::pexsi_symbolic = 0;
+bool PEXSI_Solver::pexsi_comm = 0;
+bool PEXSI_Solver::pexsi_storage = 0;
+int PEXSI_Solver::pexsi_ordering = 0;
+int PEXSI_Solver::pexsi_row_ordering = 0;
+int PEXSI_Solver::pexsi_nproc = 0;
+bool PEXSI_Solver::pexsi_symm = 0;
+bool PEXSI_Solver::pexsi_trans = 0;
+int PEXSI_Solver::pexsi_method = 0;
+int PEXSI_Solver::pexsi_nproc_pole = 0;
+// double PEXSI_Solver::pexsi_spin = 2;
+double PEXSI_Solver::pexsi_temp = 0.0;
+double PEXSI_Solver::pexsi_gap = 0.0;
+double PEXSI_Solver::pexsi_delta_e = 0.0;
+double PEXSI_Solver::pexsi_mu_lower = 0.0;
+double PEXSI_Solver::pexsi_mu_upper = 0.0;
+double PEXSI_Solver::pexsi_mu = 0.0;
+double PEXSI_Solver::pexsi_mu_thr = 0.0;
+double PEXSI_Solver::pexsi_mu_expand = 0.0;
+double PEXSI_Solver::pexsi_mu_guard = 0.0;
+double PEXSI_Solver::pexsi_elec_thr = 0.0;
+double PEXSI_Solver::pexsi_zero_thr = 0.0;
+
 PEXSI_Solver::PEXSI_Solver(const int blacs_text,
                            const int nb,
                            const int nrow,
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 1bf7060dde..880efaf504 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -25,6 +25,109 @@ class PEXSI_Solver
     const double get_totalEnergyS() const;
     const double get_mu() const;
 
+    //==========================================================
+    // PEXSI related variables
+    //==========================================================
+    /** 
+     * @brief  Number of terms in the pole expansion.
+     */ 
+    static int pexsi_npole;
+    /** 
+     * @brief  Whether inertia counting is used at the very beginning.
+     */ 
+    static bool pexsi_inertia;
+    /** 
+     * @brief  Maximum number of PEXSI iterations after each inertia counting procedure.
+     */ 
+    static int pexsi_nmax;
+    /** 
+     * @brief  Whether to construct PSelInv communication pattern.
+     */ 
+    static bool pexsi_comm;
+    /** 
+     * @brief  Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices.  
+     */ 
+    static bool pexsi_storage;
+    /** 
+     * @brief  Ordering strategy for factorization and selected inversion. 
+     */ 
+    static int pexsi_ordering;
+    /** 
+     * @brief  row permutation strategy for factorization and selected inversion.  
+     */ 
+    static int pexsi_row_ordering;
+    /** 
+     * @brief  Number of processors for PARMETIS/PT-SCOTCH.  Only used if the ordering == 0.
+     */ 
+    static int pexsi_nproc;
+    /** 
+     * @brief  Matrix structure.
+     * - = 0   : Unsymmetric matrix
+     * - = 1   : Symmetric matrix (default).
+     */ 
+    static bool pexsi_symm;
+    /** 
+     * @brief  Transpose.
+     * - = 0   : Factor non transposed matrix (default).
+     * - = 1   : Factor transposed matrix.
+     */ 
+    static bool pexsi_trans;
+    /** 
+     * @brief  The pole expansion method to be used.
+     * - = 1   : Cauchy Contour Integral method used.
+     * - = 2   : Moussa optimized method.
+     */ 
+    static int pexsi_method;
+    /** 
+     * @brief  The point parallelizaion of PEXSI.
+     * - = 2  : Recommend two points parallelization
+     */ 
+    static int pexsi_nproc_pole;
+    /** 
+     * @brief  Temperature, in the same unit as H 
+     */ 
+    static double pexsi_temp;
+    /** 
+     * @brief  Spectral gap. **Note** This can be set to be 0 in most cases.
+     */ 
+    static double pexsi_gap;
+    /** 
+     * @brief  An upper bound for the spectral radius of \f$S^{-1} H\f$.
+     */ 
+    static double pexsi_delta_e;
+    /** 
+     * @brief  Initial guess of lower bound for mu.
+     */ 
+    static double pexsi_mu_lower;
+    /** 
+     * @brief  Initial guess of upper bound for mu.
+     */ 
+    static double pexsi_mu_upper;
+    /** 
+     * @brief  Initial guess for mu (for the solver) (AG)
+     */ 
+    static double pexsi_mu;
+    /** 
+     * @brief  Stopping criterion in terms of the chemical potential for the inertia counting procedure.
+     */ 
+    static double pexsi_mu_thr;
+    /** 
+     * @brief  If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion.
+     */ 
+    static double pexsi_mu_expand;
+    /** 
+     * @brief  Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure.
+     */ 
+    static double pexsi_mu_guard;
+    /** 
+     * @brief  Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact.
+     */ 
+    static double pexsi_elec_thr;
+    /** 
+     * @brief  Stopping criterion for the zero threshold.
+     */ 
+    static double pexsi_zero_thr;
+
   private:
     int blacs_text;
     int nb;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index a5f8e6f804..c52a6c8ef3 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -96,36 +96,36 @@ int loadPEXSIOption(MPI_Comm comm,
     double double_para[12];
 
     // read in PEXSI options from GlobalV
-    int_para[0] = hsolver::DiagoPexsi<double>::pexsi_npole;
-    int_para[1] = hsolver::DiagoPexsi<double>::pexsi_inertia;
-    int_para[2] = hsolver::DiagoPexsi<double>::pexsi_nmax;
+    int_para[0] = pexsi::PEXSI_Solver::pexsi_npole;
+    int_para[1] = pexsi::PEXSI_Solver::pexsi_inertia;
+    int_para[2] = pexsi::PEXSI_Solver::pexsi_nmax;
     int_para[3] = 0;
-    int_para[4] = 1; // hsolver::DiagoPexsi<double>::pexsi_symbolic;
-    int_para[5] = hsolver::DiagoPexsi<double>::pexsi_comm;
+    int_para[4] = 1; // pexsi::PEXSI_Solver::pexsi_symbolic;
+    int_para[5] = pexsi::PEXSI_Solver::pexsi_comm;
     int_para[6] = 0;
-    int_para[7] = hsolver::DiagoPexsi<double>::pexsi_storage;
-    int_para[8] = hsolver::DiagoPexsi<double>::pexsi_ordering;
-    int_para[9] = hsolver::DiagoPexsi<double>::pexsi_row_ordering;
-    int_para[10] = hsolver::DiagoPexsi<double>::pexsi_nproc;
-    int_para[11] = hsolver::DiagoPexsi<double>::pexsi_symm;
-    int_para[12] = hsolver::DiagoPexsi<double>::pexsi_trans;
-    int_para[13] = hsolver::DiagoPexsi<double>::pexsi_method;
+    int_para[7] = pexsi::PEXSI_Solver::pexsi_storage;
+    int_para[8] = pexsi::PEXSI_Solver::pexsi_ordering;
+    int_para[9] = pexsi::PEXSI_Solver::pexsi_row_ordering;
+    int_para[10] = pexsi::PEXSI_Solver::pexsi_nproc;
+    int_para[11] = pexsi::PEXSI_Solver::pexsi_symm;
+    int_para[12] = pexsi::PEXSI_Solver::pexsi_trans;
+    int_para[13] = pexsi::PEXSI_Solver::pexsi_method;
     int_para[14] = 2;
     int_para[15] = 0;
-    int_para[16] = hsolver::DiagoPexsi<double>::pexsi_nproc_pole;
+    int_para[16] = pexsi::PEXSI_Solver::pexsi_nproc_pole;
 
-    double_para[0] = GlobalV::NSPIN; // hsolver::DiagoPexsi<double>::pexsi_spin;
-    double_para[1] = hsolver::DiagoPexsi<double>::pexsi_temp;
-    double_para[2] = hsolver::DiagoPexsi<double>::pexsi_gap;
-    double_para[3] = hsolver::DiagoPexsi<double>::pexsi_delta_e;
-    double_para[4] = hsolver::DiagoPexsi<double>::pexsi_mu_lower;
-    double_para[5] = hsolver::DiagoPexsi<double>::pexsi_mu_upper;
-    double_para[6] = hsolver::DiagoPexsi<double>::pexsi_mu;
-    double_para[7] = hsolver::DiagoPexsi<double>::pexsi_mu_thr;
-    double_para[8] = hsolver::DiagoPexsi<double>::pexsi_mu_expand;
-    double_para[9] = hsolver::DiagoPexsi<double>::pexsi_mu_guard;
-    double_para[10] = hsolver::DiagoPexsi<double>::pexsi_elec_thr;
-    double_para[11] = hsolver::DiagoPexsi<double>::pexsi_zero_thr;
+    double_para[0] = GlobalV::NSPIN; // pexsi::PEXSI_Solver::pexsi_spin;
+    double_para[1] = pexsi::PEXSI_Solver::pexsi_temp;
+    double_para[2] = pexsi::PEXSI_Solver::pexsi_gap;
+    double_para[3] = pexsi::PEXSI_Solver::pexsi_delta_e;
+    double_para[4] = pexsi::PEXSI_Solver::pexsi_mu_lower;
+    double_para[5] = pexsi::PEXSI_Solver::pexsi_mu_upper;
+    double_para[6] = pexsi::PEXSI_Solver::pexsi_mu;
+    double_para[7] = pexsi::PEXSI_Solver::pexsi_mu_thr;
+    double_para[8] = pexsi::PEXSI_Solver::pexsi_mu_expand;
+    double_para[9] = pexsi::PEXSI_Solver::pexsi_mu_guard;
+    double_para[10] = pexsi::PEXSI_Solver::pexsi_elec_thr;
+    double_para[11] = pexsi::PEXSI_Solver::pexsi_zero_thr;
 
     options.numPole = int_para[0];
     options.isInertiaCount = int_para[1];
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index e77205c0c8..17da1e6460 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -775,57 +775,31 @@ void Input_Conv::Convert(void)
     // PEXSI related parameters
     //-----------------------------------------------
 #ifdef __PEXSI
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_npole = INPUT.pexsi_npole;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_inertia = INPUT.pexsi_inertia;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nmax = INPUT.pexsi_nmax;
-    // hsolver::DiagoPexsi<std::complex<double>>::pexsi_symbolic = INPUT.pexsi_symbolic;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_comm = INPUT.pexsi_comm;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_storage = INPUT.pexsi_storage;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_ordering = INPUT.pexsi_ordering;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_row_ordering = INPUT.pexsi_row_ordering;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc = INPUT.pexsi_nproc;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_symm = INPUT.pexsi_symm;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_trans = INPUT.pexsi_trans;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_method = INPUT.pexsi_method;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
-    // hsolver::DiagoPexsi<std::complex<double>>::pexsi_spin = INPUT.pexsi_spin;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp = INPUT.pexsi_temp;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_gap = INPUT.pexsi_gap;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_delta_e = INPUT.pexsi_delta_e;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_lower = INPUT.pexsi_mu_lower;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_upper = INPUT.pexsi_mu_upper;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu = INPUT.pexsi_mu;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_thr = INPUT.pexsi_mu_thr;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_expand = INPUT.pexsi_mu_expand;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_guard = INPUT.pexsi_mu_guard;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_elec_thr = INPUT.pexsi_elec_thr;
-    hsolver::DiagoPexsi<std::complex<double>>::pexsi_zero_thr = INPUT.pexsi_zero_thr;
-
-    hsolver::DiagoPexsi<double>::pexsi_npole = INPUT.pexsi_npole;
-    hsolver::DiagoPexsi<double>::pexsi_inertia = INPUT.pexsi_inertia;
-    hsolver::DiagoPexsi<double>::pexsi_nmax = INPUT.pexsi_nmax;
-    // hsolver::DiagoPexsi<double>::pexsi_symbolic = INPUT.pexsi_symbolic;
-    hsolver::DiagoPexsi<double>::pexsi_comm = INPUT.pexsi_comm;
-    hsolver::DiagoPexsi<double>::pexsi_storage = INPUT.pexsi_storage;
-    hsolver::DiagoPexsi<double>::pexsi_ordering = INPUT.pexsi_ordering;
-    hsolver::DiagoPexsi<double>::pexsi_row_ordering = INPUT.pexsi_row_ordering;
-    hsolver::DiagoPexsi<double>::pexsi_nproc = INPUT.pexsi_nproc;
-    hsolver::DiagoPexsi<double>::pexsi_symm = INPUT.pexsi_symm;
-    hsolver::DiagoPexsi<double>::pexsi_trans = INPUT.pexsi_trans;
-    hsolver::DiagoPexsi<double>::pexsi_method = INPUT.pexsi_method;
-    hsolver::DiagoPexsi<double>::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
-    // hsolver::DiagoPexsi<double>::pexsi_spin = INPUT.pexsi_spin;
-    hsolver::DiagoPexsi<double>::pexsi_temp = INPUT.pexsi_temp;
-    hsolver::DiagoPexsi<double>::pexsi_gap = INPUT.pexsi_gap;
-    hsolver::DiagoPexsi<double>::pexsi_delta_e = INPUT.pexsi_delta_e;
-    hsolver::DiagoPexsi<double>::pexsi_mu_lower = INPUT.pexsi_mu_lower;
-    hsolver::DiagoPexsi<double>::pexsi_mu_upper = INPUT.pexsi_mu_upper;
-    hsolver::DiagoPexsi<double>::pexsi_mu = INPUT.pexsi_mu;
-    hsolver::DiagoPexsi<double>::pexsi_mu_thr = INPUT.pexsi_mu_thr;
-    hsolver::DiagoPexsi<double>::pexsi_mu_expand = INPUT.pexsi_mu_expand;
-    hsolver::DiagoPexsi<double>::pexsi_mu_guard = INPUT.pexsi_mu_guard;
-    hsolver::DiagoPexsi<double>::pexsi_elec_thr = INPUT.pexsi_elec_thr;
-    hsolver::DiagoPexsi<double>::pexsi_zero_thr = INPUT.pexsi_zero_thr;
+    pexsi::PEXSI_Solver::pexsi_npole = INPUT.pexsi_npole;
+    pexsi::PEXSI_Solver::pexsi_inertia = INPUT.pexsi_inertia;
+    pexsi::PEXSI_Solver::pexsi_nmax = INPUT.pexsi_nmax;
+    // pexsi::PEXSI_Solver::pexsi_symbolic = INPUT.pexsi_symbolic;
+    pexsi::PEXSI_Solver::pexsi_comm = INPUT.pexsi_comm;
+    pexsi::PEXSI_Solver::pexsi_storage = INPUT.pexsi_storage;
+    pexsi::PEXSI_Solver::pexsi_ordering = INPUT.pexsi_ordering;
+    pexsi::PEXSI_Solver::pexsi_row_ordering = INPUT.pexsi_row_ordering;
+    pexsi::PEXSI_Solver::pexsi_nproc = INPUT.pexsi_nproc;
+    pexsi::PEXSI_Solver::pexsi_symm = INPUT.pexsi_symm;
+    pexsi::PEXSI_Solver::pexsi_trans = INPUT.pexsi_trans;
+    pexsi::PEXSI_Solver::pexsi_method = INPUT.pexsi_method;
+    pexsi::PEXSI_Solver::pexsi_nproc_pole = INPUT.pexsi_nproc_pole;
+    // pexsi::PEXSI_Solver::pexsi_spin = INPUT.pexsi_spin;
+    pexsi::PEXSI_Solver::pexsi_temp = INPUT.pexsi_temp;
+    pexsi::PEXSI_Solver::pexsi_gap = INPUT.pexsi_gap;
+    pexsi::PEXSI_Solver::pexsi_delta_e = INPUT.pexsi_delta_e;
+    pexsi::PEXSI_Solver::pexsi_mu_lower = INPUT.pexsi_mu_lower;
+    pexsi::PEXSI_Solver::pexsi_mu_upper = INPUT.pexsi_mu_upper;
+    pexsi::PEXSI_Solver::pexsi_mu = INPUT.pexsi_mu;
+    pexsi::PEXSI_Solver::pexsi_mu_thr = INPUT.pexsi_mu_thr;
+    pexsi::PEXSI_Solver::pexsi_mu_expand = INPUT.pexsi_mu_expand;
+    pexsi::PEXSI_Solver::pexsi_mu_guard = INPUT.pexsi_mu_guard;
+    pexsi::PEXSI_Solver::pexsi_elec_thr = INPUT.pexsi_elec_thr;
+    pexsi::PEXSI_Solver::pexsi_zero_thr = INPUT.pexsi_zero_thr;
 #endif
     ModuleBase::timer::tick("Input_Conv", "Convert");
     return;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index 3455e9ec40..a0c566f9b5 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -548,46 +548,29 @@ TEST_F(InputConvTest, PEXSI)
 	std::string input_file = "./support/INPUT";
 	INPUT.Read(input_file);
 	Input_Conv::Convert();
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_npole, 54);
-	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_inertia);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nmax, 80);
-	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_comm);
-	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_storage);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_ordering, 0);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_row_ordering, 1);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nproc, 1);
-	EXPECT_TRUE(hsolver::DiagoPexsi<double>::pexsi_symm);
-	EXPECT_FALSE(hsolver::DiagoPexsi<double>::pexsi_trans);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_method, 1);
-	EXPECT_EQ(hsolver::DiagoPexsi<double>::pexsi_nproc_pole, 1);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_temp, 0.015);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_gap, 0);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_delta_e, 20);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_lower, -10);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_upper, 10);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu, 0);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_thr, 0.05);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_expand, 0.3);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_mu_guard, 0.2);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_elec_thr, 0.001);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<double>::pexsi_zero_thr, 1e-10);
-
-	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc, 1);
-	EXPECT_TRUE(hsolver::DiagoPexsi<std::complex<double>>::pexsi_symm);
-	EXPECT_FALSE(hsolver::DiagoPexsi<std::complex<double>>::pexsi_trans);
-	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_method, 1);
-	EXPECT_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_nproc_pole, 1);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_temp, 0.015);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_gap, 0);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_delta_e, 20);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_lower, -10);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_upper, 10);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu, 0);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_thr, 0.05);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_expand, 0.3);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_mu_guard, 0.2);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_elec_thr, 0.001);
-	EXPECT_DOUBLE_EQ(hsolver::DiagoPexsi<std::complex<double>>::pexsi_zero_thr, 1e-10);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 54);
+	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80);
+	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm);
+	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_storage);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_ordering, 0);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_row_ordering, 1);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc, 1);
+	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_symm);
+	EXPECT_FALSE(pexsi::PEXSI_Solver::pexsi_trans);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_method, 1);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc_pole, 1);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_temp, 0.015);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_gap, 0);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_delta_e, 20);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_lower, -10);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_upper, 10);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu, 0);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_thr, 0.05);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_expand, 0.3);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_guard, 0.2);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_elec_thr, 0.001);
+	EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_zero_thr, 1e-10);
 }
 #endif
 

From b33a37bef4315d2bab348533735cb1fd46691366 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Wed, 7 Feb 2024 21:58:13 +0800
Subject: [PATCH 30/44] Docs: added docs for pexsi inputs

---
 docs/advanced/input_files/input-main.md       | 165 ++++++++++++++++++
 .../module_pexsi/dist_matrix_transformer.h    |   2 +-
 .../module_pexsi/pexsi_solver.h               |   2 +-
 source/module_io/input.cpp                    |   2 +-
 source/module_io/input.h                      |   2 +-
 source/module_io/test/input_conv_test.cpp     |   2 +-
 source/module_io/test/input_test_para.cpp     |   2 +-
 source/module_io/test/write_input_test.cpp    |   2 +-
 8 files changed, 172 insertions(+), 7 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 6a3fa4fc3d..e617c519c5 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -382,6 +382,30 @@
     - [qo\_strategy](#qo_strategy)
     - [qo\_screening\_coeff](#qo_screening_coeff)
     - [qo\_thr](#qo_thr)
+  - [PEXSI](#PEXSI)
+    - [pexsi_npole](#pexsi_npole)
+    - [pexsi_inertia](#pexsi_inertia)
+    - [pexsi_nmax](#pexsi_nmax)
+    - [pexsi_comm](#pexsi_comm)
+    - [pexsi_storage](#pexsi_storage)
+    - [pexsi_ordering](#pexsi_ordering)
+    - [pexsi_row_ordering](#pexsi_row_ordering)
+    - [pexsi_nproc](#pexsi_nproc)
+    - [pexsi_symm](#pexsi_symm)
+    - [pexsi_trans](#pexsi_trans)
+    - [pexsi_method](#pexsi_method)
+    - [pexsi_nproc_pole](#pexsi_nproc_pole)
+    - [pexsi_temp](#pexsi_temp)
+    - [pexsi_gap](#pexsi_gap)
+    - [pexsi_delta_e](#pexsi_delta_e)
+    - [pexsi_mu_lower](#pexsi_mu_lower)
+    - [pexsi_mu_upper](#pexsi_mu_upper)
+    - [pexsi_mu](#pexsi_mu)
+    - [pexsi_mu_thr](#pexsi_mu_thr)
+    - [pexsi_mu_expand](#pexsi_mu_expand)
+    - [pexsi_mu_guard](#pexsi_mu_guard)
+    - [pexsi_elec_thr](#pexsi_elec_thr)
+    - [pexsi_zero_thr](#pexsi_zero_thr)
 
 [back to top](#full-list-of-input-keywords)
 
@@ -3548,5 +3572,146 @@ These variables are used to control the usage of QO analysis. Please note presen
 - **Description**: the convergence threshold determining the cutoff of generated orbital. Lower threshold will yield orbital with larger cutoff radius.
 - **Default**: 1.0e-6
 
+## PEXSI
+
+These variables are used to control the usage of PEXSI (Pole Expansion and Selected Inversion) method in calculations.
+
+### pexsi_npole
+
+- **Type**: Integer
+- **Description**: the number of poles used in the pole expansion method, should be a even number.
+- **Default**: 80
+
+### pexsi_inertia
+
+- **Type**: Boolean
+- **Description**: whether inertia counting is used at the very beginning.
+- **Default**: True
+
+### pexsi_nmax
+
+- **Type**: Integer
+- **Description**: maximum number of PEXSI iterations after each inertia counting procedure.
+- **Default**: 80
+
+### pexsi_comm
+
+- **Type**: Boolean
+- **Description**: whether to construct PSelInv communication pattern.
+- **Default**: True
+
+### pexsi_storage
+
+- **Type**: Boolean
+- **Description**: whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices.
+- **Default**: True
+
+### pexsi_ordering
+
+- **Type**: Integer
+- **Description**: ordering strategy for factorization and selected inversion. 0: Parallel ordering using ParMETIS, 1: Sequential ordering using METIS, 2: Multiple minimum degree ordering
+- **Default**: 0
+
+### pexsi_row_ordering
+
+- **Type**: Integer
+- **Description**: row permutation strategy for factorization and selected inversion, 0: No row permutation, 1: Make the diagonal entry of the matrix larger than the off-diagonal entries.
+- **Default**: 1
+
+### pexsi_nproc
+
+- **Type**: Integer
+- **Description**: number of processors for PARMETIS. Only used if pexsi_ordering == 0.
+- **Default**: 1
+
+### pexsi_symm
+
+- **Type**: Boolean
+- **Description**: whether the matrix is symmetric.
+- **Default**: True
+
+### pexsi_trans
+
+- **Type**: Boolean
+- **Description**: whether to factorize the transpose of the matrix.
+- **Default**: False
+
+### pexsi_method
+
+- **Type**: Integer
+- **Description**: the pole expansion method to be used. 1 for Cauchy Contour Integral method, 2 for Moussa optimized method.
+- **Default**: 1
+
+### pexsi_nproc_pole
+
+- **Type**: Integer
+- **Description**: the point parallelizaion of PEXSI. Recommend two points parallelization.
+- **Default**: 1
+
+### pexsi_temp
+
+- **Type**: Real
+- **Description**: temperature in Fermi-Dirac distribution, in Ry, should have the same effect as the smearing sigma when smearing method is set to Fermi-Dirac.
+- **Default**: 0.015
+
+### pexsi_gap
+
+- **Type**: Real
+- **Description**: spectral gap, this can be set to be 0 in most cases.
+- **Default**: 0
+
+### pexsi_delta_e
+
+- **Type**: Real
+- **Description**: an upper bound for the spectral radius of $S^{-1} H$.
+- **Default**: 20
+
+### pexsi_mu_lower
+
+- **Type**: Real
+- **Description**: initial guess of lower bound for mu.
+- **Default**: -10
+
+### pexsi_mu_upper
+
+- **Type**: Real
+- **Description**: initial guess of upper bound for mu.
+- **Default**: 10
+
+### pexsi_mu
+
+- **Type**: Real
+- **Description**: initial guess for mu (for the solver).
+- **Default**: 0
+
+### pexsi_mu_thr
+
+- **Type**: Real
+- **Description**: stopping criterion in terms of the chemical potential for the inertia counting procedure.
+- **Default**: 0.05
+
+### pexsi_mu_expand
+
+- **Type**: Real
+- **Description**: if the chemical potential is not in the initial interval, the interval is expanded by this value.
+- **Default**: 0.3
+
+### pexsi_mu_guard
+
+- **Type**: Real
+- **Description**: safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure.
+- **Default**: 0.2
+
+### pexsi_elec_thr
+
+- **Type**: Real
+- **Description**: stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact.
+- **Default**: 0.001
+
+### pexsi_zero_thr
+
+- **Type**: Real
+- **Description**: if the absolute value of CCS matrix element is less than this value, it will be considered as zero.
+- **Default**: 1e-10
 
 [back to top](#full-list-of-input-keywords)
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
index e261d31f3e..672b22f4f3 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h
@@ -2,7 +2,7 @@
 #define DISTMATRIXTRANSFORMER_H
 
 #include <mpi.h>
-
+#include <map>
 #include <vector>
 // transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution
 // they should have same MPI communicator
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 880efaf504..a5d52be5cf 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -124,7 +124,7 @@ class PEXSI_Solver
      */ 
     static double pexsi_elec_thr;
     /** 
-     * @brief  Stopping criterion for the zero threshold.
+     * @brief  If the absolute value of CCS matrix element is less than this value, it will be considered as zero.
      */ 
     static double pexsi_zero_thr;
 
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index ce76fb3a40..ba01da5dae 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -644,7 +644,7 @@ void Input::Default(void)
     //==========================================================
     // variables for PEXSI
     //==========================================================
-    pexsi_npole = 54;
+    pexsi_npole = 80;
     pexsi_inertia = true;
     pexsi_nmax = 80;
     // pexsi_symbolic = 1;
diff --git a/source/module_io/input.h b/source/module_io/input.h
index dd3d8c0bb0..223a21c8ab 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -607,7 +607,7 @@ class Input
     //==========================================================
     // variables for PEXSI
     //==========================================================
-    int pexsi_npole = 54;
+    int pexsi_npole = 80;
     bool pexsi_inertia = true;
     int pexsi_nmax = 80;
     // int pexsi_symbolic = 1;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index a0c566f9b5..ff5c567990 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -548,7 +548,7 @@ TEST_F(InputConvTest, PEXSI)
 	std::string input_file = "./support/INPUT";
 	INPUT.Read(input_file);
 	Input_Conv::Convert();
-	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 54);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 80);
 	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia);
 	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80);
 	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm);
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index 1b936f73d5..41b7597d58 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -393,7 +393,7 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.qo_thr, 1e-6);
     EXPECT_EQ(INPUT.qo_basis, "hydrogen");
 
-    EXPECT_EQ(INPUT.pexsi_npole, 54);
+    EXPECT_EQ(INPUT.pexsi_npole, 80);
     EXPECT_TRUE(INPUT.pexsi_inertia);
     EXPECT_EQ(INPUT.pexsi_nmax, 80);
     EXPECT_TRUE(INPUT.pexsi_comm);
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index 6c8a2b32f9..cfc874f060 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -934,7 +934,7 @@ TEST_F (write_input, PEXSI24)
     std::ifstream ifs ("write_input_test.log");
     std::string output ((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
     EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)"));
-    EXPECT_THAT(output, testing::HasSubstr("pexsi_npole                    54 #Number of poles in expansion"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_npole                    80 #Number of poles in expansion"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia                  1 #Whether inertia counting is used at the very beginning of PEXSI process"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax                     80 #Maximum number of PEXSI iterations after each inertia counting procedure."));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_comm                     1 #Whether to construct PSelInv communication pattern"));

From 94ff925a865753855ac185975c8d11a1c7adedcc Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sat, 30 Mar 2024 16:16:40 +0800
Subject: [PATCH 31/44] Fix unit test issues in input_conv

---
 source/module_io/input_conv.cpp               |  2 +-
 .../module_io/test/for_testing_input_conv.h   | 34 +++++++++++++++++++
 source/module_io/test/input_conv_test.cpp     |  1 -
 source/module_io/test/input_test_para.cpp     |  1 -
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index 49d0a55ee7..d140984a3b 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -25,7 +25,7 @@
 #include "module_hamilt_lcao/module_tddft/evolve_elec.h"
 #endif
 #ifdef __PEXSI
-#include "module_hsolver/diago_pexsi.h"
+#include "module_hsolver/module_pexsi/pexsi_solver.h"
 #endif
 
 #include "module_base/timer.h"
diff --git a/source/module_io/test/for_testing_input_conv.h b/source/module_io/test/for_testing_input_conv.h
index 1e25cc32e5..b012dbf183 100644
--- a/source/module_io/test/for_testing_input_conv.h
+++ b/source/module_io/test/for_testing_input_conv.h
@@ -26,6 +26,9 @@
 #include "module_relax/relax_old/ions_move_basic.h"
 #include "module_relax/relax_old/ions_move_cg.h"
 #include "module_relax/relax_old/lattice_change_basic.h"
+#ifdef __PEXSI
+#include "module_hsolver/module_pexsi/pexsi_solver.h"
+#endif
 
 bool berryphase::berry_phase_flag = false;
 
@@ -355,6 +358,37 @@ pseudopot_cell_vnl ppcell;
 Charge_Mixing CHR_MIX;
 } // namespace GlobalC
 
+#ifdef  __PEXSI
+namespace pexsi
+{
+int PEXSI_Solver::pexsi_npole = 0;
+bool PEXSI_Solver::pexsi_inertia = 0;
+int PEXSI_Solver::pexsi_nmax = 0;
+// int PEXSI_Solver::pexsi_symbolic = 0;
+bool PEXSI_Solver::pexsi_comm = 0;
+bool PEXSI_Solver::pexsi_storage = 0;
+int PEXSI_Solver::pexsi_ordering = 0;
+int PEXSI_Solver::pexsi_row_ordering = 0;
+int PEXSI_Solver::pexsi_nproc = 0;
+bool PEXSI_Solver::pexsi_symm = 0;
+bool PEXSI_Solver::pexsi_trans = 0;
+int PEXSI_Solver::pexsi_method = 0;
+int PEXSI_Solver::pexsi_nproc_pole = 0;
+// double PEXSI_Solver::pexsi_spin = 2;
+double PEXSI_Solver::pexsi_temp = 0.0;
+double PEXSI_Solver::pexsi_gap = 0.0;
+double PEXSI_Solver::pexsi_delta_e = 0.0;
+double PEXSI_Solver::pexsi_mu_lower = 0.0;
+double PEXSI_Solver::pexsi_mu_upper = 0.0;
+double PEXSI_Solver::pexsi_mu = 0.0;
+double PEXSI_Solver::pexsi_mu_thr = 0.0;
+double PEXSI_Solver::pexsi_mu_expand = 0.0;
+double PEXSI_Solver::pexsi_mu_guard = 0.0;
+double PEXSI_Solver::pexsi_elec_thr = 0.0;
+double PEXSI_Solver::pexsi_zero_thr = 0.0;
+} // namespace pexsi
+#endif
+
 #undef private
 
 #endif
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index 07640f1adc..302f7e1f6d 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -1,6 +1,5 @@
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
-#include "module_hsolver/diago_pexsi.h"
 #include "module_io/input_conv.h"
 #include "module_base/global_variable.h"
 #include "for_testing_input_conv.h"
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index b2b2c90f31..9955b99e5a 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -392,7 +392,6 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.qo_strategy.size(), 0);
     EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0);
     EXPECT_EQ(INPUT.qo_thr, 1e-6);
-    EXPECT_EQ(INPUT.qo_basis, "hydrogen");
     EXPECT_EQ(INPUT.qo_basis, "szv");
 
     EXPECT_EQ(INPUT.pexsi_npole, 80);

From 11f0a12242e37a600e3e877d67df4424bb757461 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sun, 31 Mar 2024 15:04:37 +0800
Subject: [PATCH 32/44] Change default pexsi_npole from 80 to 40

---
 docs/advanced/input_files/input-main.md    | 50 +++++++++++-----------
 source/module_elecstate/elecstate_lcao.cpp |  2 +-
 source/module_elecstate/elecstate_lcao.h   |  6 +++
 source/module_io/input.cpp                 |  2 +-
 source/module_io/input.h                   |  2 +-
 source/module_io/test/input_conv_test.cpp  |  2 +-
 source/module_io/test/input_test_para.cpp  |  2 +-
 source/module_io/test/write_input_test.cpp |  2 +-
 8 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 0fe2ded416..d50cdab8b3 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -384,30 +384,30 @@
     - [qo\_strategy](#qo_strategy)
     - [qo\_screening\_coeff](#qo_screening_coeff)
     - [qo\_thr](#qo_thr)
-  - [PEXSI](#PEXSI)
-    - [pexsi_npole](#pexsi_npole)
-    - [pexsi_inertia](#pexsi_inertia)
-    - [pexsi_nmax](#pexsi_nmax)
-    - [pexsi_comm](#pexsi_comm)
-    - [pexsi_storage](#pexsi_storage)
-    - [pexsi_ordering](#pexsi_ordering)
-    - [pexsi_row_ordering](#pexsi_row_ordering)
-    - [pexsi_nproc](#pexsi_nproc)
-    - [pexsi_symm](#pexsi_symm)
-    - [pexsi_trans](#pexsi_trans)
-    - [pexsi_method](#pexsi_method)
-    - [pexsi_nproc_pole](#pexsi_nproc_pole)
-    - [pexsi_temp](#pexsi_temp)
-    - [pexsi_gap](#pexsi_gap)
-    - [pexsi_delta_e](#pexsi_delta_e)
-    - [pexsi_mu_lower](#pexsi_mu_lower)
-    - [pexsi_mu_upper](#pexsi_mu_upper)
-    - [pexsi_mu](#pexsi_mu)
-    - [pexsi_mu_thr](#pexsi_mu_thr)
-    - [pexsi_mu_expand](#pexsi_mu_expand)
-    - [pexsi_mu_guard](#pexsi_mu_guard)
-    - [pexsi_elec_thr](#pexsi_elec_thr)
-    - [pexsi_zero_thr](#pexsi_zero_thr)
+  - [PEXSI](#pexsi)
+    - [pexsi\_npole](#pexsi_npole)
+    - [pexsi\_inertia](#pexsi_inertia)
+    - [pexsi\_nmax](#pexsi_nmax)
+    - [pexsi\_comm](#pexsi_comm)
+    - [pexsi\_storage](#pexsi_storage)
+    - [pexsi\_ordering](#pexsi_ordering)
+    - [pexsi\_row\_ordering](#pexsi_row_ordering)
+    - [pexsi\_nproc](#pexsi_nproc)
+    - [pexsi\_symm](#pexsi_symm)
+    - [pexsi\_trans](#pexsi_trans)
+    - [pexsi\_method](#pexsi_method)
+    - [pexsi\_nproc\_pole](#pexsi_nproc_pole)
+    - [pexsi\_temp](#pexsi_temp)
+    - [pexsi\_gap](#pexsi_gap)
+    - [pexsi\_delta\_e](#pexsi_delta_e)
+    - [pexsi\_mu\_lower](#pexsi_mu_lower)
+    - [pexsi\_mu\_upper](#pexsi_mu_upper)
+    - [pexsi\_mu](#pexsi_mu)
+    - [pexsi\_mu\_thr](#pexsi_mu_thr)
+    - [pexsi\_mu\_expand](#pexsi_mu_expand)
+    - [pexsi\_mu\_guard](#pexsi_mu_guard)
+    - [pexsi\_elec\_thr](#pexsi_elec_thr)
+    - [pexsi\_zero\_thr](#pexsi_zero_thr)
 
 [back to top](#full-list-of-input-keywords)
 
@@ -3643,7 +3643,7 @@ These variables are used to control the usage of PEXSI (Pole Expansion and Selec
 
 - **Type**: Integer
 - **Description**: the number of poles used in the pole expansion method, should be a even number.
-- **Default**: 80
+- **Default**: 40
 
 ### pexsi_inertia
 
diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 3d629f7abe..d5acbbaf05 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -95,7 +95,7 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d
     // this part for calculating DMK in 2d-block format, not used for charge now
     //    psi::Psi<std::complex<double>> dm_k_2d();
 
-    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" ||  GlobalV::KS_SOLVER == "pexsi"
+    if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack"
         || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") // Peize Lin test 2019-05-15
     {
         //cal_dm(this->loc->ParaV, this->wg, psi, this->loc->dm_k);
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 4a7df3d76e..721f05148b 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -62,6 +62,12 @@ class ElecStateLCAO : public ElecState
 
 #ifdef __PEXSI
     // use for pexsi
+
+    /** 
+     * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi
+     * @param pexsi_DM: pointers of density matrix calculated by pexsi
+     * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM
+     */
     void dmToRho(std::vector<TK*> pexsi_DM, std::vector<TK*> pexsi_EDM);
     std::vector<TK*> pexsi_EDM;
 #endif
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 71e3b5dc65..4f2abc67c3 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -647,7 +647,7 @@ void Input::Default(void)
     //==========================================================
     // variables for PEXSI
     //==========================================================
-    pexsi_npole = 80;
+    pexsi_npole = 40;
     pexsi_inertia = true;
     pexsi_nmax = 80;
     // pexsi_symbolic = 1;
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 97213357a3..083905407e 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -609,7 +609,7 @@ class Input
     //==========================================================
     // variables for PEXSI
     //==========================================================
-    int pexsi_npole = 80;
+    int pexsi_npole = 40;
     bool pexsi_inertia = true;
     int pexsi_nmax = 80;
     // int pexsi_symbolic = 1;
diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp
index 302f7e1f6d..9fd1c0cc52 100644
--- a/source/module_io/test/input_conv_test.cpp
+++ b/source/module_io/test/input_conv_test.cpp
@@ -547,7 +547,7 @@ TEST_F(InputConvTest, PEXSI)
 	std::string input_file = "./support/INPUT";
 	INPUT.Read(input_file);
 	Input_Conv::Convert();
-	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 80);
+	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 40);
 	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia);
 	EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80);
 	EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm);
diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp
index 9955b99e5a..c172a9fab0 100644
--- a/source/module_io/test/input_test_para.cpp
+++ b/source/module_io/test/input_test_para.cpp
@@ -394,7 +394,7 @@ TEST_F(InputParaTest, Bcast)
     EXPECT_EQ(INPUT.qo_thr, 1e-6);
     EXPECT_EQ(INPUT.qo_basis, "szv");
 
-    EXPECT_EQ(INPUT.pexsi_npole, 80);
+    EXPECT_EQ(INPUT.pexsi_npole, 40);
     EXPECT_TRUE(INPUT.pexsi_inertia);
     EXPECT_EQ(INPUT.pexsi_nmax, 80);
     EXPECT_TRUE(INPUT.pexsi_comm);
diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp
index 628985ce0b..a06809c639 100644
--- a/source/module_io/test/write_input_test.cpp
+++ b/source/module_io/test/write_input_test.cpp
@@ -934,7 +934,7 @@ TEST_F (write_input, PEXSI24)
     std::ifstream ifs ("write_input_test.log");
     std::string output ((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
     EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)"));
-    EXPECT_THAT(output, testing::HasSubstr("pexsi_npole                    80 #Number of poles in expansion"));
+    EXPECT_THAT(output, testing::HasSubstr("pexsi_npole                    40 #Number of poles in expansion"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia                  1 #Whether inertia counting is used at the very beginning of PEXSI process"));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax                     80 #Maximum number of PEXSI iterations after each inertia counting procedure."));
     EXPECT_THAT(output, testing::HasSubstr("pexsi_comm                     1 #Whether to construct PSelInv communication pattern"));

From fd19b2c22e26c4d1ab73d36e9f0a4b8a51b881fd Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sun, 31 Mar 2024 15:55:23 +0800
Subject: [PATCH 33/44] Place pexsi_EDM in DensityMatrix, set size of pexsi_dm
 = 1 when GlobalV::NSPIN==4, and add comments for dmToRho

---
 source/module_elecstate/elecstate_lcao.cpp           | 12 +++++++++---
 source/module_elecstate/elecstate_lcao.h             |  1 -
 source/module_elecstate/module_dm/density_matrix.h   | 10 ++++++++++
 .../hamilt_lcaodft/FORCE_gamma_edm.cpp               |  2 +-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index d5acbbaf05..56e6dd8dfb 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -267,11 +267,17 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
     }
 
     auto DM = this->get_DM();
-    this->pexsi_EDM.clear();
-    for (int is = 0; is < GlobalV::NSPIN; is++)
+    this->get_DM()->pexsi_EDM.clear();
+
+    int nspin = GlobalV::NSPIN;
+    if (GlobalV::NSPIN == 4)
+    {
+        nspin = 1;
+    }
+    for (int is = 0; is < nspin; is++)
     {
         this->DM->set_DMK_pointer(is, pexsi_DM[is]);
-        this->pexsi_EDM.push_back(pexsi_EDM[is]);
+        this->get_DM()->pexsi_EDM.push_back(pexsi_EDM[is]);
     }
     DM->cal_DMR();
     
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 721f05148b..2ecc0af4eb 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -69,7 +69,6 @@ class ElecStateLCAO : public ElecState
      * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM
      */
     void dmToRho(std::vector<TK*> pexsi_DM, std::vector<TK*> pexsi_EDM);
-    std::vector<TK*> pexsi_EDM;
 #endif
 
   protected:
diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h
index 519f798f5d..f7e6f4e8c0 100644
--- a/source/module_elecstate/module_dm/density_matrix.h
+++ b/source/module_elecstate/module_dm/density_matrix.h
@@ -200,6 +200,14 @@ namespace elecstate
     
     std::vector<ModuleBase::ComplexMatrix> EDMK; // for TD-DFT
 
+#ifdef __PEXSI
+    /**
+     * @brief EDM storage for PEXSI
+     * used in MD calculation
+     */
+    std::vector<TK*> pexsi_EDM;
+#endif
+
   private:
     /**
      * @brief HContainer for density matrix in real space for 2D parallelization
@@ -247,6 +255,8 @@ namespace elecstate
      * _nks = kv->_nks / nspin
      */
     int _nks = 0;
+
+
 };
 
 } // namespace elecstate
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
index 1ce37e592e..dcf9a79049 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp
@@ -43,7 +43,7 @@ void Force_LCAO_gamma::cal_foverlap(
         auto pes = dynamic_cast<const elecstate::ElecStateLCAO<double>*>(pelec);
         for (int ik = 0; ik < GlobalV::NSPIN; ik++)
         {
-            EDM.set_DMK_pointer(ik, pes->pexsi_EDM[ik]);
+            EDM.set_DMK_pointer(ik, pes->get_DM()->pexsi_EDM[ik]);
         }
         
     }

From aec57c0d94f8759a3b43cd7c9126a707e5ea600b Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sun, 31 Mar 2024 16:07:31 +0800
Subject: [PATCH 34/44] An unit test added for DiagoPexsi

---
 source/module_hsolver/test/CMakeLists.txt     |  19 +
 .../test/PEXSI-DM-GammaOnly-Si2.dat           | 107 +++++
 .../test/PEXSI-H-GammaOnly-Si2.dat            |  26 ++
 .../test/PEXSI-S-GammaOnly-Si2.dat            |  26 ++
 .../test/diago_pexsi_parallel_test.sh         |  18 +
 .../module_hsolver/test/diago_pexsi_test.cpp  | 403 ++++++++++++++++++
 6 files changed, 599 insertions(+)
 create mode 100644 source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat
 create mode 100644 source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat
 create mode 100644 source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat
 create mode 100644 source/module_hsolver/test/diago_pexsi_parallel_test.sh
 create mode 100644 source/module_hsolver/test/diago_pexsi_test.cpp

diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt
index 5dd16d10ee..ca7e4e822d 100644
--- a/source/module_hsolver/test/CMakeLists.txt
+++ b/source/module_hsolver/test/CMakeLists.txt
@@ -93,6 +93,14 @@ if(ENABLE_LCAO)
       SOURCES diago_lcao_test.cpp ../diago_blas.cpp 
     )
   endif()
+
+  if (ENABLE_PEXSI)
+  AddTest(
+    TARGET HSolver_LCAO_PEXSI
+    LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} MPI::MPI_CXX base psi device pexsi
+    SOURCES diago_pexsi_test.cpp ../diago_pexsi.cpp ../../module_basis/module_ao/parallel_orbitals.cpp ../../module_basis/module_ao/parallel_2d.cpp
+  )
+  endif()
 endif()
 if (USE_CUDA AND USE_CUSOLVER_LCAO)
 AddTest(
@@ -116,6 +124,11 @@ install(FILES diago_cg_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 install(FILES diago_david_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 install(FILES diago_lcao_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 
+install(FILES PEXSI-H-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+install(FILES PEXSI-S-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+install(FILES PEXSI-DM-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
 find_program(BASH bash)
 add_test(NAME HSolver_cg_parallel
       COMMAND ${BASH} diago_cg_parallel_test.sh
@@ -130,4 +143,10 @@ if(ENABLE_LCAO)
         COMMAND ${BASH} diago_lcao_parallel_test.sh
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
       )
+if(ENABLE_PEXSI)
+  add_test(NAME HSolver_LCAO_PEXSI_parallel
+        COMMAND ${BASH} diago_pexsi_parallel_test.sh
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      )
+endif()
 endif()   
\ No newline at end of file
diff --git a/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat
new file mode 100644
index 0000000000..1043cc51a1
--- /dev/null
+++ b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat
@@ -0,0 +1,107 @@
+  26 26
+ 8
+ 0.660474083048563
+ 3.884e-01 1.025e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.883e-01 1.024e-02 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 1.025e-02 2.683e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.024e-02 2.718e-04 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -7.169e-01
+ 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 1.699e-01
+ 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00
+ 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 1.699e-01
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01
+ 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 1.699e-01 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 1.773e-01
+ 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -4.160e-02
+ 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00
+ 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 -4.160e-02
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02
+ 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 -4.160e-02 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02
+ 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 3.891e-02 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00
+ 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 3.891e-02
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -1.426e-07 0.000e+00
+ 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 -1.699e-01
+ 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 3.891e-02
+ 3.883e-01 1.024e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.884e-01 1.025e-02 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 1.024e-02 2.718e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.025e-02 2.683e-04 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 7.260e-01
+ 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -1.671e-01
+ 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00
+ 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 -1.671e-01
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01
+ 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 -1.671e-01 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 -1.781e-01
+ 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 4.138e-02
+ 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00
+ 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 4.138e-02
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02
+ 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 4.138e-02 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02
+ 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 3.977e-02 0.000e+00
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00
+ 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 3.977e-02
+ 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ -5.653e-07 0.000e+00
+ 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 0.000e+00
+ 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 -1.671e-01
+ 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
+ 0.000e+00 3.977e-02
\ No newline at end of file
diff --git a/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat
new file mode 100644
index 0000000000..87c76fc184
--- /dev/null
+++ b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat
@@ -0,0 +1,26 @@
+26 -3.45008963e-01 7.00777279e-01 2.67435378e-18 -6.48062567e-16 5.69806192e-16 -2.01865721e-15 1.37825425e-15 1.02932021e-15 -1.29549404e-15 9.40872433e-16 -4.37541396e-16 -4.22502120e-15 -9.57567315e-16 -5.60569495e-01 3.19974163e-01 9.47088035e-16 6.99293619e-16 2.66546040e-17 2.24941591e-16 3.49977208e-16 6.64422948e-16 1.19697268e-15 -2.18249784e-16 -1.24808688e-15 2.25204771e-15 3.31560018e-16
+ 7.97835961e-01 -3.26671355e-16 9.44414999e-17 1.08101471e-16 2.96820086e-16 -6.21597964e-17 -1.93606525e-17 1.47303571e-15 -5.85952677e-16 -9.93382239e-16 2.85142385e-15 1.26395876e-15 3.19974163e-01 -2.96734690e-02 -9.56683510e-16 2.99393236e-15 1.06817249e-15 2.22266652e-16 -1.05210457e-15 -2.10421437e-15 -2.19285057e-15 8.05560256e-16 1.20571583e-15 -4.19339449e-15 -1.13240719e-15
+ 3.80362144e-01 5.95917771e-16 -2.18276481e-16 -3.54010265e-01 -4.19309899e-16 -2.30363123e-16 1.53423809e-15 -2.91444576e-16 -9.56803808e-17 -5.35261856e-17 -6.16823227e-02 -2.90999424e-16 -2.32177556e-15 -3.89575886e-02 2.12205807e-16 -8.22906289e-17 6.81593611e-03 2.10103917e-17 2.24379615e-16 -4.85438948e-16 9.00442486e-17 1.31161204e-16 4.61089846e-17 2.39556639e-01
+ 3.80362144e-01 2.55037050e-16 -3.75167990e-16 -3.54010265e-01 -3.00934924e-16 4.18935390e-17 1.46886306e-16 -6.16823227e-02 -7.98028325e-16 -4.57838734e-17 -3.14309773e-16 1.21608455e-15 -4.64848149e-17 -3.89575886e-02 -8.08540395e-17 1.28237973e-16 6.81593611e-03 -8.35066919e-17 -1.00156533e-16 -1.74201705e-16 2.39556639e-01 4.32484922e-16 6.72537317e-17
+ 3.80362144e-01 1.49782541e-16 6.27515573e-17 -3.54010265e-01 4.82940748e-17 -6.16823227e-02 6.20938499e-16 4.71589687e-16 -9.18536141e-16 -3.65700951e-16 -2.61249330e-16 -2.01796811e-16 3.17945596e-17 -3.89575886e-02 2.14594692e-16 -1.51811429e-16 6.81593611e-03 -2.12500255e-17 2.39556639e-01 -5.79371716e-17 -2.59943780e-16 5.30093710e-16
+ 6.49510022e-01 -3.99877720e-16 2.33364502e-16 -1.06835200e-15 1.35924369e-16 4.35440159e-16 5.65086482e-17 2.05033710e-02 -2.40594211e-16 -5.69887431e-16 6.81593611e-03 2.06395261e-17 -1.15861484e-16 1.34872389e-01 5.35360292e-17 -5.33661495e-17 3.47762359e-16 -3.36405101e-16 -2.31631258e-16 -2.62331242e-17 -4.85682373e-02
+ 6.49510022e-01 1.01473550e-16 -8.91511572e-16 -7.44322866e-16 2.05033710e-02 3.18067759e-16 1.77405265e-16 6.50186302e-16 -2.33952222e-15 3.10478769e-17 6.81593611e-03 -3.09684775e-17 3.48878974e-17 1.34872389e-01 1.38971253e-16 2.83475393e-17 1.26434741e-16 -4.85682373e-02 -2.28940621e-16 -2.75815582e-16
+ 6.49510022e-01 -6.05413446e-16 2.05033710e-02 -4.85158818e-16 2.18100282e-16 5.90800199e-16 -4.24041197e-16 -1.76755725e-16 2.48864123e-16 6.96454974e-17 6.81593611e-03 -1.21887704e-16 8.99652487e-17 1.34872389e-01 1.30566455e-16 -4.85682373e-02 4.45112250e-17 2.04467699e-16 -3.02221322e-16
+ 1.39709618e+00 3.28775150e-17 3.02403823e-16 1.12276155e-15 4.11283608e-16 -1.68777752e-15 1.99081203e-15 -3.80457763e-16 -2.91238204e-16 -2.93921607e-16 -7.17012563e-20 1.88551508e-16 -4.63408913e-17 -4.73478204e-01 1.77949873e-16 3.47597824e-16 -7.95551748e-17 1.22787443e-15
+ 1.36533927e+00 -4.17480353e-16 2.38782046e-16 2.07954517e-16 4.06349249e-16 -9.28946662e-17 -1.76779162e-16 -2.40828408e-16 -2.39556639e-01 -9.25671896e-17 1.12753601e-16 4.85682373e-02 -1.30259315e-17 2.04879371e-01 2.55654809e-16 -8.58762656e-16 1.09717457e-15
+ 1.36533927e+00 -2.91149533e-16 4.76951783e-17 4.23208590e-16 -1.68531661e-15 3.22333315e-16 -2.39556639e-01 -2.64951724e-17 -6.87350695e-17 4.85682373e-02 1.57686979e-16 -3.76991794e-16 5.08885431e-16 2.04879371e-01 1.27435694e-15 4.01644429e-16
+ 1.39709618e+00 -4.55433608e-17 -2.99630111e-15 3.78928659e-15 6.58110607e-17 3.50160588e-16 -1.80754014e-16 -2.30392781e-17 -3.48739041e-16 1.55913703e-16 -6.27363150e-17 9.66464560e-16 -1.17769212e-15 -4.73478204e-01 3.90844224e-17
+ 1.36533927e+00 -1.30972594e-15 1.71848190e-15 -2.39556639e-01 3.88957496e-17 4.45336175e-17 4.85682373e-02 -1.94106500e-17 -1.20756475e-16 -9.64457501e-16 6.28258136e-16 2.28328203e-16 7.89854971e-17 2.04879371e-01
+ -3.45008963e-01 7.00777279e-01 2.73555881e-15 -1.00946454e-15 -1.16246495e-15 -1.18942654e-15 2.34640662e-15 -2.17975499e-17 1.59173761e-15 1.65013351e-16 -5.00521378e-16 3.23944830e-15 2.47573682e-16
+ 7.97835961e-01 -2.89307129e-16 4.07830148e-16 3.07829438e-16 1.46389913e-16 -1.66937141e-16 -2.06630218e-17 -1.15365109e-15 4.29729746e-16 6.95094012e-16 -2.40691019e-15 -8.41099617e-16
+ 3.80362144e-01 -3.72488558e-16 -1.36186620e-16 -3.54010265e-01 1.65418864e-16 -5.01909637e-17 8.72781487e-16 -3.15475321e-16 -1.75617767e-16 -1.33423172e-16 6.16823227e-02
+ 3.80362144e-01 6.95266223e-16 1.24652862e-16 -3.54010265e-01 -2.05726330e-16 2.83810662e-16 2.42210163e-16 6.16823227e-02 -8.82894651e-16 9.45554362e-18
+ 3.80362144e-01 -5.04905355e-16 -3.43420006e-16 -3.54010265e-01 1.97895263e-16 6.16823227e-02 5.27704784e-16 4.09823362e-16 -1.55579456e-16
+ 6.49510022e-01 3.01665582e-16 1.65533985e-16 -1.52276340e-15 4.86620331e-16 6.36576344e-16 4.94854914e-17 -2.05033710e-02
+ 6.49510022e-01 -1.88619393e-17 -1.87155561e-16 -5.78216449e-16 -2.05033710e-02 1.56194250e-15 -3.49237870e-16
+ 6.49510022e-01 -3.24872954e-16 -2.05033710e-02 -7.32295395e-16 -8.96538828e-16 9.96070017e-16
+ 1.39709618e+00 -1.11050273e-16 -1.69128575e-17 -1.24504759e-15 -8.08763902e-16
+ 1.36533927e+00 3.67630294e-16 -7.04593046e-17 8.84496171e-18
+ 1.36533927e+00 9.35720969e-16 -1.83878822e-16
+ 1.39709618e+00 1.75929309e-16
+ 1.36533927e+00
diff --git a/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat
new file mode 100644
index 0000000000..e1bb9a1439
--- /dev/null
+++ b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat
@@ -0,0 +1,26 @@
+26 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 1.24515654e+00 -1.87761557e+00 -4.52345240e-17 -1.21904982e-16 -4.81699167e-17 -3.15892467e-17 -1.29304662e-16 -9.53928505e-18 -6.92703544e-17 -2.51210914e-17 -6.61062629e-17 -5.20340810e-17 -2.29647573e-17
+ 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 -1.87761557e+00 2.38584105e+00 5.04340357e-17 2.33103467e-17 6.07830843e-18 -2.07895767e-17 3.37674767e-16 -7.05544564e-17 -1.25743735e-16 2.64630033e-17 4.25617115e-17 -2.13442138e-16 2.66781497e-17
+ 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 4.52345240e-17 -5.04340357e-17 -1.24362116e-01 -3.18772379e-17 -7.50471191e-18 9.98285745e-02 1.08555743e-17 1.01711716e-17 1.79143233e-17 2.27868803e-17 2.29969445e-18 -1.53440018e-18 5.44995645e-01
+ 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 1.21904982e-16 -2.33103467e-17 -3.18772379e-17 -1.24362116e-01 2.03965534e-17 1.08555743e-17 9.98285745e-02 2.64680855e-17 3.72186277e-17 -3.19162015e-18 5.44995645e-01 3.61445899e-17 1.12147162e-18
+ 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 4.81699167e-17 -6.07830843e-18 -7.50471191e-18 2.03965534e-17 -1.24362116e-01 1.01711716e-17 2.64680855e-17 9.98285745e-02 -3.06117707e-18 5.44995645e-01 -4.81792340e-18 1.73573992e-17 2.58853269e-17
+ 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20 3.15892467e-17 2.07895767e-17 9.98285745e-02 1.08555743e-17 1.01711716e-17 -3.41075882e-02 1.01779479e-17 6.03087458e-17 -5.67003855e-18 -3.71339244e-18 1.81722449e-17 7.94516905e-19 -4.08542832e-01
+ 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 1.29304662e-16 -3.37674767e-16 1.08555743e-17 9.98285745e-02 2.64680855e-17 1.01779479e-17 -3.41075882e-02 3.01950305e-17 -1.21972744e-17 2.71050543e-18 -4.08542832e-01 1.97392558e-17 6.03087458e-18
+ 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 9.53928505e-18 7.05544564e-17 1.01711716e-17 2.64680855e-17 9.98285745e-02 6.03087458e-17 3.01950305e-17 -3.41075882e-02 -4.38763067e-18 -4.08542832e-01 3.43793733e-17 2.17179248e-18 -5.34647196e-18
+ 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 -6.92703544e-17 -1.25743735e-16 -1.79143233e-17 -3.72186277e-17 3.06117707e-18 5.67003855e-18 1.21972744e-17 4.38763067e-18 -6.02927412e-01 -1.92810110e-17 2.84052499e-18 -7.30159341e-17 6.80624854e-17
+ 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 -2.51210914e-17 2.64630033e-17 -2.27868803e-17 3.19162015e-18 -5.44995645e-01 3.71339244e-18 -2.71050543e-18 4.08542832e-01 -1.92810110e-17 1.40487499e-01 6.89230709e-18 2.04346699e-17 1.79486282e-17
+ 1.16731758e+00 1.04896560e-17 -6.09863722e-19 -6.61062629e-17 4.25617115e-17 -2.29969445e-18 -5.44995645e-01 4.81792340e-18 -1.81722449e-17 4.08542832e-01 -3.43793733e-17 2.84052499e-18 6.89230709e-18 1.40487499e-01 -3.05927125e-18 -4.40795946e-18
+ 9.27713518e-01 7.03147766e-21 -5.20340810e-17 -2.13442138e-16 1.53440018e-18 -3.61445899e-17 -1.73573992e-17 -7.94516905e-19 -1.97392558e-17 -2.17179248e-18 -7.30159341e-17 2.04346699e-17 -3.05927125e-18 -6.02927412e-01 1.56701095e-18
+ 1.16731758e+00 -2.29647573e-17 2.66781497e-17 -5.44995645e-01 -1.12147162e-18 -2.58853269e-17 4.08542832e-01 -6.03087458e-18 5.34647196e-18 6.80624854e-17 1.79486282e-17 -4.40795946e-18 1.56701095e-18 1.40487499e-01
+ 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18
+ 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17
+ 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00
+ 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18
+ 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18
+ 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20
+ 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18
+ 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18
+ 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19
+ 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19
+ 1.16731758e+00 1.04896560e-17 -6.09863722e-19
+ 9.27713518e-01 7.03147766e-21
+ 1.16731758e+00
diff --git a/source/module_hsolver/test/diago_pexsi_parallel_test.sh b/source/module_hsolver/test/diago_pexsi_parallel_test.sh
new file mode 100644
index 0000000000..4767d690a2
--- /dev/null
+++ b/source/module_hsolver/test/diago_pexsi_parallel_test.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+np=`cat /proc/cpuinfo | grep "cpu cores" | uniq| awk '{print $NF}'`
+echo "nprocs in this machine is $np"
+
+for i in 6 3 2;do
+    if [[ $i -gt $np ]];then
+        continue
+    fi
+    echo "TEST DIAGO PEXSI in parallel, nprocs=$i"
+    mpirun -np $i ./HSolver_LCAO_PEXSI
+    if [[ $? != 0 ]];then
+        echo -e "\e[1;33m [  FAILED  ] \e[0m"\
+			"execute UT with $i cores error."
+        exit 1
+    fi
+    break    
+done
diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp
new file mode 100644
index 0000000000..62c915e614
--- /dev/null
+++ b/source/module_hsolver/test/diago_pexsi_test.cpp
@@ -0,0 +1,403 @@
+#ifdef __PEXSI
+#include <mpi.h>
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <vector>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "module_hsolver/diago_pexsi.h"
+#include "module_hsolver/module_pexsi/pexsi_solver.h"
+#include "module_hsolver/test/diago_elpa_utils.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "module_base/parallel_global.h"
+#include "module_base/global_variable.h"
+
+#define PASSTHRESHOLD 5e-4
+#define DETAILINFO    false
+#define PRINT_HS      false
+#define REPEATRUN     1
+
+template <typename T> class HamiltTEST : public hamilt::Hamilt<T>
+{
+    public:
+    int desc[9];
+    int nrow, ncol;
+    std::vector<T> h_local;
+    std::vector<T> s_local;
+
+    void matrix(hamilt::MatrixBlock<T> &hk_in, hamilt::MatrixBlock<T> &sk_in) 
+    {
+        hk_in = hamilt::MatrixBlock<T>{this->h_local.data(),
+                                    (size_t)this->nrow,
+                                    (size_t)this->ncol,
+                                    this->desc};
+        sk_in = hamilt::MatrixBlock<T>{this->s_local.data(),
+                                    (size_t)this->nrow,
+                                    (size_t)this->ncol,
+                                    this->desc};
+    }
+
+    void constructHamilt(const int iter, const hamilt::MatrixBlock<double> rho) {}
+    void updateHk(const int ik) {}
+};
+
+
+
+template<class T> class PexsiPrepare 
+{
+  public:
+    PexsiPrepare(int nlocal, int nbands, int nb2d, int sparsity, std::string hfname, std::string sfname, std::string dmname)
+        : nlocal(nlocal), nbands(nbands), nb2d(nb2d), sparsity(sparsity), hfname(hfname), sfname(sfname), dmname(dmname)
+    {
+        MPI_Comm_size(MPI_COMM_WORLD, &dsize);
+        MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+
+    }
+
+    int dsize, myrank;
+    int nlocal, nbands, nb2d, sparsity;
+    int nprows, npcols, myprow, mypcol;
+    int nrow, ncol;
+    double hsolver_time = 0.0;
+    std::string sfname, hfname, dmname;
+    HamiltTEST<T> hmtest;
+    std::vector<T> h;
+    std::vector<T> s;
+    std::vector<T> h_local;
+    std::vector<T> s_local;
+    psi::Psi<T> psi;
+    hsolver::DiagoPexsi<T>* dh = nullptr;
+    Parallel_Orbitals po;
+    std::vector<double> abc;
+    int icontxt;
+
+    double mu;
+
+    // density matrix
+    std::vector<T*> dm_local;
+    std::vector<T*> edm_local;
+
+    std::vector<T> dm;
+
+    bool read_HS()
+    {
+        bool readhfile = false;
+        bool readsfile = false;
+        if (this->myrank == 0)
+        {
+            int hdim, sdim;
+            readhfile = LCAO_DIAGO_TEST::read_hs<std::vector<T>>(hfname, this->h);
+            readsfile = LCAO_DIAGO_TEST::read_hs<std::vector<T>>(sfname, this->s);
+            hdim = sqrt(this->h.size());
+            sdim = sqrt(this->s.size());
+            if (hdim != sdim)
+            {
+                printf("Error: dimensions of H and S are not equal, %d, %d\n", hdim, sdim);
+                readhfile = readsfile = false;
+            }
+            nlocal = hdim;
+        }
+        MPI_Bcast(&nlocal, 1, MPI_INT, 0, MPI_COMM_WORLD);
+        MPI_Bcast(&readhfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+        MPI_Bcast(&readsfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+        nbands = nlocal/2;
+        if (readhfile && readsfile)
+            return true;
+        return false;
+    }
+
+    bool produce_HS()
+    {
+        bool ok = this->read_HS();
+        return ok;
+    }
+
+    void pb2d()
+    {
+        LCAO_DIAGO_TEST::process_2d(nprows, npcols, myprow, mypcol, icontxt);
+
+        hmtest.nrow = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, nprows, myprow); // the number of row of the new_matrix in each process
+        hmtest.ncol = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, npcols, mypcol); // the number of column of the new_matrix in each process
+
+
+        int ISRC = 0, info;
+        descinit_(hmtest.desc, &nlocal, &nlocal, &nb2d, &nb2d, &ISRC, &ISRC, &icontxt, &(hmtest.nrow), &info);
+        if (info != 0)
+        {
+            printf("Invalid blacs-distribution. Abort!\n");
+            exit(1);
+        }
+
+        
+        // set po variables
+        po.ncol = hmtest.ncol;
+        po.nrow = hmtest.nrow;
+        po.nb = nb2d;
+        po.blacs_ctxt = icontxt;
+        po.comm_2D = MPI_COMM_WORLD;
+        po.dim0 = nprows;
+        po.dim1 = npcols;
+        po.testpb = true;
+
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "nrow: " << hmtest.nrow << ", ncol: " << hmtest.ncol << ", nb: " << nb2d << std::endl;
+        }
+
+        dh = new hsolver::DiagoPexsi<T>(&po);
+    }
+
+    void distribute_data()
+    {
+
+        int local_size = hmtest.nrow * hmtest.ncol;
+        this->h_local.resize(local_size);
+        this->s_local.resize(local_size);
+
+        LCAO_DIAGO_TEST::distribute_data<T>(this->h.data(),this->h_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt);
+        LCAO_DIAGO_TEST::distribute_data<T>(this->s.data(),this->s_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt);
+    }
+
+    void set_env()
+    {
+        GlobalV::NLOCAL = nlocal;
+        GlobalV::NBANDS = nbands;
+        GlobalV::DSIZE = dsize;
+        GlobalV::NSPIN = 1;
+        DIAG_WORLD = MPI_COMM_WORLD;
+        GlobalV::NPROC = dsize;
+
+        psi.fix_k(0);
+    }
+
+    void set_pexsi_vars()
+    {
+        pexsi::PEXSI_Solver::set_pexsi_vars();
+        pexsi::PEXSI_Solver::pexsi_mu = mu;
+    }
+
+    void diago()
+    {
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Start to solve the KS equation using PEXSI" << std::endl;
+        }
+        this->pb2d();
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Finish the 2D parallelization" << std::endl;
+        }
+        this->distribute_data();
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Finish the data distribution" << std::endl;
+        }
+        this->set_env();
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Finish the environment setting" << std::endl;
+        }
+        double starttime = 0.0, endtime = 0.0;
+        this->set_pexsi_vars();
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Finish the PEXSI setting" << std::endl;
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+        starttime = MPI_Wtime();
+        for(int i=0;i<REPEATRUN;i++)
+        {
+            hmtest.h_local = this->h_local;
+            hmtest.s_local = this->s_local;
+            dh->diag(&hmtest, psi, nullptr);
+
+            // copy the density matrix to dm_local
+            dm_local = dh->DM;
+            edm_local = dh->EDM;
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+        if (DETAILINFO && myrank == 0)
+        {
+            std::cout << "Finish the KS equation solving" << std::endl;
+        }
+        endtime = MPI_Wtime();
+        hsolver_time = (endtime - starttime)/REPEATRUN;
+    }
+
+    bool read_ref()
+    {
+        auto f_dm = std::ifstream(dmname);
+        if (!f_dm.is_open())
+        {
+            std::cout << "Error: cannot open the reference file " << dmname << std::endl;
+            return false;
+        }
+        int nread = 0;
+        f_dm >> nread;
+        if (nread != nlocal)
+        {
+            std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl;
+            return false;
+        }
+        f_dm >> nread;
+        if (nread != nlocal)
+        {
+            std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl;
+            return false;
+        }
+
+        f_dm >> GlobalV::nelec >> mu;
+        
+        dm.resize(nread*nread);
+        // T* edm = new T[nglobal*nglobal];
+        for (int i = 0; i < nread; i++)
+        {
+            for (int j = 0; j < nread; j++)
+            {
+                f_dm >> dm[i*nread+j];
+            }
+        }
+        return true;
+    }
+
+
+    bool compare_ref(std::stringstream &out_info)
+    {
+        double maxerror = 0.0;
+        int iindex = 0;
+        bool pass = true;
+
+        auto ofs = std::ofstream("dm_local" + std::to_string(myprow) + std::to_string(mypcol) + ".dat");
+
+        int SENDPROW = 0, SENDPCOL = 0, tag = 0;
+
+        // do iteration for matrix, distribute old_matrix to each process, pass a block each time
+        for (int row = 0; row < nlocal; row++)
+        {
+            int recv_prow = (row / nb2d) % nprows; // the row number of recive process
+            int nm_row = ((row / nb2d) / nprows) * nb2d + row % nb2d; // row number of block in new_matrix
+            for (int col = 0; col < nlocal; col += nb2d)
+            {
+                int recv_pcol = (col / nb2d) % npcols; // the column number of recive process
+                int nm_col = ((col / nb2d) / npcols) * nb2d + col % nb2d;
+                int pass_length = std::min(nlocal - col, nb2d); // nlocal may not be devided by nb2d;
+                // at global: nlocal * row + col + i
+                // at local: (nm_col + i) * hmtest.nrow + nm_row
+
+                if (myprow == recv_prow && mypcol == recv_pcol)
+                {
+                    double diff = 0;
+                    for (int i = 0; i < pass_length; i++)
+                    {
+                        diff = std::abs(dm_local[0][(nm_col + i) * hmtest.nrow + nm_row] - dm[nlocal * row + col + i]);
+                        if (diff > maxerror)
+                        {
+                            maxerror = diff;
+                        }
+                        if (diff > PASSTHRESHOLD)
+                        {
+                            pass = false;
+                        }
+                    }
+                }
+            }
+        }
+
+        bool pass_all = true;
+        double maxerror_all = 0.0;
+        MPI_Allreduce(&pass, &pass_all, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
+        MPI_Allreduce(&maxerror, &maxerror_all, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+        if (myrank == 0)
+        {
+            std::cout << "H/S matrix are read from " << hfname << ", " << sfname << std::endl;
+            std::cout << "Density matrix are read from " << dmname << std::endl;
+            std::cout << std::endl;
+            out_info << "Maximum difference between ks_hsolver and ref is " << maxerror_all << ", the pass threshold is " << PASSTHRESHOLD << std::endl;
+
+            if (DETAILINFO)
+            {
+                std::cout << out_info.str();
+                out_info.str("");
+                out_info.clear();
+            }
+        }
+        delete dh;
+        return pass_all;
+    }
+};
+
+class PexsiGammaOnlyTest : public ::testing::TestWithParam<PexsiPrepare<double>> {};
+
+TEST_P(PexsiGammaOnlyTest, LCAO)
+{
+    std::stringstream out_info;
+    PexsiPrepare<double> dp = GetParam();
+    if (DETAILINFO && dp.myrank == 0)
+    {
+        std::cout << "nlocal: " << dp.nlocal << ", nbands: " << dp.nbands << ", nb2d: " << dp.nb2d << ", sparsity: " << dp.sparsity << std::endl;
+    
+    }
+    ASSERT_TRUE(dp.produce_HS());
+    if (DETAILINFO && dp.myrank == 0)
+    {
+        std::cout << "H/S matrix are read from " << dp.hfname << ", " << dp.sfname << std::endl;
+    }
+    ASSERT_TRUE(dp.read_ref());
+    if (DETAILINFO && dp.myrank == 0)
+    {
+        std::cout << "Density matrix are read from " << dp.dmname << std::endl;
+    }
+    dp.diago();
+    if (DETAILINFO && dp.myrank == 0)
+    {
+        std::cout << "Time for hsolver: " << dp.hsolver_time << "s" << std::endl;
+    }
+
+    bool pass = dp.compare_ref(out_info);
+    EXPECT_TRUE(pass) << out_info.str();
+    
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    DiagoTest,
+    PexsiGammaOnlyTest,
+    ::testing::Values( //int nlocal, int nbands, int nb2d, int sparsity, std::string ks_solver_in, std::string hfname, std::string sfname
+        PexsiPrepare<double>(0, 0, 2, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat"),
+        PexsiPrepare<double>(0, 0, 1, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat")
+
+    ));
+
+
+int main(int argc, char **argv)
+{
+    MPI_Init(&argc, &argv);
+    int mypnum, dsize;
+    MPI_Comm_size(MPI_COMM_WORLD, &dsize);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mypnum);
+
+    testing::InitGoogleTest(&argc, argv);
+    //Parallel_Global::split_diag_world(dsize);
+    ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
+    if (mypnum != 0)
+    {
+        delete listeners.Release(listeners.default_result_printer());
+    }
+    int result = RUN_ALL_TESTS();
+
+    if (mypnum == 0 && result != 0)
+    {
+        std::cout << "ERROR:some tests are not passed" << std::endl;
+        return result;
+    }
+    else
+    {
+        MPI_Finalize();
+        return 0;
+    }
+}
+
+
+#endif // __PEXSI
\ No newline at end of file

From 70f9a54b197e05fa9b79cbd198a81f8b56adad72 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Tue, 9 Apr 2024 13:49:07 +0800
Subject: [PATCH 35/44] modify for changed gint interface

---
 source/module_elecstate/elecstate_lcao.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index c29e2d9bfc..b9c7a80a49 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -297,9 +297,9 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
     }
 
     ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!");
-    this->uhm->GG.transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint
+    this->gint_gamma->transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint
     Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho);
-    this->uhm->GG.cal_gint(&inout);
+    this->gint_gamma->cal_gint(&inout);
     if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
     {
         for (int is = 0; is < GlobalV::NSPIN; is++)
@@ -307,7 +307,7 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
             ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx);
         }
         Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau);
-        this->uhm->GG.cal_gint(&inout1);
+        this->gint_gamma->cal_gint(&inout1);
     }
 
     this->charge->renormalize_rho();

From 5a891f7e81ece8551a1693e29610e126a350288f Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Tue, 9 Apr 2024 14:28:43 +0800
Subject: [PATCH 36/44] correct nspin related behaviors

---
 source/module_elecstate/elecstate_lcao.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index b9c7a80a49..404e75f2e6 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -266,12 +266,18 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
 {
     ModuleBase::timer::tick("ElecStateLCAO", "dmToRho");
 
+    int nspin = GlobalV::NSPIN;
+    if (GlobalV::NSPIN == 4)
+    {
+        nspin = 1;
+    }
+
     // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25
     if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready
     {
-        for (int ik = 0; ik < GlobalV::NSPIN; ++ik)
+        for (int is = 0; is < nspin; ++is)
         {
-            this->loc->set_dm_gamma(ik, pexsi_DM[ik]);
+            this->loc->set_dm_gamma(is, pexsi_DM[is]);
         }
         this->loc->cal_dk_gamma_from_2D_pub();
     }
@@ -279,11 +285,7 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
     auto DM = this->get_DM();
     this->get_DM()->pexsi_EDM.clear();
 
-    int nspin = GlobalV::NSPIN;
-    if (GlobalV::NSPIN == 4)
-    {
-        nspin = 1;
-    }
+    
     for (int is = 0; is < nspin; is++)
     {
         this->DM->set_DMK_pointer(is, pexsi_DM[is]);

From d7b402de0f983c654e693deb3617632b43f5f8ef Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Tue, 9 Apr 2024 14:41:36 +0800
Subject: [PATCH 37/44] add efermi passthrough

---
 source/module_hsolver/diago_pexsi.h    |  2 +-
 source/module_hsolver/hsolver_lcao.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index af3a175ff1..24b09367a5 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -15,7 +15,6 @@ class DiagoPexsi : public DiagH<T>
 {
   private:
     using Real = typename GetTypeReal<T>::type;
-    std::vector<double> mu_buffer;
 
   public:
     DiagoPexsi(const Parallel_Orbitals* ParaV_in)
@@ -35,6 +34,7 @@ class DiagoPexsi : public DiagH<T>
     double totalEnergyS;
     double totalFreeEnergy;
     pexsi::PEXSI_Solver* ps;
+    std::vector<double> mu_buffer;
 
 };
 } // namespace hsolver
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 9f9460eb7c..bb53e57d28 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -232,6 +232,16 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
         pes->f_en.eband = tem->totalFreeEnergy;
+        if (pes->eferm.two_efermi) 
+        {
+            pes->eferm.ef = tem->mu_buffer[0];
+            pes->eferm.ef_up = tem->mu_buffer[0];
+            pes->eferm.ef_dw = tem->mu_buffer[1];
+        }
+        else
+        {
+            pes->eferm.ef = tem->mu_buffer[0];
+        }
         _pes->dmToRho(tem->DM, tem->EDM);
     }
     else

From 76774f676426ca8c86bb26cd0efeb5d45f171b46 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Tue, 9 Apr 2024 18:59:23 +0800
Subject: [PATCH 38/44] Revert "add efermi passthrough"

This reverts commit d7b402de0f983c654e693deb3617632b43f5f8ef.
---
 source/module_hsolver/diago_pexsi.h    |  2 +-
 source/module_hsolver/hsolver_lcao.cpp | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index 24b09367a5..af3a175ff1 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -15,6 +15,7 @@ class DiagoPexsi : public DiagH<T>
 {
   private:
     using Real = typename GetTypeReal<T>::type;
+    std::vector<double> mu_buffer;
 
   public:
     DiagoPexsi(const Parallel_Orbitals* ParaV_in)
@@ -34,7 +35,6 @@ class DiagoPexsi : public DiagH<T>
     double totalEnergyS;
     double totalFreeEnergy;
     pexsi::PEXSI_Solver* ps;
-    std::vector<double> mu_buffer;
 
 };
 } // namespace hsolver
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index bb53e57d28..9f9460eb7c 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -232,16 +232,6 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
         pes->f_en.eband = tem->totalFreeEnergy;
-        if (pes->eferm.two_efermi) 
-        {
-            pes->eferm.ef = tem->mu_buffer[0];
-            pes->eferm.ef_up = tem->mu_buffer[0];
-            pes->eferm.ef_dw = tem->mu_buffer[1];
-        }
-        else
-        {
-            pes->eferm.ef = tem->mu_buffer[0];
-        }
         _pes->dmToRho(tem->DM, tem->EDM);
     }
     else

From 9e8c7241b673b5d8e26a2d4d621d3899ab6bf1d5 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sat, 13 Apr 2024 20:15:18 +0800
Subject: [PATCH 39/44] commits to resolve conversations related to codes

---
 source/module_elecstate/elecstate_lcao.cpp    |  2 -
 source/module_elecstate/elecstate_lcao.h      |  4 +-
 source/module_hsolver/diago_pexsi.cpp         | 41 +++++++++++++------
 source/module_hsolver/diago_pexsi.h           | 13 ++----
 .../module_pexsi/dist_bcd_matrix.cpp          |  6 +--
 .../module_pexsi/dist_ccs_matrix.cpp          | 14 +++----
 .../module_pexsi/dist_matrix_transformer.cpp  |  4 +-
 .../module_pexsi/pexsi_solver.cpp             | 16 +++++++-
 .../module_pexsi/pexsi_solver.h               |  3 +-
 9 files changed, 62 insertions(+), 41 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 404e75f2e6..89e40dfe05 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -282,9 +282,7 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
         this->loc->cal_dk_gamma_from_2D_pub();
     }
 
-    auto DM = this->get_DM();
     this->get_DM()->pexsi_EDM.clear();
-
     
     for (int is = 0; is < nspin; is++)
     {
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index b7996036a9..c3e7ae3a2d 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -70,8 +70,8 @@ class ElecStateLCAO : public ElecState
 
     /** 
      * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi
-     * @param pexsi_DM: pointers of density matrix calculated by pexsi
-     * @param pexsi_EDM: pointers of energy-weighed density matrix calculated by pexsi, needed by MD, will be stored in DensityMatrix::EDM
+     * @param pexsi_DM: pointers of density matrix (DMK) calculated by pexsi
+     * @param pexsi_EDM: pointers of energy-weighed density matrix (EDMK) calculated by pexsi, needed by MD, will be stored in DensityMatrix::pexsi_EDM
      */
     void dmToRho(std::vector<TK*> pexsi_DM, std::vector<TK*> pexsi_EDM);
 #endif
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 95c4a7433e..31b48aef5f 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,11 +1,8 @@
 #include <mpi.h>
 #include <complex>
 #ifdef __PEXSI
-#include "c_pexsi_interface.h"
 #include "diago_pexsi.h"
 #include "module_base/global_variable.h"
-#include "module_base/lapack_connector.h"
-#include "module_base/timer.h"
 #include "module_base/tool_quit.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
 #include "module_pexsi/pexsi_solver.h"
@@ -15,6 +12,23 @@ typedef hamilt::MatrixBlock<std::complex<double>> matcd;
 
 namespace hsolver
 {
+template <typename T>
+DiagoPexsi<T>::DiagoPexsi(const Parallel_Orbitals* ParaV_in)
+{
+    int nspin = GlobalV::NSPIN;
+    if (GlobalV::NSPIN == 4)
+    {
+        nspin = 1;
+    }
+    mu_buffer.resize(nspin);
+    for (int i = 0; i < nspin; i++)
+    {
+        mu_buffer[i] = this->ps->pexsi_mu;
+    }
+    this->ParaV = ParaV_in;
+    this->ps = std::make_unique<pexsi::PEXSI_Solver>();
+}
+
 template <>
 void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>& psi, double* eigenvalue_in)
 {
@@ -24,15 +38,15 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
     MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
     int ik = psi.get_current_k();
-    this->ps = new pexsi::PEXSI_Solver(this->ParaV->blacs_ctxt,
-                                       this->ParaV->nb,
-                                       this->ParaV->nrow,
-                                       this->ParaV->ncol,
-                                       h_mat.p,
-                                       s_mat.p,
-                                       this->totalEnergyH,
-                                       this->totalEnergyS,
-                                       this->totalFreeEnergy);
+    this->ps->prepare(this->ParaV->blacs_ctxt,
+                                                     this->ParaV->nb,
+                                                     this->ParaV->nrow,
+                                                     this->ParaV->ncol,
+                                                     h_mat.p,
+                                                     s_mat.p,
+                                                     this->totalEnergyH,
+                                                     this->totalEnergyS,
+                                                     this->totalFreeEnergy);
     this->ps->solve(mu_buffer[ik]);
     this->EDM.push_back(this->ps->get_EDM());
     this->DM.push_back(this->ps->get_DM());
@@ -51,5 +65,8 @@ void DiagoPexsi<std::complex<double>>::diag(hamilt::Hamilt<std::complex<double>>
     ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case");
 }
 
+template class DiagoPexsi<double>;
+template class DiagoPexsi<std::complex<double> >;
+
 } // namespace hsolver
 #endif
\ No newline at end of file
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index af3a175ff1..c2063a22b4 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -2,6 +2,7 @@
 #define DIGAOPEXSI_H
 
 #include <vector>
+#include <memory>
 #include "diagh.h"
 #include "module_base/global_variable.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
@@ -18,15 +19,7 @@ class DiagoPexsi : public DiagH<T>
     std::vector<double> mu_buffer;
 
   public:
-    DiagoPexsi(const Parallel_Orbitals* ParaV_in)
-    {
-        mu_buffer.resize(GlobalV::NSPIN);
-        for (int i = 0; i < GlobalV::NSPIN; i++)
-        {
-            mu_buffer[i] = this->ps->pexsi_mu;
-        }
-        this->ParaV = ParaV_in;
-    }
+    DiagoPexsi(const Parallel_Orbitals* ParaV_in);
     void diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in) override;
     const Parallel_Orbitals* ParaV;
     std::vector<T*> DM;
@@ -34,7 +27,7 @@ class DiagoPexsi : public DiagH<T>
     double totalEnergyH;
     double totalEnergyS;
     double totalFreeEnergy;
-    pexsi::PEXSI_Solver* ps;
+    std::unique_ptr<pexsi::PEXSI_Solver> ps;
 
 };
 } // namespace hsolver
diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
index 8e4c8e7ac7..ff3f85f32b 100644
--- a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp
@@ -1,6 +1,6 @@
 #ifdef __PEXSI
 #include "dist_bcd_matrix.h"
-
+#include <cctype>
 #include <mpi.h>
 extern "C"
 {
@@ -26,13 +26,13 @@ DistBCDMatrix::DistBCDMatrix(MPI_Comm comm,
     this->nblk = nblk;
     this->nrow = nrow;
     this->ncol = ncol;
-    if (layout == 'R' || layout == 'r' || layout == 'C' || layout == 'c')
+    if (layout == 'r' || layout == 'c')
     {
         this->layout = layout;
     }
     else
     {
-        throw("The layout must be 'R', 'r', 'C', or 'c'");
+        throw("The layout must be 'r' or 'c'");
     }
 
     if (comm != MPI_COMM_NULL)
diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
index ddd02aaa9a..74391f2fbe 100644
--- a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
+++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp
@@ -12,8 +12,8 @@ DistCCSMatrix::DistCCSMatrix(void)
     this->nnz = 0;
     this->nnzLocal = 0;
     this->numColLocal = 0;
-    this->colptrLocal = NULL;
-    this->rowindLocal = NULL;
+    this->colptrLocal = nullptr;
+    this->rowindLocal = nullptr;
 }
 
 DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in)
@@ -23,8 +23,8 @@ DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in)
     this->nnz = 0;
     this->nnzLocal = 0;
     this->numColLocal = 0;
-    this->colptrLocal = NULL;
-    this->rowindLocal = NULL;
+    this->colptrLocal = nullptr;
+    this->rowindLocal = nullptr;
 }
 
 DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in)
@@ -65,21 +65,21 @@ DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in)
             this->numColLocal = size / nproc_data;
             this->firstCol = size / nproc_data * myproc;
             this->colptrLocal = new int[this->numColLocal + 1];
-            this->rowindLocal = NULL;
+            this->rowindLocal = nullptr;
         }
         else if (myproc == nproc_data - 1)
         {
             this->numColLocal = size - myproc * (size / nproc_data);
             this->firstCol = size / nproc_data * myproc;
             this->colptrLocal = new int[this->numColLocal + 1];
-            this->rowindLocal = NULL;
+            this->rowindLocal = nullptr;
         }
         else
         {
             this->numColLocal = 0;
             this->firstCol = size - 1;
             this->colptrLocal = new int[this->numColLocal + 1];
-            this->rowindLocal = NULL;
+            this->rowindLocal = nullptr;
         }
     }
 }
diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
index 4b0fc23cfb..313a840e68 100644
--- a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
+++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp
@@ -170,7 +170,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout,
     nnz = 0;
     colidx.clear();
     rowidx.clear();
-    if (layout == 'C' || layout == 'c')
+    if (layout == 'c')
     {
         for (int i = 0; i < ncol; ++i)
         {
@@ -186,7 +186,7 @@ inline int DistMatrixTransformer::getNonZeroIndex(char layout,
             }
         }
     }
-    else if (layout == 'R' || layout == 'r')
+    else if (layout == 'r')
     {
         for (int i = 0; i < ncol; ++i)
         {
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index ebb2c6d78b..914f471a59 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -40,7 +40,7 @@ double PEXSI_Solver::pexsi_mu_guard = 0.0;
 double PEXSI_Solver::pexsi_elec_thr = 0.0;
 double PEXSI_Solver::pexsi_zero_thr = 0.0;
 
-PEXSI_Solver::PEXSI_Solver(const int blacs_text,
+void PEXSI_Solver::prepare(const int blacs_text,
                            const int nb,
                            const int nrow,
                            const int ncol,
@@ -54,17 +54,29 @@ PEXSI_Solver::PEXSI_Solver(const int blacs_text,
     this->nb = nb;
     this->nrow = nrow;
     this->ncol = ncol;
+    if (this->h) { delete[] this->h;}
     this->h = new double[nrow * ncol];
+    if (this->s) { delete[] this->s;}
     this->s = new double[nrow * ncol];
     std::memcpy(this->h, h, nrow * ncol * sizeof(double));
     std::memcpy(this->s, s, nrow * ncol * sizeof(double));
+    if (this->DM) { delete[] this->DM;}
     this->DM = new double[nrow * ncol];
+    if (this->EDM) { delete[] this->EDM;}
     this->EDM = new double[nrow * ncol];
     this->totalEnergyH = 0.0;
     this->totalEnergyS = 0.0;
     this->totalFreeEnergy = 0.0;
 }
 
+PEXSI_Solver::~PEXSI_Solver()
+{
+    delete[] h;
+    delete[] s;
+    delete[] DM;
+    delete[] EDM;
+}
+
 int PEXSI_Solver::solve(double mu0)
 {
     MPI_Group grid_group;
@@ -85,7 +97,7 @@ int PEXSI_Solver::solve(double mu0)
                 this->nb,
                 this->nrow,
                 this->ncol,
-                'C',
+                'c',
                 this->h,
                 this->s,
                 GlobalV::nelec,
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index a5d52be5cf..2cb1dd13b2 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -8,7 +8,7 @@ namespace pexsi
 class PEXSI_Solver
 {
   public:
-    PEXSI_Solver(const int blacs_text,
+    void prepare(const int blacs_text,
                  const int nb,
                  const int nrow,
                  const int ncol,
@@ -17,6 +17,7 @@ class PEXSI_Solver
                  double& totalEnergyH,
                  double& totalEnergyS,
                  double& totalFreeEnergy);
+    ~PEXSI_Solver();
     int solve(double mu0);
     double* get_DM() const;
     double* get_EDM() const;

From 569269b4277542e300a419946840b95d0c267223 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sat, 13 Apr 2024 22:18:57 +0800
Subject: [PATCH 40/44] DM and EDM pointers in pexsi now handled by diagopexsi,
 and copying h s matrices no longer needed

---
 source/module_elecstate/elecstate_lcao.cpp    |  3 +-
 source/module_hsolver/diago_pexsi.cpp         | 44 ++++++++++++++-----
 source/module_hsolver/diago_pexsi.h           |  2 +-
 source/module_hsolver/hsolver_lcao.cpp        |  3 +-
 .../module_pexsi/pexsi_solver.cpp             | 37 +++-------------
 .../module_pexsi/pexsi_solver.h               |  8 +---
 .../module_pexsi/simple_pexsi.cpp             |  8 ++--
 7 files changed, 48 insertions(+), 57 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp
index 89e40dfe05..5e3f81b1e5 100644
--- a/source/module_elecstate/elecstate_lcao.cpp
+++ b/source/module_elecstate/elecstate_lcao.cpp
@@ -282,12 +282,11 @@ void ElecStateLCAO<double>::dmToRho(std::vector<double*> pexsi_DM, std::vector<d
         this->loc->cal_dk_gamma_from_2D_pub();
     }
 
-    this->get_DM()->pexsi_EDM.clear();
+    this->get_DM()->pexsi_EDM = pexsi_EDM;
     
     for (int is = 0; is < nspin; is++)
     {
         this->DM->set_DMK_pointer(is, pexsi_DM[is]);
-        this->get_DM()->pexsi_EDM.push_back(pexsi_EDM[is]);
     }
     DM->cal_DMR();
     
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index 31b48aef5f..a88349160d 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -25,8 +25,34 @@ DiagoPexsi<T>::DiagoPexsi(const Parallel_Orbitals* ParaV_in)
     {
         mu_buffer[i] = this->ps->pexsi_mu;
     }
+
     this->ParaV = ParaV_in;
     this->ps = std::make_unique<pexsi::PEXSI_Solver>();
+
+    this->DM.resize(nspin);
+    this->EDM.resize(nspin);
+    for (int i = 0; i < nspin; i++)
+    {
+        this->DM[i] = new T[ParaV->nrow * ParaV->ncol];
+        this->EDM[i] = new T[ParaV->nrow * ParaV->ncol];
+    }
+
+}
+
+template <typename T>
+DiagoPexsi<T>::~DiagoPexsi()
+{
+    int nspin = GlobalV::NSPIN;
+    if (GlobalV::NSPIN == 4)
+    {
+        nspin = 1;
+    }
+    for (int i = 0; i < nspin; i++)
+    {
+        delete[] this->DM[i];
+        delete[] this->EDM[i];
+    }
+
 }
 
 template <>
@@ -36,20 +62,16 @@ void DiagoPexsi<double>::diag(hamilt::Hamilt<double>* phm_in, psi::Psi<double>&
     matd h_mat, s_mat;
     phm_in->matrix(h_mat, s_mat);
     std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
-    MPI_Comm COMM_DIAG = MPI_COMM_WORLD;
     int ik = psi.get_current_k();
     this->ps->prepare(this->ParaV->blacs_ctxt,
-                                                     this->ParaV->nb,
-                                                     this->ParaV->nrow,
-                                                     this->ParaV->ncol,
-                                                     h_mat.p,
-                                                     s_mat.p,
-                                                     this->totalEnergyH,
-                                                     this->totalEnergyS,
-                                                     this->totalFreeEnergy);
+                      this->ParaV->nb,
+                      this->ParaV->nrow,
+                      this->ParaV->ncol,
+                      h_mat.p,
+                      s_mat.p,
+                      DM[ik],
+                      EDM[ik]);
     this->ps->solve(mu_buffer[ik]);
-    this->EDM.push_back(this->ps->get_EDM());
-    this->DM.push_back(this->ps->get_DM());
     this->totalFreeEnergy = this->ps->get_totalFreeEnergy();
     this->totalEnergyH = this->ps->get_totalEnergyH();
     this->totalEnergyS = this->ps->get_totalEnergyS();
diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h
index c2063a22b4..9802273e4b 100644
--- a/source/module_hsolver/diago_pexsi.h
+++ b/source/module_hsolver/diago_pexsi.h
@@ -28,7 +28,7 @@ class DiagoPexsi : public DiagH<T>
     double totalEnergyS;
     double totalFreeEnergy;
     std::unique_ptr<pexsi::PEXSI_Solver> ps;
-
+    ~DiagoPexsi();
 };
 } // namespace hsolver
 
diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp
index 9f9460eb7c..c545ed50a9 100644
--- a/source/module_hsolver/hsolver_lcao.cpp
+++ b/source/module_hsolver/hsolver_lcao.cpp
@@ -154,8 +154,6 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
                 this->pdiagh = nullptr;
             }
             auto tem = dynamic_cast<DiagoPexsi<T>*>(this->pdiagh);
-            tem->DM.clear();
-            tem->EDM.clear();
         }
         if (this->pdiagh == nullptr)
         {
@@ -232,6 +230,7 @@ void HSolverLCAO<T, Device>::solveTemplate(hamilt::Hamilt<T>* pHamilt,
         if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!");
         elecstate::ElecStateLCAO<T>* _pes = dynamic_cast<elecstate::ElecStateLCAO<T>*>(pes);
         pes->f_en.eband = tem->totalFreeEnergy;
+        // maybe eferm could be dealt with in the future
         _pes->dmToRho(tem->DM, tem->EDM);
     }
     else
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
index 914f471a59..7a71e6ca01 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.cpp
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp
@@ -46,37 +46,22 @@ void PEXSI_Solver::prepare(const int blacs_text,
                            const int ncol,
                            const double* h,
                            const double* s,
-                           double& totalEnergyH,
-                           double& totalEnergyS,
-                           double& totalFreeEnergy)
+                           double*& _DM,
+                           double*& _EDM)
 {
     this->blacs_text = blacs_text;
     this->nb = nb;
     this->nrow = nrow;
     this->ncol = ncol;
-    if (this->h) { delete[] this->h;}
-    this->h = new double[nrow * ncol];
-    if (this->s) { delete[] this->s;}
-    this->s = new double[nrow * ncol];
-    std::memcpy(this->h, h, nrow * ncol * sizeof(double));
-    std::memcpy(this->s, s, nrow * ncol * sizeof(double));
-    if (this->DM) { delete[] this->DM;}
-    this->DM = new double[nrow * ncol];
-    if (this->EDM) { delete[] this->EDM;}
-    this->EDM = new double[nrow * ncol];
+    this->h = const_cast<double*>(h);
+    this->s = const_cast<double*>(s);
+    this->DM = _DM;
+    this->EDM = _EDM;
     this->totalEnergyH = 0.0;
     this->totalEnergyS = 0.0;
     this->totalFreeEnergy = 0.0;
 }
 
-PEXSI_Solver::~PEXSI_Solver()
-{
-    delete[] h;
-    delete[] s;
-    delete[] DM;
-    delete[] EDM;
-}
-
 int PEXSI_Solver::solve(double mu0)
 {
     MPI_Group grid_group;
@@ -112,16 +97,6 @@ int PEXSI_Solver::solve(double mu0)
     return 0;
 }
 
-double* PEXSI_Solver::get_DM() const
-{
-    return DM;
-}
-
-double* PEXSI_Solver::get_EDM() const
-{
-    return EDM;
-}
-
 const double PEXSI_Solver::get_totalFreeEnergy() const
 {
     return totalFreeEnergy;
diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h
index 2cb1dd13b2..b041d13656 100644
--- a/source/module_hsolver/module_pexsi/pexsi_solver.h
+++ b/source/module_hsolver/module_pexsi/pexsi_solver.h
@@ -14,13 +14,9 @@ class PEXSI_Solver
                  const int ncol,
                  const double* h,
                  const double* s,
-                 double& totalEnergyH,
-                 double& totalEnergyS,
-                 double& totalFreeEnergy);
-    ~PEXSI_Solver();
+                 double*& DM,
+                 double*& EDM);
     int solve(double mu0);
-    double* get_DM() const;
-    double* get_EDM() const;
     const double get_totalFreeEnergy() const;
     const double get_totalEnergyH() const;
     const double get_totalEnergyS() const;
diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
index c52a6c8ef3..075c803182 100644
--- a/source/module_hsolver/module_pexsi/simple_pexsi.cpp
+++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp
@@ -340,10 +340,10 @@ int simplePEXSI(MPI_Comm comm_PEXSI,
     // back to 2D block cyclic distribution if neccessary
     if (comm_2D != MPI_COMM_NULL)
     {
-        delete[] DM;
-        delete[] EDM;
-        DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
-        EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
+        // delete[] DM;
+        // delete[] EDM;
+        // DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
+        // EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()];
     }
     // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test
     ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D");

From f8352cb7a8fdebd030a7ac1c36e3a2097aef2583 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sat, 13 Apr 2024 22:19:19 +0800
Subject: [PATCH 41/44] add pexsi examples

---
 examples/pexsi/md_Si8/INPUT       | 32 ++++++++++++
 examples/pexsi/md_Si8/KPT         |  4 ++
 examples/pexsi/md_Si8/STRU        | 28 ++++++++++
 examples/pexsi/scf_Si64/INPUT     | 20 ++++++++
 examples/pexsi/scf_Si64/KPT       |  4 ++
 examples/pexsi/scf_Si64/STRU      | 85 +++++++++++++++++++++++++++++++
 examples/pexsi/scf_spin_Fe2/INPUT | 22 ++++++++
 examples/pexsi/scf_spin_Fe2/KPT   |  4 ++
 examples/pexsi/scf_spin_Fe2/STRU  | 29 +++++++++++
 9 files changed, 228 insertions(+)
 create mode 100644 examples/pexsi/md_Si8/INPUT
 create mode 100644 examples/pexsi/md_Si8/KPT
 create mode 100644 examples/pexsi/md_Si8/STRU
 create mode 100755 examples/pexsi/scf_Si64/INPUT
 create mode 100644 examples/pexsi/scf_Si64/KPT
 create mode 100755 examples/pexsi/scf_Si64/STRU
 create mode 100644 examples/pexsi/scf_spin_Fe2/INPUT
 create mode 100644 examples/pexsi/scf_spin_Fe2/KPT
 create mode 100644 examples/pexsi/scf_spin_Fe2/STRU

diff --git a/examples/pexsi/md_Si8/INPUT b/examples/pexsi/md_Si8/INPUT
new file mode 100644
index 0000000000..13231579bc
--- /dev/null
+++ b/examples/pexsi/md_Si8/INPUT
@@ -0,0 +1,32 @@
+INPUT_PARAMETERS
+#Parameters (1.General)
+suffix                 Si_rescaling
+calculation            md
+nbands                 20
+symmetry               0
+pseudo_dir             ../../../tests/PP_ORB
+orbital_dir            ../../../tests/PP_ORB
+
+#Parameters (2.Iteration)
+ecutwfc                30
+scf_thr                1e-5
+scf_nmax               100
+
+#Parameters (3.Basis)
+basis_type             lcao
+ks_solver              pexsi
+gamma_only             1
+
+#Parameters (5.Mixing)
+mixing_type            broyden
+mixing_beta            0.3
+chg_extrap             second-order
+
+#Parameters (6.MD)
+md_type                nvt
+md_thermostat          rescaling
+md_tolerance           10
+md_nstep               10
+md_dt                  1
+md_tfirst              300
+md_tfreq               0.025
diff --git a/examples/pexsi/md_Si8/KPT b/examples/pexsi/md_Si8/KPT
new file mode 100644
index 0000000000..c289c0158a
--- /dev/null
+++ b/examples/pexsi/md_Si8/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+1 1 1 0 0 0
diff --git a/examples/pexsi/md_Si8/STRU b/examples/pexsi/md_Si8/STRU
new file mode 100644
index 0000000000..795530e8cf
--- /dev/null
+++ b/examples/pexsi/md_Si8/STRU
@@ -0,0 +1,28 @@
+ATOMIC_SPECIES
+Si  28.085  Si_ONCV_PBE-1.0.upf
+
+NUMERICAL_ORBITAL
+Si_gga_8au_60Ry_2s2p1d.orb
+
+LATTICE_CONSTANT
+1.8897270    # 1 Angstrom = 1.8897270 bohr
+
+LATTICE_VECTORS
+5.43090      0.00000      0.00000
+0.00000      5.43090      0.00000
+0.00000      0.00000      5.43090
+
+ATOMIC_POSITIONS
+Direct 
+
+Si
+0.0
+8
+0.000	0.000	0.000	1	1	1
+0.000	0.500	0.500	1	1	1
+0.500	0.000	0.500	1	1	1
+0.500	0.500	0.000	1	1	1
+0.250	0.250	0.250	1	1	1
+0.250	0.750	0.750	1	1	1
+0.750	0.250	0.750	1	1	1
+0.750	0.750	0.250	1	1	1
\ No newline at end of file
diff --git a/examples/pexsi/scf_Si64/INPUT b/examples/pexsi/scf_Si64/INPUT
new file mode 100755
index 0000000000..ddb9813701
--- /dev/null
+++ b/examples/pexsi/scf_Si64/INPUT
@@ -0,0 +1,20 @@
+INPUT_PARAMETERS
+suffix      test
+ntype       1
+nbands      200
+pseudo_dir          ../../../tests/PP_ORB
+orbital_dir         ../../../tests/PP_ORB
+
+calculation scf
+mixing_beta 0.4
+basis_type  lcao
+gamma_only  1
+symmetry    0
+
+ecutwfc     60
+lcao_dr         1e-3
+scf_nmax	      20
+
+ks_solver   pexsi
+
+pexsi_npole  40
\ No newline at end of file
diff --git a/examples/pexsi/scf_Si64/KPT b/examples/pexsi/scf_Si64/KPT
new file mode 100644
index 0000000000..c289c0158a
--- /dev/null
+++ b/examples/pexsi/scf_Si64/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+1 1 1 0 0 0
diff --git a/examples/pexsi/scf_Si64/STRU b/examples/pexsi/scf_Si64/STRU
new file mode 100755
index 0000000000..70e722c0ec
--- /dev/null
+++ b/examples/pexsi/scf_Si64/STRU
@@ -0,0 +1,85 @@
+ATOMIC_SPECIES
+Si 28 Si_ONCV_PBE-1.0.upf
+
+NUMERICAL_ORBITAL
+Si_gga_8au_60Ry_2s2p1d.orb
+
+LATTICE_CONSTANT
+10.2  // add lattice constant
+
+LATTICE_VECTORS
+2 0 0
+0 2 0
+0 0 2
+
+ATOMIC_POSITIONS
+Cartesian //Cartesian or Direct coordinate.
+
+Si	// Element type	
+0	// magnetism
+64	// number of atoms
+0.00 0.00 0.00 0 0 0
+0.25 0.25 0.25 0 0 0
+0.00 0.50 0.50 0 0 0
+0.25 0.75 0.75 0 0 0
+0.50 0.00 0.50 0 0 0
+0.75 0.25 0.75 0 0 0
+0.50 0.50 0.00 0 0 0
+0.75 0.75 0.25 0 0 0
+1.00 0.00 0.00 0 0 0
+1.25 0.25 0.25 0 0 0
+1.00 0.50 0.50 0 0 0
+1.25 0.75 0.75 0 0 0
+1.50 0.00 0.50 0 0 0
+1.75 0.25 0.75 0 0 0
+1.50 0.50 0.00 0 0 0
+1.75 0.75 0.25 0 0 0
+0.00 1.00 0.00 0 0 0
+0.25 1.25 0.25 0 0 0
+0.00 1.50 0.50 0 0 0
+0.25 1.75 0.75 0 0 0
+0.50 1.00 0.50 0 0 0
+0.75 1.25 0.75 0 0 0
+0.50 1.50 0.00 0 0 0
+0.75 1.75 0.25 0 0 0
+1.00 1.00 0.00 0 0 0
+1.25 1.25 0.25 0 0 0
+1.00 1.50 0.50 0 0 0
+1.25 1.75 0.75 0 0 0
+1.50 1.00 0.50 0 0 0
+1.75 1.25 0.75 0 0 0
+1.50 1.50 0.00 0 0 0
+1.75 1.75 0.25 0 0 0
+0.00 0.00 1.00 0 0 0
+0.25 0.25 1.25 0 0 0
+0.00 0.50 1.50 0 0 0
+0.25 0.75 1.75 0 0 0
+0.50 0.00 1.50 0 0 0
+0.75 0.25 1.75 0 0 0
+0.50 0.50 1.00 0 0 0
+0.75 0.75 1.25 0 0 0
+1.00 0.00 1.00 0 0 0
+1.25 0.25 1.25 0 0 0
+1.00 0.50 1.50 0 0 0
+1.25 0.75 1.75 0 0 0
+1.50 0.00 1.50 0 0 0
+1.75 0.25 1.75 0 0 0
+1.50 0.50 1.00 0 0 0
+1.75 0.75 1.25 0 0 0
+0.00 1.00 1.00 0 0 0
+0.25 1.25 1.25 0 0 0
+0.00 1.50 1.50 0 0 0
+0.25 1.75 1.75 0 0 0
+0.50 1.00 1.50 0 0 0
+0.75 1.25 1.75 0 0 0
+0.50 1.50 1.00 0 0 0
+0.75 1.75 1.25 0 0 0
+1.00 1.00 1.00 0 0 0
+1.25 1.25 1.25 0 0 0
+1.00 1.50 1.50 0 0 0
+1.25 1.75 1.75 0 0 0
+1.50 1.00 1.50 0 0 0
+1.75 1.25 1.75 0 0 0
+1.50 1.50 1.00 0 0 0
+1.75 1.75 1.25 0 0 0
+
diff --git a/examples/pexsi/scf_spin_Fe2/INPUT b/examples/pexsi/scf_spin_Fe2/INPUT
new file mode 100644
index 0000000000..a6a5bcc971
--- /dev/null
+++ b/examples/pexsi/scf_spin_Fe2/INPUT
@@ -0,0 +1,22 @@
+INPUT_PARAMETERS
+suffix         autotest
+#nbands   40 
+
+calculation    scf
+ecutwfc        20
+scf_thr            1.0e-8
+scf_nmax          50
+out_chg     0
+
+mixing_type    broyden
+
+
+ks_solver     pexsi
+pexsi_temp    0.1
+pexsi_npole   80
+basis_type    lcao
+gamma_only    1
+symmetry      0
+nspin         2
+pseudo_dir	../../../tests/PP_ORB
+orbital_dir	../../../tests/PP_ORB
diff --git a/examples/pexsi/scf_spin_Fe2/KPT b/examples/pexsi/scf_spin_Fe2/KPT
new file mode 100644
index 0000000000..c289c0158a
--- /dev/null
+++ b/examples/pexsi/scf_spin_Fe2/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+1 1 1 0 0 0
diff --git a/examples/pexsi/scf_spin_Fe2/STRU b/examples/pexsi/scf_spin_Fe2/STRU
new file mode 100644
index 0000000000..b7a2039467
--- /dev/null
+++ b/examples/pexsi/scf_spin_Fe2/STRU
@@ -0,0 +1,29 @@
+ATOMIC_SPECIES
+Fe1 1.000 Fe_ONCV_PBE-1.0.upf
+Fe2 1.000 Fe_ONCV_PBE-1.0.upf
+
+NUMERICAL_ORBITAL
+Fe_gga_9au_100Ry_4s2p2d1f.orb
+Fe_gga_9au_100Ry_4s2p2d1f.orb
+
+LATTICE_CONSTANT
+15
+
+LATTICE_VECTORS
+ 1.00    0.50     0.50
+ 0.50    1.00     0.50
+ 0.50    0.50     1.00
+ATOMIC_POSITIONS
+Direct
+
+Fe1
+5.0
+1
+0.00            0.00            0.00            1        1       1
+
+Fe2
+-5.0
+1
+0.50            0.50            0.50            1        1       1
+
+

From b0ef9ad1965bb9012d8a00b15996d7106baa15e6 Mon Sep 17 00:00:00 2001
From: rhx's linux <renhongxu0820@hotmail.com>
Date: Sat, 13 Apr 2024 22:33:29 +0800
Subject: [PATCH 42/44] fix pexsi unit test (original version shouldn't run)

---
 .../module_hsolver/test/diago_pexsi_test.cpp  | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp
index 62c915e614..4b3b8aba99 100644
--- a/source/module_hsolver/test/diago_pexsi_test.cpp
+++ b/source/module_hsolver/test/diago_pexsi_test.cpp
@@ -174,7 +174,32 @@ template<class T> class PexsiPrepare
 
     void set_pexsi_vars()
     {
-        pexsi::PEXSI_Solver::set_pexsi_vars();
+        // pexsi::PEXSI_Solver::set_pexsi_vars();
+        pexsi::PEXSI_Solver::pexsi_npole = 40;
+        pexsi::PEXSI_Solver::pexsi_inertia = true;
+        pexsi::PEXSI_Solver::pexsi_nmax = 80;
+        // pexsi_symbolic = 1;
+        pexsi::PEXSI_Solver::pexsi_comm = true;
+        pexsi::PEXSI_Solver::pexsi_storage = true;
+        pexsi::PEXSI_Solver::pexsi_ordering = 0;
+        pexsi::PEXSI_Solver::pexsi_row_ordering = 1;
+        pexsi::PEXSI_Solver::pexsi_nproc = 1;
+        pexsi::PEXSI_Solver::pexsi_symm = true;
+        pexsi::PEXSI_Solver::pexsi_trans = false;
+        pexsi::PEXSI_Solver::pexsi_method = 1;
+        pexsi::PEXSI_Solver::pexsi_nproc_pole = 1;
+        // pexsi_spin = 2;
+        pexsi::PEXSI_Solver::pexsi_temp = 0.015;
+        pexsi::PEXSI_Solver::pexsi_gap = 0;
+        pexsi::PEXSI_Solver::pexsi_delta_e = 20.0;
+        pexsi::PEXSI_Solver::pexsi_mu_lower = -10;
+        pexsi::PEXSI_Solver::pexsi_mu_upper = 10;
+        pexsi::PEXSI_Solver::pexsi_mu = 0.0;
+        pexsi::PEXSI_Solver::pexsi_mu_thr = 0.05;
+        pexsi::PEXSI_Solver::pexsi_mu_expand = 0.3;
+        pexsi::PEXSI_Solver::pexsi_mu_guard = 0.2;
+        pexsi::PEXSI_Solver::pexsi_elec_thr = 0.001;
+        pexsi::PEXSI_Solver::pexsi_zero_thr = 1e-10;
         pexsi::PEXSI_Solver::pexsi_mu = mu;
     }
 

From 5b53d6fe78125ee45985bc1d689c5707483cc289 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Wed, 17 Apr 2024 11:10:02 +0800
Subject: [PATCH 43/44] add building docs for pexsi

---
 docs/advanced/install.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/advanced/install.md b/docs/advanced/install.md
index d6201a060f..a5deeb5888 100644
--- a/docs/advanced/install.md
+++ b/docs/advanced/install.md
@@ -104,6 +104,18 @@ Currently supported math functions:
 cmake -B build -DUSE_ABACUS_LIBM=1
 ```
 
+## Build with PEXSI support
+
+ABACUS supports the PEXSI library for gamma only LCAO calculations. PEXSI version 2.0.0 is tested to work with ABACUS, please always use the latest version of PEXSI. 
+
+To build ABACUS with PEXSI support, you need to compile PEXSI (and its dependencies) first. Please refer to the [PEXSI Installation Guide](https://pexsi.readthedocs.io/en/latest/install.html) for more details. Note that PEXSI requires ParMETIS and SuperLU_DIST.
+
+After compiling PEXSI, you can set `ENABLE_PEXSI` to `ON`. If the libraries are not installed in standard paths, you can set `PEXSI_DIR`, `ParMETIS_DIR` and `SuperLU_DIST_DIR` to the corresponding directories.
+
+```bash
+cmake -B build -DENABLE_PEXSI=ON -DPEXSI_DIR=${path to PEXSI installation directory} -DParMETIS_DIR=${path to ParMETIS installation directory} -DSuperLU_DIST_DIR=${path to SuperLU_DIST installation directory}
+```
+
 ## Build ABACUS with make
 
 > Note: We suggest using CMake to configure and compile.

From 110e5b07f5cfe5c75e7122831922003390edb0d6 Mon Sep 17 00:00:00 2001
From: Flying-dragon-boxing <renhongxu0820@hotmail.com>
Date: Thu, 18 Apr 2024 14:45:35 +0800
Subject: [PATCH 44/44] set cxx standard to c++14, which is required in
 make_unique

---
 CMakeLists.txt                        | 1 +
 source/module_hsolver/diago_pexsi.cpp | 1 +
 2 files changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a4671d3dd..8a4b3fe79a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -215,6 +215,7 @@ if(ENABLE_LCAO)
     target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi)
     include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR})
     add_compile_definitions(__PEXSI)
+    set(CMAKE_CXX_STANDARD 14)
   endif()
 else()
   set(ENABLE_DEEPKS OFF)
diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp
index a88349160d..4076922626 100644
--- a/source/module_hsolver/diago_pexsi.cpp
+++ b/source/module_hsolver/diago_pexsi.cpp
@@ -1,5 +1,6 @@
 #include <mpi.h>
 #include <complex>
+#include <memory>
 #ifdef __PEXSI
 #include "diago_pexsi.h"
 #include "module_base/global_variable.h"