"""
zmp.py

Functions associated with zmp inversion
"""


import psi4
psi4.core.be_quiet()
import numpy as np
from functools import reduce


eps = np.finfo(float).eps

#added by Ehsan
import matplotlib.pyplot as plt

class ZMP():
    """
    ZMP Class
    Performs ZMP optimization according to: 
    
    1) 'From electron densities to Kohn-Sham kinetic energies, orbital energies,
    exchange-correlation potentials, and exchange-correlation energies' by
    Zhao + Morrison + Parr. 
    https://doi.org/10.1103/PhysRevA.50.2138
    """
    def zmp(self, 
            opt_max_iter=100, 
            opt_tol= psi4.core.get_option("SCF", "D_CONVERGENCE"), 
            lambda_list=[70],
            zmp_mixing = 1,
            print_scf = False, 
            ):

        """
        Performs ZMP optimization according to: 
        
        1) 'From electron densities to Kohn-Sham kinetic energies, orbital energies,
        exchange-correlation potentials, and exchange-correlation energies' by
        Zhao + Morrison + Parr. 
        https://doi.org/10.1103/PhysRevA.50.2138

        Additional DIIS algorithms obtained from:
        2) 'Psi4NumPy: An interactive quantum chemistry programming environment 
        for reference implementations and rapid development.' by 
        Daniel G.A. Smith and others. 
        https://doi.org/10.1021/acs.jctc.8b00286
 
        Functionals that drive the SCF procedure are obtained from:
        https://doi.org/10.1002/qua.26400

        Parameters:
        -----------
        lambda_list: list
            List of Lamda parameters used as a coefficient for Hartree
            difference in SCF cycle.
        zmp_mixing: float, optional
            mixing \in [0,1]. How much of the new potential is added in.
            For example, zmp_mixing = 0 means the traditional ZMP, i.e. all the potentials from previous
            smaller lambda are ignored.
            Zmp_mixing = 1 means that all the potentials of previous lambdas are accumulated, the larger lambda
            potential are meant to fix the wrong/inaccurate region of the potential of the sum of the previous
            potentials instead of providing an entire new potentials.
            default: 1
        opt_max_iter: float
            Maximum number of iterations for scf cycle
        opt_tol: float
            Convergence criteria set for Density Difference and DIIS error.
        return:
            The result will be stored in self.proto_density_a and self.proto_density_b
            For zmp_mixing==1, restricted (ref==1):
                self.proto_density_a = \sum_i lambda_i * (Da_i - Dt[0]) - 1/N * (Dt[0])
                self.proto_density_b = \sum_i lambda_i * (Db_i - Dt[1]) - 1/N * (Dt[1]);
            unrestricted (ref==1):
                self.proto_density_a = \sum_i lambda_i * (Da_i - Dt[0]) - 1/N * (Dt[0] + Dt[1])
                self.proto_density_b = \sum_i lambda_i * (Db_i - Dt[1]) - 1/N * (Dt[0] + Dt[1]);
            For restricted (ref==1):
                vxc = \int dr' \frac{self.proto_density_a + self.proto_density_b}{|r-r'|}
                    = 2 * \int dr' \frac{self.proto_density_a}{|r-r'|};
            for unrestricted (ref==2):
                vxc_up = \int dr' \frac{self.proto_density_a}{|r-r'|}
                vxc_down = \int dr' \frac{self.proto_density_b}{|r-r'|}.
            To get potential on grid, one needs to do
                vxc = self.on_grid_esp(Da=self.proto_density_a, Db=self.proto_density_b, grid=grid) for restricted;
                vxc_up = self.on_grid_esp(Da=self.proto_density_a, Db=np.zeros_like(self.proto_density_a),
                          grid=grid) for unrestricted;
        """
        self.diis_space = 100
        self.mixing = zmp_mixing

        print("\nRunning ZMP:")
        self.zmp_scf(lambda_list, opt_max_iter, print_scf, D_conv=opt_tol)

    def zmp_scf(self, 
            lambda_list,
            maxiter, 
            print_scf,
            D_conv):
        """
        Performs scf cycle
        Parameters:
            zmp_functional: options the penalty term.
            But others are not currently working except for Hartree penalty (original ZMP).
        ----------
        """       
        # Target density on grid
        if self.ref == 1:
            # density() is a method in the class Psi4Grider() in module  psi4grider.py (added by Ehsan)
            D0 = self.eng.grid.density(Da=self.Dt[0])
            
            
        else:
            D0 = self.eng.grid.density(Da=self.Dt[0], Db=self.Dt[1])
        
        # Initialize Stuff
        vc_previous_a = np.zeros((self.nbf, self.nbf))
         
        vc_previous_b = np.zeros((self.nbf, self.nbf))
        self.Da = self.Dt[0]
        self.Db = self.Dt[1]
        Da = self.Dt[0]
        Db = self.Dt[1]
        Cocca = self.ct[0]
        #print("Cocca: ", Cocca, type(Cocca))
        Coccb = self.ct[1]

        grid_diff_old = 1/np.finfo(float).eps
        
        self.proto_density_a =  np.zeros_like(Da)
        
        self.proto_density_b =  np.zeros_like(Db)

#------------->  Iterating over lambdas:
        Ddif = []
        L = []
        for lam_i in lambda_list:
            self.shift = 0.1 * lam_i
            D_old = self.Dt[0]

            # Trial & Residual Vector Lists
            state_vectors_a, state_vectors_b = [], []
            error_vectors_a, error_vectors_b = [], []

            for SCF_ITER in range(1,maxiter):

#------------->  Generate Fock Matrix:
                vc = self.generate_s_functional(lam_i,
                                                Cocca, Coccb, 
                                                Da, Db)

                #Equation 10 of Reference (1). Level shift. 
                Fa = self.T + self.V + self.va + vc[0] + vc_previous_a
                Fa += (self.S2 - reduce(np.dot, (self.S2, Da, self.S2))) * self.shift
                #added by Ehsan: a function (np.dot : dot product) applies on an iterable (self.S2, Da, self.S2) and gives one output (a new matrix)
                
                if self.ref == 2:
                    Fb = self.T + self.V + self.vb + vc[1] + vc_previous_b
                    Fb += (self.S2 - reduce(np.dot, (self.S2, Db, self.S2))) * self.shift
                

    #------------->  DIIS:
                if SCF_ITER > 1:
                    #Construct the AO gradient
                    # r = (A(FDS - SDF)A)_mu_nu
                    # added by Ehsan (self.A: Inverse squared root of S matrix), grad_a is a matrix showing the gradients
                    grad_a = self.A.dot(Fa.dot(Da).dot(self.S2) - self.S2.dot(Da).dot(Fa)).dot(self.A)
                    grad_a[np.abs(grad_a) < eps] = 0.0

                    if SCF_ITER < self.diis_space:
                        state_vectors_a.append(Fa.copy())
                        error_vectors_a.append(grad_a.copy())
                        
                    else:
                        state_vectors_a.append(Fa.copy())
                        error_vectors_a.append(grad_a.copy())
                        del state_vectors_a[0]
                        del error_vectors_a[0]

                    #Build inner product of error vectors
                    #dimension of the DIIS subspace = Bdim (by Ehsan)
                    Bdim = len(state_vectors_a) 
                    # one column and one row are added to the matrix to be fiiled with -1 (this is part of the DIIS prodecure)
                    #np.empty: the elements of the array will initially contain whatever data was already in the memory allocated for the array
                    Ba = np.empty((Bdim + 1, Bdim + 1))  
                    
                    # sets the last row and the last column of the matrix Ba to -1
                    Ba[-1, :] = -1 
                    Ba[:, -1] = -1
                    Ba[-1, -1] = 0
                    
                    Bb = Ba.copy()
                    

                    for i in range(len(state_vectors_a)):
                        for j in range(len(state_vectors_a)):
                            # Ba[i,j] will be a number made of the sum of inner products of corresponding elements in matrices 
                            Ba[i,j] = np.einsum('ij,ij->', error_vectors_a[i], error_vectors_a[j])
                            
                    
                    #Build almost zeros matrix to generate inverse. 
                    RHS = np.zeros(Bdim + 1)
                    RHS[-1] = -1
                    

                    #Find coefficient matrix:
                    
                    # x = np.linalg.solve(A, b) Solve the linear system A*x = b

                    Ca = np.linalg.solve(Ba, RHS.copy())
                    
                    Ca[np.abs(Ca) < eps] = 0.0 

                    #Generate new fock Matrix:
                    Fa = np.zeros_like(Fa)
                    # .shape[0] gives the number of rows in a 2D array
                    for i in range(Ca.shape[0] - 1):
                        Fa += Ca[i] * state_vectors_a[i]
                    
                    #diis_error_a is the maximum error element in the last error vectors matrix
                    diis_error_a = np.max(np.abs(error_vectors_a[-1]))
                    if self.ref ==  1:
                        Fb = Fa.copy()
                        diis_error = 2 * diis_error_a

                    else:
                        grad_b = self.A.dot(Fb.dot(Db).dot(self.S2) - self.S2.dot(Db).dot(Fb)).dot(self.A)
                        grad_b[np.abs(grad_b) < eps] = 0.0
                        
                        if SCF_ITER < self.diis_space:
                            state_vectors_b.append(Fb.copy())
                            error_vectors_b.append(grad_b.copy())
                        else:
                            state_vectors_b.append(Fb.copy())
                            error_vectors_b.append(grad_b.copy())
                            del state_vectors_b[0]
                            del error_vectors_b[0]

                        for i in range(len(state_vectors_b)):
                            for j in range(len(state_vectors_b)):
                                Bb[i,j] = np.einsum('ij,ij->', error_vectors_b[i], error_vectors_b[j])

                        diis_error_b = np.max(np.abs(error_vectors_b[-1]))
                        diis_error = diis_error_a + diis_error_b

                        Cb = np.linalg.solve(Bb, RHS.copy())
                        Cb[np.abs(Cb) < eps] = 0.0 

                        Fb = np.zeros_like(Fb)
                        for i in range(Cb.shape[0] - 1):
                            Fb += Cb[i] * state_vectors_b[i]
                # for the first iteration the error is set to 1.0
                else:
                    diis_error = 1.0
                
    #------------->  Diagonalization | Check convergence:
                # diagonalize() method has been defined in inventer.py . the inputs are fock matrix and number of occupied orbitals
                # this is to find the new density matrix. Here the eigenfunction is solved to get eigenvalues and coefficients
                Ca, Cocca, Da, eigs_a = self.diagonalize(Fa, self.nalpha)
                # eigs_a is a one dimensioanl matrix (size = nbf) of eigenvalues or enrgies 
                
                if self.ref == 2:
                    Cb, Coccb, Db, eigs_b = self.diagonalize(Fb, self.nbeta)
                else: 
                    Cb, Coccb, Db, eigs_b = Ca.copy(), Cocca.copy(), Da.copy(), eigs_a.copy()

                #difference of the new and old density matrices
                ddm = D_old - Da
                D_old = Da
                # the maximum element in the differnce denstiy matrix is taken as the density error value
                derror = np.max(np.abs(ddm))

                if print_scf:
                    if np.mod(SCF_ITER,5) == 0.0:
                        print(f"Iteration: {SCF_ITER:3d} | Self Convergence Error: {derror:10.5e} | DIIS Error: {diis_error:10.5e}")
                
                #DIIS error may improve as fast as the D_conv. Relax the criteria an order of magnitude. 
                if abs(derror) < D_conv and abs(diis_error) < D_conv*10: 
                    # here SCF convergence is reached
                    break
                if SCF_ITER == maxiter - 1:
                    raise ValueError("ZMP Error: Maximum Number of SCF cycles reached. Try different settings.")

            if self.ref == 1:
                # map the current density on grid with n points depending on the size of basis set
                density_current = self.eng.grid.density(Da=Da)
                
            else:
                density_current_a = self.eng.grid.density(Da=Da, Db=Db)
            #the difference between the current and exact density is evaluated on grid
            grid_diff = np.max(np.abs(D0 - density_current))
            if np.abs(grid_diff_old) < np.abs(grid_diff):
                # This is a greedy algorithm: if the density error stopped improving for this lambda, we will stop here.
                print(f"\nZMP halted at lambda={lam_i}. Density Error Stops Updating: old: {grid_diff_old}, current: {grid_diff}.")
                break

            grid_diff_old = grid_diff
            print(f"SCF Converged for lambda:{int(lam_i):5d}. Max density difference: {grid_diff}")
            #added by Ehsan
            Ddif.append(grid_diff)
            L.append(lam_i)
            # D0 is on grid. density_current is also on grid.
            # Dt[Dta,Dtb] and Da or Db are just arrays or matrices
            self.proto_density_a += lam_i * (Da - self.Dt[0]) * self.mixing
            if self.ref == 2:
                self.proto_density_b += lam_i * (Db - self.Dt[1]) * self.mixing
            else:
                self.proto_density_b = self.proto_density_a.copy()

            vc_previous_a += vc[0] * self.mixing
            if self.ref == 2:
                #add a portion of previous vc to the new one
                vc_previous_b += vc[1] * self.mixing

            # this is the lambda that is already proven to be improving the density, i.e. the corresponding
            # potential has updated to proto_density
            successful_lam = lam_i
            # The proto_density corresponds to successful_lam
            successful_proto_density = [(Da - self.Dt[0]), (Db - self.Dt[1])]
# -------------> END Iterating over lambdas:
        
        #added by Ehsan
        plt.plot(L, Ddif)
        plt.xlabel('Lambda')
        plt.ylabel('Delta-Density')
        plt.title(f"basis set: {self.basis}")
        plt.savefig('Lam_D_' + self.basis+ '.pdf')
        plt.close()

        self.proto_density_a += successful_lam * successful_proto_density[0] * (1 - self.mixing)
        if self.guide_components.lower() == "fermi_amaldi":
            # for ref==1, vxc = \int dr (proto_density_a + proto_density_b)/|r-r'| - 1/N*vH
            if self.ref == 1:
                self.proto_density_a -= (1 / (self.nalpha + self.nbeta)) * (self.Dt[0])
            # for ref==1, vxc = \int dr (proto_density_a)/|r-r'| - 1/N*vH
            else:
                self.proto_density_a -= (1 / (self.nalpha + self.nbeta)) * (self.Dt[0] + self.Dt[1])

        self.Da = Da
        self.Ca = Ca
        self.Coca = Cocca
        self.eigvecs_a = eigs_a

        if self.ref == 2:
            self.proto_density_b += successful_lam * successful_proto_density[1] * (1 - self.mixing)
            if self.guide_components.lower() == "fermi_amaldi":
                # for ref==1, vxc = \int dr (proto_density_a + proto_density_b)/|r-r'| - 1/N*vH
                # an inner if caluse with an opposite condition!!!
                if self.ref == 1:
                    self.proto_density_b -= (1 / (self.nalpha + self.nbeta)) * (self.Dt[1])
                # for ref==1, vxc = \int dr (proto_density_a)/|r-r'| - 1/N*vH
                else:
                    self.proto_density_b -= (1 / (self.nalpha + self.nbeta)) * (self.Dt[0] + self.Dt[1])
            self.Db = Db
            self.Cb = Cb
            self.Cocb = Coccb
            self.eigvecs_b = eigs_b

        else:
            self.proto_density_b = self.proto_density_a.copy()
            self.Db = self.Da.copy()
            self.Cb = self.Ca.copy()
            self.Cocb = self.Coca.copy()
            self.eigvecs_b = self.eigvecs_a.copy()


    def generate_s_functional(self, lam, Cocca, Coccb, Da, Db):
        """
        Generates S_n Functional as described in:
        https://doi.org/10.1002/qua.26400
        """

        # J is the Coulomb Matrix (note added by Ehsan)
        J = self.eng.compute_hartree(Cocca, Coccb)
        # J is computed from the occupied orbitals with the compoute_hartree() method in engine/psi4.py
        # the matrix Cocc is typically an n×m matrix, where n is the total number of basis functions and m is the number of occupied orbitals.
        # J[0] corresponds to the Coulomb Matrix based on alpha occupied orbitals (note added by Ehsan)
        # Here density (D0) is not directly used!
        #D0 = Cocc x Cocc*+, Cocc*+ is the conjugate transpose of the coefficient matrix of occupied orbitals

        #Equation 7 of Reference (1), which gives Vc(r) for each lambda (original ZMP paper)
        if self.ref == 1:
            vc_a = 2 * lam * ( J[0] - self.J0[0] ) 
            self.vca = J[0] - self.J0[0]
            vc = [vc_a]
        else:
            vc_a = lam * ( J[0] - self.J0[0] ) 
            vc_b = lam * ( J[1] - self.J0[1] )
            vc = [vc_a, vc_b]            
        # in practice Vc(r) will be a matrix (nbf x nbf) obtained from the difference between two Coulomb matrices
        return vc