diff --git a/src_clean/Ewald_Energy_Functions.h b/src_clean/Ewald_Energy_Functions.h index b446936..310c7a7 100644 --- a/src_clean/Ewald_Energy_Functions.h +++ b/src_clean/Ewald_Energy_Functions.h @@ -94,43 +94,112 @@ __device__ void Initialize_Vectors(Boxsize Box, size_t Oldsize, size_t Newsize, } } -__global__ void Initialize_WaveVector_General(Boxsize Box, int3 kmax, Atoms* d_a, Atoms New, Atoms Old, size_t Oldsize, size_t Newsize, size_t SelectedComponent, size_t Location, size_t chainsize, size_t numberOfAtoms, int MoveType) +__device__ void Initialize_Vectors_SPECIAL(Boxsize Box, size_t Oldsize, size_t Newsize, Atoms Old, size_t numberOfAtoms, int3 kmax) { - //Zhao's note: need to think about changing this boolean to switch// - if(MoveType == TRANSLATION || MoveType == ROTATION || MoveType == SPECIAL_ROTATION || MoveType == SINGLE_INSERTION || MoveType == SINGLE_DELETION) // Translation/Rotation/single_insertion/single_deletion // + int kx_max = kmax.x; + int ky_max = kmax.y; + int kz_max = kmax.z; + // Calculate remaining positive kx, ky and kz by recurrence + for(size_t kx = 2; kx <= kx_max; ++kx) { - //For Translation/Rotation, the Old positions are already in the Old struct, just need to put the New positions into Old, after the Old positions// - for(size_t i = Oldsize; i < Oldsize + Newsize; i++) //chainsize here is the total size of the molecule for translation/rotation + for(size_t i = 0; i != numberOfAtoms; ++i) { - Old.pos[i] = New.pos[i - Oldsize]; - Old.scale[i] = New.scale[i - Oldsize]; - Old.charge[i] = New.charge[i - Oldsize]; - Old.scaleCoul[i] = New.scaleCoul[i - Oldsize]; + Box.eik_x[i + kx * numberOfAtoms] = multiply(Box.eik_x[i + (kx - 1) * numberOfAtoms], Box.eik_x[i + 1 * numberOfAtoms]); } } - else if(MoveType == INSERTION || MoveType == CBCF_INSERTION) // Insertion & Fractional Insertion // + for(size_t ky = 2; ky <= ky_max; ++ky) { - //Put the trial orientations in New to Old, right after the first bead position// - for(size_t i = 0; i < chainsize; i++) + for(size_t i = 0; i != numberOfAtoms; ++i) { - Old.pos[i + 1] = New.pos[Location * chainsize + i]; - Old.scale[i + 1] = New.scale[Location * chainsize + i]; - Old.charge[i + 1] = New.charge[Location * chainsize + i]; - Old.scaleCoul[i + 1] = New.scaleCoul[Location * chainsize + i]; + Box.eik_y[i + ky * numberOfAtoms] = multiply(Box.eik_y[i + (ky - 1) * numberOfAtoms], Box.eik_y[i + 1 * numberOfAtoms]); } } - else if(MoveType == DELETION || MoveType == CBCF_DELETION) // Deletion // + for(size_t kz = 2; kz <= kz_max; ++kz) + { + for(size_t i = 0; i != numberOfAtoms; ++i) + { + Box.eik_z[i + kz * numberOfAtoms] = multiply(Box.eik_z[i + (kz - 1) * numberOfAtoms], Box.eik_z[i + 1 * numberOfAtoms]); + } + } +} + +__device__ void Initialize_Vectors_thread(Complex* eik, size_t numberOfAtoms, int k_max) +{ + // Calculate remaining positive kx, ky and kz by recurrence + for(size_t k = 2; k <= k_max; ++k) { - for(size_t i = 0; i < Oldsize; i++) + for(size_t i = 0; i != numberOfAtoms; ++i) { - // For deletion, Location = UpdateLocation, see Deletion Move // - Old.pos[i] = d_a[SelectedComponent].pos[Location + i]; - Old.scale[i] = d_a[SelectedComponent].scale[Location + i]; - Old.charge[i] = d_a[SelectedComponent].charge[Location + i]; - Old.scaleCoul[i] = d_a[SelectedComponent].scaleCoul[Location + i]; + eik[i + k * numberOfAtoms] = multiply(eik[i + (k - 1) * numberOfAtoms], eik[i + 1 * numberOfAtoms]); } } - Initialize_Vectors(Box, Oldsize, Newsize, Old, numberOfAtoms, kmax); +} + +__global__ void Initialize_WaveVector_General(Boxsize Box, int3 kmax, Atoms* d_a, Atoms New, Atoms Old, size_t Oldsize, size_t Newsize, size_t SelectedComponent, size_t Location, size_t chainsize, size_t numberOfAtoms, int MoveType) +{ + //Zhao's note: need to think about changing this boolean to switch// + size_t ij = blockIdx.x * blockDim.x + threadIdx.x; + if(ij < (Newsize + Oldsize)) + { + if(MoveType == TRANSLATION || MoveType == ROTATION || MoveType == SPECIAL_ROTATION || MoveType == SINGLE_INSERTION || MoveType == SINGLE_DELETION) // Translation/Rotation/single_insertion/single_deletion // + { + //For Translation/Rotation, the Old positions are already in the Old struct, just need to put the New positions into Old, after the Old positions// + if(ij >= Oldsize) + { + Old.pos[ij] = New.pos[ij - Oldsize]; + Old.scale[ij] = New.scale[ij - Oldsize]; + Old.charge[ij] = New.charge[ij - Oldsize]; + Old.scaleCoul[ij] = New.scaleCoul[ij - Oldsize]; + } + } + else if(MoveType == INSERTION || MoveType == CBCF_INSERTION) // Insertion & Fractional Insertion // + { + //Put the trial orientations in New to Old, right after the first bead position// + if(ij < chainsize) + { + Old.pos[ij + 1] = New.pos[Location * chainsize + ij]; + Old.scale[ij + 1] = New.scale[Location * chainsize + ij]; + Old.charge[ij + 1] = New.charge[Location * chainsize + ij]; + Old.scaleCoul[ij + 1] = New.scaleCoul[Location * chainsize + ij]; + } + } + else if(MoveType == DELETION || MoveType == CBCF_DELETION) // Deletion // + { + if(ij < Oldsize) + { + // For deletion, Location = UpdateLocation, see Deletion Move // + Old.pos[ij] = d_a[SelectedComponent].pos[Location + ij]; + Old.scale[ij] = d_a[SelectedComponent].scale[Location + ij]; + Old.charge[ij] = d_a[SelectedComponent].charge[Location + ij]; + Old.scaleCoul[ij] = d_a[SelectedComponent].scaleCoul[Location + ij]; + } + } + //Old+New// + Complex tempcomplex; tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + double3 pos = Old.pos[ij]; + Box.eik_x[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_y[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_z[ij + 0 * numberOfAtoms] = tempcomplex; + double3 s; matrix_multiply_by_vector(Box.InverseCell, pos, s); s*=2*M_PI; + tempcomplex.real = std::cos(s.x); tempcomplex.imag = std::sin(s.x); Box.eik_x[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.y); tempcomplex.imag = std::sin(s.y); Box.eik_y[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.z); tempcomplex.imag = std::sin(s.z); Box.eik_z[ij + 1 * numberOfAtoms] = tempcomplex; + } + __syncthreads(); + + if(ij == 0) + { + Initialize_Vectors_thread(Box.eik_x, numberOfAtoms, kmax.x); + } + else if(ij == 1) + { + Initialize_Vectors_thread(Box.eik_y, numberOfAtoms, kmax.y); + } + else if(ij == 2) + { + Initialize_Vectors_thread(Box.eik_z, numberOfAtoms, kmax.z); + } } __global__ void Initialize_WaveVector_LambdaChange(Boxsize Box, int3 kmax, Atoms* d_a, Atoms Old, size_t Oldsize, double2 newScale) @@ -140,43 +209,103 @@ __global__ void Initialize_WaveVector_LambdaChange(Boxsize Box, int3 kmax, Atoms __global__ void Initialize_WaveVector_Reinsertion(Boxsize Box, int3 kmax, double3* temp, Atoms* d_a, Atoms Old, size_t Oldsize, size_t Newsize, size_t realpos, size_t numberOfAtoms, size_t SelectedComponent) { - for(size_t i = 0; i < Oldsize; i++) + size_t ij = threadIdx.x + blockIdx.x * blockDim.x; + if(ij < (Oldsize + Newsize)) { - Old.pos[i] = d_a[SelectedComponent].pos[realpos + i]; - Old.scale[i] = d_a[SelectedComponent].scale[realpos + i]; - Old.charge[i] = d_a[SelectedComponent].charge[realpos + i]; - Old.scaleCoul[i] = d_a[SelectedComponent].scaleCoul[realpos + i]; + if(ij < Oldsize) + { + Old.pos[ij] = d_a[SelectedComponent].pos[realpos + ij]; + Old.scale[ij] = d_a[SelectedComponent].scale[realpos + ij]; + Old.charge[ij] = d_a[SelectedComponent].charge[realpos + ij]; + Old.scaleCoul[ij] = d_a[SelectedComponent].scaleCoul[realpos + ij]; + } + else + //if(ij >= Oldsize && (ij < (Oldsize + Newsize))) + { + Old.pos[ij] = temp[ij - Oldsize]; + Old.scale[ij] = d_a[SelectedComponent].scale[realpos + ij - Oldsize]; + Old.charge[ij] = d_a[SelectedComponent].charge[realpos + ij - Oldsize]; + Old.scaleCoul[ij] = d_a[SelectedComponent].scaleCoul[realpos + ij - Oldsize]; + } + + //Old+New// + Complex tempcomplex; tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + double3 pos = Old.pos[ij]; + Box.eik_x[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_y[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_z[ij + 0 * numberOfAtoms] = tempcomplex; + double3 s; matrix_multiply_by_vector(Box.InverseCell, pos, s); s*=2*M_PI; + tempcomplex.real = std::cos(s.x); tempcomplex.imag = std::sin(s.x); Box.eik_x[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.y); tempcomplex.imag = std::sin(s.y); Box.eik_y[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.z); tempcomplex.imag = std::sin(s.z); Box.eik_z[ij + 1 * numberOfAtoms] = tempcomplex; } - //Reinsertion New Positions stored in three arrays, other data are the same as the Old molecule information in d_a// - for(size_t i = Oldsize; i < Oldsize + Newsize; i++) //chainsize here is the total size of the molecule for translation/rotation + __syncthreads(); + if(ij == 0) { - Old.pos[i] = temp[i - Oldsize]; - Old.scale[i] = d_a[SelectedComponent].scale[realpos + i - Oldsize]; - Old.charge[i] = d_a[SelectedComponent].charge[realpos + i - Oldsize]; - Old.scaleCoul[i] = d_a[SelectedComponent].scaleCoul[realpos + i - Oldsize]; + Initialize_Vectors_thread(Box.eik_x, numberOfAtoms, kmax.x); + } + else if(ij == 1) + { + Initialize_Vectors_thread(Box.eik_y, numberOfAtoms, kmax.y); + } + else if(ij == 2) + { + Initialize_Vectors_thread(Box.eik_z, numberOfAtoms, kmax.z); } - Initialize_Vectors(Box, Oldsize, Newsize, Old, numberOfAtoms, kmax); } __global__ void Initialize_WaveVector_IdentitySwap(Boxsize Box, int3 kmax, double3* temp, Atoms* d_a, Atoms Old, size_t Oldsize, size_t Newsize, size_t realpos, size_t numberOfAtoms, size_t OLDComponent, size_t NEWComponent) { - for(size_t i = 0; i < Oldsize; i++) + size_t ij = threadIdx.x + blockIdx.x * blockDim.x; + + if(ij < (Oldsize + Newsize)) { - Old.pos[i] = d_a[OLDComponent].pos[realpos + i]; - Old.scale[i] = d_a[OLDComponent].scale[realpos + i]; - Old.charge[i] = d_a[OLDComponent].charge[realpos + i]; - Old.scaleCoul[i] = d_a[OLDComponent].scaleCoul[realpos + i]; + if(ij < Oldsize) + { + Old.pos[ij] = d_a[OLDComponent].pos[realpos + ij]; + Old.scale[ij] = d_a[OLDComponent].scale[realpos + ij]; + Old.charge[ij] = d_a[OLDComponent].charge[realpos + ij]; + Old.scaleCoul[ij] = d_a[OLDComponent].scaleCoul[realpos + ij]; + } + //IdentitySwap New Positions stored in three arrays, other data are the same as the Old molecule information in d_a// + //Zhao's note: assuming not performing identity swap on fractional molecules// + else + { + Old.pos[ij] = temp[ij - Oldsize]; + Old.scale[ij] = 1.0; + Old.charge[ij] = d_a[NEWComponent].charge[ij - Oldsize]; + Old.scaleCoul[ij] = 1.0; + } + + + //Old+New// + Complex tempcomplex; tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + tempcomplex.real = 1.0; tempcomplex.imag = 0.0; + double3 pos = Old.pos[ij]; + Box.eik_x[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_y[ij + 0 * numberOfAtoms] = tempcomplex; + Box.eik_z[ij + 0 * numberOfAtoms] = tempcomplex; + double3 s; matrix_multiply_by_vector(Box.InverseCell, pos, s); s*=2*M_PI; + tempcomplex.real = std::cos(s.x); tempcomplex.imag = std::sin(s.x); Box.eik_x[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.y); tempcomplex.imag = std::sin(s.y); Box.eik_y[ij + 1 * numberOfAtoms] = tempcomplex; + tempcomplex.real = std::cos(s.z); tempcomplex.imag = std::sin(s.z); Box.eik_z[ij + 1 * numberOfAtoms] = tempcomplex; + } - //IdentitySwap New Positions stored in three arrays, other data are the same as the Old molecule information in d_a// - //Zhao's note: assuming not performing identity swap on fractional molecules// - for(size_t i = Oldsize; i < Oldsize + Newsize; i++) //chainsize here is the total size of the molecule for translation/rotation + __syncthreads(); + + if(ij == 0) + { + Initialize_Vectors_thread(Box.eik_x, numberOfAtoms, kmax.x); + } + else if(ij == 1) + { + Initialize_Vectors_thread(Box.eik_y, numberOfAtoms, kmax.y); + } + else if(ij == 2) { - Old.pos[i] = temp[i - Oldsize]; - Old.scale[i] = 1.0; - Old.charge[i] = d_a[NEWComponent].charge[i - Oldsize]; - Old.scaleCoul[i] = 1.0; + Initialize_Vectors_thread(Box.eik_z, numberOfAtoms, kmax.z); } - Initialize_Vectors(Box, Oldsize, Newsize, Old, numberOfAtoms, kmax); } __global__ void JustStore_StructureFactor_Ewald(Boxsize Box, size_t nvec) @@ -435,11 +564,12 @@ double2 GPU_EwaldDifference_General(Boxsize& Box, Atoms*& d_a, Atoms& New, Atoms } size_t numberOfAtoms = Oldsize + Newsize; - Initialize_WaveVector_General<<<1,1>>>(Box, Box.kmax, d_a, New, Old, Oldsize, Newsize, SelectedComponent, Location, chainsize, numberOfAtoms, MoveType); checkCUDAErrorEwald("error Initializing Ewald Vectors"); + size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(Oldsize + Newsize, &Nblock, &Nthread); + Initialize_WaveVector_General<<>>(Box, Box.kmax, d_a, New, Old, Oldsize, Newsize, SelectedComponent, Location, chainsize, numberOfAtoms, MoveType); checkCUDAErrorEwald("error Initializing Ewald Vectors"); //Fourier Loop// size_t numberOfStructureFactors = (Box.kmax.x + 1) * (2 * Box.kmax.y + 1) * (2 * Box.kmax.z + 1); - size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); + Nblock = 0; Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); //If we separate Host-Guest from Guest-Guest, we can double the Nblock, so the first half does Guest-Guest, and the second half does Host-Guest// Fourier_Ewald_Diff<<>>(Box, SameType, CrossType, Old, alpha_squared, prefactor, Box.kmax, Oldsize, Newsize, Blocksum, UseTempVector, Nblock); @@ -502,11 +632,12 @@ double2 GPU_EwaldDifference_Reinsertion(Boxsize& Box, Atoms*& d_a, Atoms& Old, d Complex* SameType = Box.AdsorbateEik; Complex* CrossType = Box.FrameworkEik; // Construct exp(ik.r) for atoms and k-vectors kx, ky, kz = 0, 1 explicitly - Initialize_WaveVector_Reinsertion<<<1,1>>>(Box, Box.kmax, temp, d_a, Old, Oldsize, Newsize, UpdateLocation, numberOfAtoms, SelectedComponent); + size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(Oldsize + Newsize, &Nblock, &Nthread); + Initialize_WaveVector_Reinsertion<<>>(Box, Box.kmax, temp, d_a, Old, Oldsize, Newsize, UpdateLocation, numberOfAtoms, SelectedComponent); //Fourier Loop// size_t numberOfStructureFactors = (Box.kmax.x + 1) * (2 * Box.kmax.y + 1) * (2 * Box.kmax.z + 1); - size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); + Nblock = 0; Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); Fourier_Ewald_Diff<<>>(Box, SameType, CrossType, Old, alpha_squared, prefactor, Box.kmax, Oldsize, Newsize, Blocksum, false, Nblock); double sum[Nblock * 2]; double SameSum = 0.0; double CrossSum = 0.0; cudaMemcpy(sum, Blocksum, 2 * Nblock * sizeof(double), cudaMemcpyDeviceToHost); @@ -535,13 +666,16 @@ double2 GPU_EwaldDifference_IdentitySwap(Boxsize& Box, Atoms*& d_a, Atoms& Old, } numberOfAtoms = Oldsize + Newsize; + if(numberOfAtoms == 0) return {0.0, 0.0}; + // Construct exp(ik.r) for atoms and k-vectors kx, ky, kz = 0, 1 explicitly - Initialize_WaveVector_IdentitySwap<<<1,1>>>(Box, Box.kmax, temp, d_a, Old, Oldsize, Newsize, UpdateLocation, numberOfAtoms, OLDComponent, NEWComponent); + size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(Oldsize + Newsize, &Nblock, &Nthread); + Initialize_WaveVector_IdentitySwap<<>>(Box, Box.kmax, temp, d_a, Old, Oldsize, Newsize, UpdateLocation, numberOfAtoms, OLDComponent, NEWComponent); checkCUDAErrorEwald("Error in Initialization of WaveVector for Identity Swap\n"); Complex* SameType = Box.AdsorbateEik; Complex* CrossType = Box.FrameworkEik; //Fourier Loop// size_t numberOfStructureFactors = (Box.kmax.x + 1) * (2 * Box.kmax.y + 1) * (2 * Box.kmax.z + 1); - size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); + Nblock = 0; Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); Fourier_Ewald_Diff<<>>(Box, SameType, CrossType, Old, alpha_squared, prefactor, Box.kmax, Oldsize, Newsize, Blocksum, false, Nblock); double sum[Nblock * 2]; double SameSum = 0.0; double CrossSum = 0.0; cudaMemcpy(sum, Blocksum, 2 * Nblock * sizeof(double), cudaMemcpyDeviceToHost); @@ -562,7 +696,6 @@ double2 GPU_EwaldDifference_IdentitySwap(Boxsize& Box, Atoms*& d_a, Atoms& Old, double deltaExclusion = (SystemComponents.ExclusionIntra[OLDComponent] + SystemComponents.ExclusionAtom[OLDComponent]) * delta_scale; SameSum -= deltaExclusion; } - return {SameSum, 2.0 * CrossSum}; } diff --git a/src_clean/VDW_Coulomb.cu b/src_clean/VDW_Coulomb.cu index 7823a52..0fc13f7 100644 --- a/src_clean/VDW_Coulomb.cu +++ b/src_clean/VDW_Coulomb.cu @@ -627,7 +627,7 @@ double CPU_EwaldDifference(Boxsize& Box, Atoms& New, Atoms& Old, ForceField& FF, return ewaldE; } -__global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps) +__global__ void Calculate_Single_Body_Energy_VDWReal(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps) { //divide species into Host-Host, Host-Guest, and Guest-Guest// //However, Host-Host and Guest-Guest are mutually exclusive// @@ -844,7 +844,7 @@ __global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal(Boxsize //} } -__global__ void Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal_LambdaChange(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps, double2 newScale) +__global__ void Calculate_Single_Body_Energy_VDWReal_LambdaChange(Boxsize Box, Atoms* System, Atoms Old, Atoms New, ForceField FF, double* BlockEnergy, size_t ComponentID, size_t totalAtoms, size_t chainsize, bool* flag, int3 Nblocks, bool Do_New, bool Do_Old, int3 NComps, double2 newScale) { //divide species into Host-Host, Host-Guest, and Guest-Guest// //However, Host-Host and Guest-Guest are mutually exclusive// @@ -1182,7 +1182,7 @@ __global__ void Energy_difference_LambdaChange(Boxsize Box, Atoms* System, Atoms } } -__global__ void Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal(Boxsize Box, Atoms* System, Atoms NewMol, ForceField FF, double* Blocksum, size_t ComponentID, size_t totalAtoms, bool* flag, size_t totalthreads, size_t chainsize, size_t NblockForTrial, size_t HG_Nblock, int3 NComps, int2* ExcludeList) +__global__ void Calculate_Multiple_Trial_Energy_VDWReal(Boxsize Box, Atoms* System, Atoms NewMol, ForceField FF, double* Blocksum, size_t ComponentID, size_t totalAtoms, bool* flag, size_t totalthreads, size_t chainsize, size_t NblockForTrial, size_t HG_Nblock, int3 NComps, int2* ExcludeList) { //Dividing Nblocks into Nblocks for host-guest and for guest-guest// //NblockForTrial = HG_Nblock + GG_Nblock; @@ -1460,7 +1460,7 @@ __device__ void determine_comp_and_Molindex_from_thread(Atoms* System, size_t& M } } -__global__ void TotalVDWCoul(Boxsize Box, Atoms* System, ForceField FF, double* Blocksum, bool* flag, size_t InteractionPerThread, bool UseOffset, int3 BLOCK, int3 NComponents, size_t NFrameworkAtoms, size_t NAdsorbateAtoms, size_t NFrameworkZero_ExtraFramework, bool ConsiderIntra) +__global__ void TotalVDWRealCoulomb(Boxsize Box, Atoms* System, ForceField FF, double* Blocksum, bool* flag, size_t InteractionPerThread, bool UseOffset, int3 BLOCK, int3 NComponents, size_t NFrameworkAtoms, size_t NAdsorbateAtoms, size_t NFrameworkZero_ExtraFramework, bool ConsiderIntra) { extern __shared__ double sdata[]; //shared memory for partial sum// @@ -1644,7 +1644,7 @@ MoveEnergy Total_VDW_Coulomb_Energy(Simulations& Sim, Components& SystemComponen //Set Overlap Flag// cudaMemset(Sim.device_flag, false, sizeof(bool)); - TotalVDWCoul<<>>(Sim.Box, Sim.d_a, FF, Sim.Blocksum, Sim.device_flag, InteractionPerThread, UseOffset, BLOCKS, SystemComponents.NComponents, NHostAtom, NGuestAtom, NFrameworkZero_ExtraFramework, ConsiderIntra); + TotalVDWRealCoulomb<<>>(Sim.Box, Sim.d_a, FF, Sim.Blocksum, Sim.device_flag, InteractionPerThread, UseOffset, BLOCKS, SystemComponents.NComponents, NHostAtom, NGuestAtom, NFrameworkZero_ExtraFramework, ConsiderIntra); checkCUDAErrorEwald("WRONG TOTAL VDW+REAL ENERGY\n"); cudaDeviceSynchronize(); diff --git a/src_clean/fxn_main.h b/src_clean/fxn_main.h index 4ed0798..1a1f610 100644 --- a/src_clean/fxn_main.h +++ b/src_clean/fxn_main.h @@ -205,7 +205,7 @@ inline void Prepare_Widom(WidomStruct& Widom, Boxsize Box, Simulations& Sims, Co inline void Allocate_Copy_Ewald_Vector(Boxsize& device_Box, Components& SystemComponents) { - fprintf(SystemComponents.OUTPUT, "****** Allocating Ewald WaveVectors (INITIAL STAGE ONLY) ******\n"); + fprintf(SystemComponents.OUTPUT, "****** Allocating Ewald WaveVectors + StructureFactors (INITIAL STAGE ONLY) ******\n"); //Zhao's note: This only works if the box size is not changed, eik_xy might not be useful if box size is not changed// size_t eikx_size = SystemComponents.eik_x.size() * 2; size_t eiky_size = SystemComponents.eik_y.size() * 2; //added times 2 for box volume move// @@ -243,11 +243,11 @@ inline void Allocate_Copy_Ewald_Vector(Boxsize& device_Box, Components& SystemCo AdsorbateEik[i].real = 0.0; AdsorbateEik[i].imag = 0.0; FrameworkEik[i].real = 0.0; FrameworkEik[i].imag = 0.0; } - if(i < 10) fprintf(SystemComponents.OUTPUT, "Wave Vector %zu is %.5f %.5f\n", i, AdsorbateEik[i].real, AdsorbateEik[i].imag); + if(i < 10) fprintf(SystemComponents.OUTPUT, "Structure Factor %zu is %.5f %.5f\n", i, AdsorbateEik[i].real, AdsorbateEik[i].imag); } cudaMemcpy(device_Box.AdsorbateEik, AdsorbateEik, AdsorbateEiksize * sizeof(Complex), cudaMemcpyHostToDevice); checkCUDAError("error copying Complex"); cudaMemcpy(device_Box.FrameworkEik, FrameworkEik, AdsorbateEiksize * sizeof(Complex), cudaMemcpyHostToDevice); checkCUDAError("error copying Complex"); - fprintf(SystemComponents.OUTPUT, "****** DONE Allocating Ewald WaveVectors (INITIAL STAGE ONLY) ******\n"); + fprintf(SystemComponents.OUTPUT, "****** DONE Allocating Ewald WaveVectors + StructureFactors(INITIAL STAGE ONLY) ******\n"); } inline void Check_Simulation_Energy(Boxsize& Box, Atoms* System, ForceField FF, ForceField device_FF, Components& SystemComponents, int SIMULATIONSTAGE, size_t Numsim, Simulations& Sim, bool UseGPU) @@ -303,7 +303,7 @@ inline void Check_Simulation_Energy(Boxsize& Box, Atoms* System, ForceField FF, cudaDeviceSynchronize(); //Zhao's note: if doing initial energy, initialize and copy host Ewald to device// if(SIMULATIONSTAGE == INITIAL) Allocate_Copy_Ewald_Vector(Sim.Box, SystemComponents); - Check_WaveVector_CPUGPU(Sim.Box, SystemComponents); //Check WaveVector on the CPU and GPU// + Check_StructureFactor_CPUGPU(Sim.Box, SystemComponents); //Check StructureFactor on the CPU and GPU// cudaDeviceSynchronize(); } //Calculate Tail Correction Energy// diff --git a/src_clean/mc_cbcfc.h b/src_clean/mc_cbcfc.h index 86f1b6c..5f007ba 100644 --- a/src_clean/mc_cbcfc.h +++ b/src_clean/mc_cbcfc.h @@ -60,7 +60,7 @@ static inline MoveEnergy CBCF_LambdaChange(Components& SystemComponents, Simulat int3 NBlocks = {(int) HH_Nblock, (int) HG_Nblock, (int) GG_Nblock}; //x: HH_Nblock, y: HG_Nblock, z: GG_Nblock; bool Do_New = true; bool Do_Old = true; - Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal_LambdaChange<<>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents, newScale); + Calculate_Single_Body_Energy_VDWReal_LambdaChange<<>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents, newScale); cudaMemcpy(SystemComponents.flag, Sims.device_flag, sizeof(bool), cudaMemcpyDeviceToHost); @@ -330,7 +330,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim SystemComponents.Tmmc[SelectedComponent].currentBin = newBin; if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } final_energy = energy; } @@ -406,7 +406,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim SystemComponents.Tmmc[SelectedComponent].currentBin = newBin; if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } energy.take_negative(); energy += second_step_energy; @@ -463,7 +463,7 @@ static inline MoveEnergy CBCFMove(Components& SystemComponents, Simulations& Sim SystemComponents.Tmmc[SelectedComponent].currentBin = newBin; if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } final_energy = energy; } diff --git a/src_clean/mc_single_particle.h b/src_clean/mc_single_particle.h index 0aefe92..8f11930 100644 --- a/src_clean/mc_single_particle.h +++ b/src_clean/mc_single_particle.h @@ -99,7 +99,7 @@ static inline MoveEnergy SingleBodyMove(Components& SystemComponents, Simulation //printf("NHostAtom: %zu, HH_Nblock: %zu, HG_Nblock: %zu, NGuestAtom: %zu, GG_Nblock: %zu\n", NHostAtom, HH_Nblock, HG_Nblock, NGuestAtom, GG_Nblock); if(Atomsize != 0) { - Calculate_Single_Body_Energy_SEPARATE_HostGuest_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents); + Calculate_Single_Body_Energy_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Molsize, Sims.device_flag, NBlocks, Do_New, Do_Old, SystemComponents.NComponents); cudaMemcpy(SystemComponents.flag, Sims.device_flag, sizeof(bool), cudaMemcpyDeviceToHost); } @@ -212,7 +212,7 @@ static inline MoveEnergy SingleBodyMove(Components& SystemComponents, Simulation SystemComponents.Moves[SelectedComponent].Record_Move_Accept(MoveType); if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } } else {tot.zero(); }; diff --git a/src_clean/mc_swap_moves.h b/src_clean/mc_swap_moves.h index bab2cce..bfba047 100644 --- a/src_clean/mc_swap_moves.h +++ b/src_clean/mc_swap_moves.h @@ -134,7 +134,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, temp, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data"); cudaFree(temp); if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.// //energy.print(); return energy; @@ -186,7 +186,7 @@ static inline MoveEnergy CreateMolecule(Components& SystemComponents, Simulation Update_insertion_data<<<1,1>>>(Sims.d_a, Sims.Old, Sims.New, SelectedTrial, SelectedComponent, UpdateLocation, (int) SystemComponents.Moleculesize[SelectedComponent]); if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, INSERTION); return energy; @@ -612,7 +612,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati cudaFree(temp); //Zhao's note: BUG!!!!, Think about if OLD/NEW Component belong to different type (framework/adsorbate)// if(!FF.noCharges && ((SystemComponents.hasPartialCharge[NEWComponent]) ||(SystemComponents.hasPartialCharge[OLDComponent]))) - Update_Ewald_Vector(Sims.Box, false, SystemComponents, NEWComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, NEWComponent); //energy.print(); return energy; } diff --git a/src_clean/mc_utilities.h b/src_clean/mc_utilities.h index 4719da0..d769a88 100644 --- a/src_clean/mc_utilities.h +++ b/src_clean/mc_utilities.h @@ -275,7 +275,7 @@ static inline void AcceptInsertion(Components& SystemComponents, Simulations& Si Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, INSERTION); //true = Insertion// if(!noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } } @@ -288,7 +288,7 @@ static inline void AcceptDeletion(Components& SystemComponents, Simulations& Sim Update_NumberOfMolecules(SystemComponents, Sims.d_a, SelectedComponent, DELETION); //false = Deletion// if(!noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - Update_Ewald_Vector(Sims.Box, false, SystemComponents, SelectedComponent); + Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); } //Zhao's note: the last molecule can be the fractional molecule, (fractional molecule ID is stored on the host), we need to update it as well (at least check it)// //The function below will only be processed if the system has a fractional molecule and the transfered molecule is NOT the fractional one // diff --git a/src_clean/mc_widom.h b/src_clean/mc_widom.h index 2dfdc47..3baea1a 100644 --- a/src_clean/mc_widom.h +++ b/src_clean/mc_widom.h @@ -326,7 +326,7 @@ static inline double Widom_Move_FirstBead_PARTIAL(Components& SystemComponents, size_t HGGG_Nblock = HG_Nblock + GG_Nblock; if(Atomsize != 0) { - Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded,1, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG)"); + Calculate_Multiple_Trial_Energy_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded,1, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG)"); cudaMemcpy(SystemComponents.flag, Sims.device_flag, NumberOfTrials*sizeof(bool), cudaMemcpyDeviceToHost); } //printf("OldNBlock: %zu, HG_Nblock: %zu, GG_Nblock: %zu, HGGG_Nblock: %zu\n", Nblock, HG_Nblock, GG_Nblock, HGGG_Nblock); @@ -458,7 +458,7 @@ static inline double Widom_Move_Chain_PARTIAL(Components& SystemComponents, Simu //Setup calculation for separated HG + GG interactions// if(Atomsize != 0) { - Calculate_Multiple_Trial_Energy_SEPARATE_HostGuest_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded, chainsize, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG Orientation)"); + Calculate_Multiple_Trial_Energy_VDWReal<<>>(Sims.Box, Sims.d_a, Sims.New, FF, Sims.Blocksum, SelectedComponent, Atomsize, Sims.device_flag, threadsNeeded, chainsize, HGGG_Nblock, HG_Nblock, SystemComponents.NComponents, Sims.ExcludeList); checkCUDAError("Error calculating energies (PARTIAL SUM HGGG Orientation)"); cudaMemcpy(SystemComponents.flag, Sims.device_flag, Widom.NumberWidomTrialsOrientations*sizeof(bool), cudaMemcpyDeviceToHost); }