From d31b7b6eefb7382c04381437844f6a9e8926172e Mon Sep 17 00:00:00 2001 From: Zhaoli2042 Date: Mon, 14 Oct 2024 23:50:00 -0400 Subject: [PATCH] =?UTF-8?q?Small=20optimization=20of=20code=20*=20Examples?= =?UTF-8?q?:=20=20=20=20=20=20*=20CO2-MFI:=209.7=20=E2=86=92=208.15=20seco?= =?UTF-8?q?nds=20=20=20=20=20=20*=20Bae-Mixture:=20108.8=20=E2=86=92=2094.?= =?UTF-8?q?8=20seconds=20=20=20=20=20=20*=20NU-2000:=2017.20=20=E2=86=92?= =?UTF-8?q?=207.5=20seconds=20=20=20=20=20=20*=20NPTMC:=20223.73=20?= =?UTF-8?q?=E2=86=92=20175.2=20seconds=20=20=20=20=20=20*=20NVT-Gibbs=20(1?= =?UTF-8?q?00=20+=20100=20cycles):=2035.3=20=E2=86=92=2031.45=20seconds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src_clean/Ewald_Energy_Functions.h | 21 +++----- src_clean/data_struct.h | 2 + src_clean/fxn_main.h | 5 ++ src_clean/mc_swap_moves.h | 27 ++++------ src_clean/mc_widom.h | 79 +++++++++++++++--------------- 5 files changed, 64 insertions(+), 70 deletions(-) diff --git a/src_clean/Ewald_Energy_Functions.h b/src_clean/Ewald_Energy_Functions.h index 511a1d0..f04d279 100644 --- a/src_clean/Ewald_Energy_Functions.h +++ b/src_clean/Ewald_Energy_Functions.h @@ -299,12 +299,12 @@ __global__ void Fourier_Ewald_Diff(Boxsize Box, Complex* SameTypeEik, Complex* C } if((ksqr > 1e-10) && (ksqr < Box.ReciprocalCutOff)) { - double3 ax; ax.x = Box.InverseCell[0]; ax.y = Box.InverseCell[3]; ax.z = Box.InverseCell[6]; - double3 ay; ay.x = Box.InverseCell[1]; ay.y = Box.InverseCell[4]; ay.z = Box.InverseCell[7]; - double3 az; az.x = Box.InverseCell[2]; az.y = Box.InverseCell[5]; az.z = Box.InverseCell[8]; + double3 ax = {Box.InverseCell[0], Box.InverseCell[3], Box.InverseCell[6]}; + double3 ay = {Box.InverseCell[1], Box.InverseCell[4], Box.InverseCell[7]}; + double3 az = {Box.InverseCell[2], Box.InverseCell[5], Box.InverseCell[8]}; size_t numberOfAtoms = Oldsize + Newsize; - Complex cksum_old; cksum_old.real = 0.0; cksum_old.imag = 0.0; - Complex cksum_new; cksum_new.real = 0.0; cksum_new.imag = 0.0; + Complex cksum_old = {0.0, 0.0}; + Complex cksum_new = {0.0, 0.0}; double3 kvec_x = ax * 2.0 * M_PI * (double) kx; double3 kvec_y = ay * 2.0 * M_PI * (double) ky; double3 kvec_z = az * 2.0 * M_PI * (double) kz; @@ -399,21 +399,14 @@ __global__ void Update_StructureFactor_Stored(Complex* Eik, Complex* Temp_Eik, s } void Update_Vector_Ewald(Boxsize& Box, bool CPU, Components& SystemComponents, size_t SelectedComponent) { - //else //Update on the GPU// - //{ - size_t numberOfStructureFactors = (Box.kmax.x + 1) * (2 * Box.kmax.y + 1) * (2 * Box.kmax.z + 1); - size_t Nblock = 0; size_t Nthread = 0; Setup_threadblock(numberOfStructureFactors, &Nblock, &Nthread); - Complex* Stored_Eik; if(SelectedComponent < SystemComponents.NComponents.y) { - Stored_Eik = Box.FrameworkEik; + std::swap(Box.FrameworkEik, Box.tempEik); } else { - Stored_Eik = Box.AdsorbateEik; + std::swap(Box.AdsorbateEik, Box.tempEik); } - Update_StructureFactor_Stored<<>>(Stored_Eik, Box.tempEik, numberOfStructureFactors); - //} } //////////////////////////////////////////////// diff --git a/src_clean/data_struct.h b/src_clean/data_struct.h index 8bffa44..f311bb7 100644 --- a/src_clean/data_struct.h +++ b/src_clean/data_struct.h @@ -889,6 +889,8 @@ struct Components double Pressure=0.0; double Beta; // Inverse Temperature + double3* tempMolStorage; // temporary storage for reinsertion move// + size_t EnergyEvalTimes = 0; bool* flag; // flags for checking overlaps (on host), device version in Simulations struct// diff --git a/src_clean/fxn_main.h b/src_clean/fxn_main.h index 1a1f610..d3a9a5a 100644 --- a/src_clean/fxn_main.h +++ b/src_clean/fxn_main.h @@ -177,6 +177,11 @@ inline void Prepare_Widom(WidomStruct& Widom, Boxsize Box, Simulations& Sims, Co size_t fourier_size = SystemComponents.EikAllocateSize; if(fourier_size > vdw_real_size) blocksum_size = fourier_size; + //Allocate temporary space for reinsertion// + size_t MaxAdsorbateMolsize = *std::max_element(SystemComponents.Moleculesize.begin() + 1, SystemComponents.Moleculesize.end());; + cudaMalloc(&SystemComponents.tempMolStorage, MaxAdsorbateMolsize * 2 * sizeof(double3)); + printf("Allocated %zu double3 for reinsertion!\n", MaxAdsorbateMolsize * 2); + cudaMallocHost(&Sims.Blocksum, blocksum_size*sizeof(double)); cudaMallocManaged(&Sims.ExcludeList, 10 * sizeof(int2)); diff --git a/src_clean/mc_swap_moves.h b/src_clean/mc_swap_moves.h index bfba047..937a301 100644 --- a/src_clean/mc_swap_moves.h +++ b/src_clean/mc_swap_moves.h @@ -73,9 +73,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& } //Store The New Locations// - double3* temp; - cudaMalloc(&temp, sizeof(double3) * SystemComponents.Moleculesize[SelectedComponent]); - StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, temp, SelectedTrial, SystemComponents.Moleculesize[SelectedComponent]); + StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, SystemComponents.tempMolStorage, SelectedTrial, SystemComponents.Moleculesize[SelectedComponent]); ///////////// // RETRACE // ///////////// @@ -98,7 +96,7 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& bool EwaldPerformed = false; if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) { - double2 EwaldE = GPU_EwaldDifference_Reinsertion(Sims.Box, Sims.d_a, Sims.Old, temp, FF, Sims.Blocksum, SystemComponents, SelectedComponent, UpdateLocation); + double2 EwaldE = GPU_EwaldDifference_Reinsertion(Sims.Box, Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, FF, Sims.Blocksum, SystemComponents, SelectedComponent, UpdateLocation); energy.GGEwaldE = EwaldE.x; energy.HGEwaldE = EwaldE.y; @@ -109,8 +107,8 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& //Put it after Ewald summation, the required positions are already in place (done by the preparation parts of Ewald Summation)// if(SystemComponents.UseDNNforHostGuest) { - if(!EwaldPerformed) Prepare_DNN_InitialPositions_Reinsertion(Sims.d_a, Sims.Old, temp, SystemComponents, SelectedComponent, UpdateLocation); - energy.DNN_E = DNN_Prediction_Reinsertion(SystemComponents, Sims, SelectedComponent, temp); + if(!EwaldPerformed) Prepare_DNN_InitialPositions_Reinsertion(Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, SystemComponents, SelectedComponent, UpdateLocation); + energy.DNN_E = DNN_Prediction_Reinsertion(SystemComponents, Sims, SelectedComponent, SystemComponents.tempMolStorage); //Correction of DNN - HostGuest energy to the Rosenbluth weight// double correction = energy.DNN_Correction(); if(fabs(correction) > SystemComponents.DNNDrift) //If there is a huge drift in the energy correction between DNN and Classical HostGuest// @@ -131,8 +129,8 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& { // accept the move SystemComponents.Moves[SelectedComponent].ReinsertionAccepted ++; //size_t UpdateLocation = SystemComponents.Moleculesize[SelectedComponent] * SelectedMolInComponent; - Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, temp, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data"); - cudaFree(temp); + Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[SelectedComponent]>>>(Sims.d_a, SystemComponents.tempMolStorage, SelectedComponent, UpdateLocation); checkCUDAError("error Updating Reinsertion data"); + if(!FF.noCharges && SystemComponents.hasPartialCharge[SelectedComponent]) Update_Vector_Ewald(Sims.Box, false, SystemComponents, SelectedComponent); SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.// @@ -141,7 +139,6 @@ static inline MoveEnergy Reinsertion(Components& SystemComponents, Simulations& } else { - cudaFree(temp); SystemComponents.Tmmc[SelectedComponent].Update(1.0, NMol, REINSERTION); //Update for TMMC, since Macrostate not changed, just add 1.// energy.zero(); return energy; @@ -523,9 +520,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati energy += temp_energy; } // Store The New Locations // - double3* temp; - cudaMalloc(&temp, sizeof(double3) * SystemComponents.Moleculesize[NEWComponent]); - StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, temp, SelectedTrial, SystemComponents.Moleculesize[NEWComponent]); + StoreNewLocation_Reinsertion<<<1,1>>>(Sims.Old, Sims.New, SystemComponents.tempMolStorage, SelectedTrial, SystemComponents.Moleculesize[NEWComponent]); ///////////// // RETRACE // @@ -549,7 +544,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati size_t UpdateLocation = SystemComponents.Moleculesize[OLDComponent] * OLDMolInComponent; if(!FF.noCharges) { - double2 EwaldE = GPU_EwaldDifference_IdentitySwap(Sims.Box, Sims.d_a, Sims.Old, temp, FF, Sims.Blocksum, SystemComponents, OLDComponent, NEWComponent, UpdateLocation); + double2 EwaldE = GPU_EwaldDifference_IdentitySwap(Sims.Box, Sims.d_a, Sims.Old, SystemComponents.tempMolStorage, FF, Sims.Blocksum, SystemComponents, OLDComponent, NEWComponent, UpdateLocation); energy.GGEwaldE = EwaldE.x; energy.HGEwaldE = EwaldE.y; Rosenbluth *= std::exp(-SystemComponents.Beta * (EwaldE.x + EwaldE.y)); @@ -597,7 +592,7 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati } UpdateLocation = SystemComponents.Moleculesize[NEWComponent] * NEWMolInComponent; - Update_IdentitySwap_Insertion_data<<<1,1>>>(Sims.d_a, temp, NEWComponent, UpdateLocation, NEWMolInComponent, SystemComponents.Moleculesize[NEWComponent]); checkCUDAError("error Updating Identity Swap Insertion data"); + Update_IdentitySwap_Insertion_data<<<1,1>>>(Sims.d_a, SystemComponents.tempMolStorage, NEWComponent, UpdateLocation, NEWMolInComponent, SystemComponents.Moleculesize[NEWComponent]); checkCUDAError("error Updating Identity Swap Insertion data"); Update_NumberOfMolecules(SystemComponents, Sims.d_a, NEWComponent, INSERTION); Update_NumberOfMolecules(SystemComponents, Sims.d_a, OLDComponent, DELETION); @@ -607,9 +602,8 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati //Regarding the UpdateLocation, in the code it is the old molecule position// //if the OLDComponent = NEWComponent, The new location will be filled into the old position// //So just use what is already in the code// - Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[OLDComponent]>>>(Sims.d_a, temp, OLDComponent, UpdateLocation); + Update_Reinsertion_data<<<1,SystemComponents.Moleculesize[OLDComponent]>>>(Sims.d_a, SystemComponents.tempMolStorage, OLDComponent, UpdateLocation); } - cudaFree(temp); //Zhao's note: BUG!!!!, Think about if OLD/NEW Component belong to different type (framework/adsorbate)// if(!FF.noCharges && ((SystemComponents.hasPartialCharge[NEWComponent]) ||(SystemComponents.hasPartialCharge[OLDComponent]))) Update_Vector_Ewald(Sims.Box, false, SystemComponents, NEWComponent); @@ -618,7 +612,6 @@ static inline MoveEnergy IdentitySwapMove(Components& SystemComponents, Simulati } else { - cudaFree(temp); energy.zero(); return energy; } diff --git a/src_clean/mc_widom.h b/src_clean/mc_widom.h index 3baea1a..89ffee3 100644 --- a/src_clean/mc_widom.h +++ b/src_clean/mc_widom.h @@ -177,26 +177,36 @@ __global__ void get_random_trial_orientation(Boxsize Box, Atoms* d_a, Atoms Mol, //Deletion: MolID = selected ID const size_t i = blockIdx.x * blockDim.x + threadIdx.x; + __shared__ double3 OldPos; + __shared__ double Chosenscale; + __shared__ double ChosenscaleCoul; //Record First Bead Information// + //For all orientations// if(i == 0) { - Mol.pos[0] = NewMol.pos[FirstBeadTrial]; - Mol.scale[0] = NewMol.scale[FirstBeadTrial]; Mol.charge[0] = NewMol.charge[FirstBeadTrial]; - Mol.scaleCoul[0] = NewMol.scaleCoul[FirstBeadTrial]; + Chosenscale = NewMol.scale[FirstBeadTrial]; + ChosenscaleCoul = NewMol.scaleCoul[FirstBeadTrial]; + Mol.scale[0] = Chosenscale; + Mol.scaleCoul[0] = ChosenscaleCoul; Mol.Type[0] = NewMol.Type[FirstBeadTrial]; Mol.MolID[0] = NewMol.MolID[FirstBeadTrial]; + OldPos = NewMol.pos[FirstBeadTrial]; + Mol.pos[0] = OldPos; } + __syncthreads(); + size_t trial = i / chainsize; + size_t a = i % chainsize; //Quaternions uses 3 random seeds// - size_t random_index = i + offset; - const Atoms AllData = d_a[SelectedComponent]; + size_t random_index = trial + offset; + const Atoms& AllData = d_a[SelectedComponent]; //different from translation (where we copy a whole molecule), here we duplicate the properties of the first bead of a molecule // so use start_position, not real_pos //Zhao's note: when there are zero molecule for the species, we need to use some preset values //the first values always have some numbers. The xyz are not correct, but type and charge are correct. Use those. - double scale = 0.0; double scaleCoul = 0.0; - for(size_t a = 0; a < chainsize; a++) - { + //double scale = 0.0; double scaleCoul = 0.0; + //for(size_t a = 0; a < chainsize; a++) + //{ double3 Vec; Vec = AllData.pos[1+a] - AllData.pos[0]; switch(MoveType) @@ -205,56 +215,46 @@ __global__ void get_random_trial_orientation(Boxsize Box, Atoms* d_a, Atoms Mol, //Zhao's note: It depends on whether Identity_swap needs to be operated on fractional molecules// //FOR NOW, JUST PUT IT ALONGSIDE WITH CBMC_INSERTION // ///////////////////////////////////////////////////////////////////////////////////////////////// - case CBMC_INSERTION: case IDENTITY_SWAP_NEW: //Insertion (whole/fractional Molecule)// + case CBMC_INSERTION: case IDENTITY_SWAP_NEW: case REINSERTION_INSERTION: //Insertion (whole/fractional Molecule)// { - scale = proposed_scale.x; scaleCoul = proposed_scale.y; + //scale = proposed_scale.x; scaleCoul = proposed_scale.y; Rotate_Quaternions(Vec, random[random_index]); - NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec; - break; - } - case CBMC_DELETION: //Deletion (whole/fractional molecule)// - { - scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a]; - if(i==0) //if deletion, the first trial position is the old position of the selected molecule// - { - NewMol.pos[i*chainsize+a] = AllData.pos[start_position+a]; - } - else - { - Rotate_Quaternions(Vec, random[random_index]); - NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec; - } - //printf("CHAIN: trial: %lu, xyz: %.5f %.5f %.5f\n", i, NewMol.pos[i*chainsize+a].x, NewMol.pos[i*chainsize+a].y, NewMol.pos[i*chainsize+a].z); - //if(i == 0) printf("i=0, start_position: %lu\n", start_position); + NewMol.pos[i] = OldPos + Vec; break; } + /* case REINSERTION_INSERTION: //Reinsertion-Insertion// { - scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a]; + //scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a]; Rotate_Quaternions(Vec, random[random_index]); - NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec; + NewMol.pos[i] = OldPos + Vec; break; } + */ + case CBMC_DELETION: //Deletion (whole/fractional molecule)// case REINSERTION_RETRACE: case IDENTITY_SWAP_OLD: //Reinsertion-Retrace, but also works for Identity swap (old molecule) for multiple orientations (not first bead) // { - scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a]; - if(i==0) //if deletion, the first trial position is the old position of the selected molecule// + //scale = AllData.scale[start_position+a]; scaleCoul = AllData.scaleCoul[start_position+a]; + if(trial==0) //if deletion, the first trial position is the old position of the selected molecule// { - NewMol.pos[i*chainsize+a] = AllData.pos[start_position+a]; + NewMol.pos[i] = AllData.pos[start_position+a]; } else { Rotate_Quaternions(Vec, random[random_index]); - NewMol.pos[i*chainsize+a] = Mol.pos[0] + Vec; + NewMol.pos[i] = OldPos + Vec; } break; } } - NewMol.scale[i*chainsize+a] = scale; NewMol.charge[i*chainsize+a] = AllData.charge[start_position+a]; - NewMol.scaleCoul[i*chainsize+a] = scaleCoul; - NewMol.Type[i*chainsize+a] = AllData.Type[start_position+a]; NewMol.MolID[i*chainsize+a] = MolID; - } - device_flag[i] = false; + NewMol.scale[i] = Chosenscale; + NewMol.charge[i] = AllData.charge[start_position+a]; + NewMol.scaleCoul[i] = ChosenscaleCoul; + NewMol.Type[i] = AllData.Type[start_position+a]; + NewMol.MolID[i] = MolID; + //} + if(a == 0) + device_flag[trial] = false; } @@ -438,7 +438,8 @@ static inline double Widom_Move_Chain_PARTIAL(Components& SystemComponents, Simu Random.Check(Widom.NumberWidomTrialsOrientations); //Get the first bead positions, and setup trial orientations// - get_random_trial_orientation<<<1,Widom.NumberWidomTrialsOrientations>>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, Sims.device_flag, Random.device_random, Random.offset, FirstBeadTrial, start_position, SelectedComponent, SelectedMolID, chainsize, MoveType, proposed_scale, SystemComponents.CURRENTCYCLE); checkCUDAError("error getting random trials orientations"); + size_t trialO_Nthread=0; size_t trialO_Nblock=0; Setup_threadblock(Widom.NumberWidomTrialsOrientations*chainsize, &trialO_Nblock, &trialO_Nthread); + get_random_trial_orientation<<>>(Sims.Box, Sims.d_a, Sims.Old, Sims.New, Sims.device_flag, Random.device_random, Random.offset, FirstBeadTrial, start_position, SelectedComponent, SelectedMolID, chainsize, MoveType, proposed_scale, SystemComponents.CURRENTCYCLE); checkCUDAError("error getting random trials orientations"); Random.Update(Widom.NumberWidomTrialsOrientations);