diff --git a/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc b/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc index b62e9aeb1f7b0..25d5959d4631a 100644 --- a/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc +++ b/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc @@ -569,7 +569,10 @@ std::unique_ptr MkFitGeometryESProducer::produce(const TrackerRec pconf.backward_fit_to_pca = false; pconf.finding_requires_propagation_to_hit_pos = true; pconf.finding_inter_layer_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material); - pconf.finding_intra_layer_pflags = PropagationFlags(PF_none); + if (Config::usePropToPlane) + pconf.finding_intra_layer_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material); + else + pconf.finding_intra_layer_pflags = PropagationFlags(PF_none); pconf.backward_fit_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material); pconf.forward_fit_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material); pconf.seed_fit_pflags = PropagationFlags(PF_none); diff --git a/RecoTracker/MkFitCore/interface/Config.h b/RecoTracker/MkFitCore/interface/Config.h index ac4dfe1277b0a..f76c362eec0ab 100644 --- a/RecoTracker/MkFitCore/interface/Config.h +++ b/RecoTracker/MkFitCore/interface/Config.h @@ -48,6 +48,8 @@ namespace mkfit { // Config for propagation - could/should enter into PropagationFlags?! constexpr int Niter = 5; constexpr bool useTrigApprox = true; + constexpr bool usePropToPlane = false; + constexpr bool usePtMultScat = false; // Config for Bfield. Note: for now the same for CMS-phase1 and CylCowWLids. constexpr float Bfield = 3.8112; diff --git a/RecoTracker/MkFitCore/interface/TrackerInfo.h b/RecoTracker/MkFitCore/interface/TrackerInfo.h index 965f47976b4f3..0ee221b156e77 100644 --- a/RecoTracker/MkFitCore/interface/TrackerInfo.h +++ b/RecoTracker/MkFitCore/interface/TrackerInfo.h @@ -5,6 +5,7 @@ #include "RecoTracker/MkFitCore/interface/PropagationConfig.h" #include "RecoTracker/MkFitCore/interface/Config.h" #include +#include #include #include diff --git a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc index eba6d9b367ba5..481ad42150fdc 100644 --- a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc +++ b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc @@ -236,6 +236,90 @@ namespace { RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN); } + //============================================================================== + + inline void ProjectResErr(const MPlex2H& A, const MPlexHS& B, MPlex2H& C) { + // C = A * B, C is 2x3, A is 2x3 , B is 3x3 sym + + /* + A 0 1 2 + 3 4 5 + B 0 1 3 + 1 2 4 + 3 4 5 + */ + + typedef float T; + const idx_t N = NN; + + const T* a = A.fArray; + ASSUME_ALIGNED(a, 64); + const T* b = B.fArray; + ASSUME_ALIGNED(b, 64); + T* c = C.fArray; + ASSUME_ALIGNED(c, 64); + +#pragma omp simd + for (int n = 0; n < N; ++n) { + c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n]; + c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n]; + c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n]; + c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n]; + c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n]; + c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n]; + } + } + + inline void ProjectResErrTransp(const MPlex2H& A, const MPlex2H& B, MPlex2S& C) { + // C = B * A^T, C is 2x2 sym, A is 2x3 (A^T is 3x2), B is 2x3 + + /* + B 0 1 2 + 3 4 5 + A^T 0 3 + 1 4 + 2 5 + */ + + typedef float T; + const idx_t N = NN; + + const T* a = A.fArray; + ASSUME_ALIGNED(a, 64); + const T* b = B.fArray; + ASSUME_ALIGNED(b, 64); + T* c = C.fArray; + ASSUME_ALIGNED(c, 64); + +#pragma omp simd + for (int n = 0; n < N; ++n) { + c[0 * N + n] = b[0 * N + n] * a[0 * N + n] + b[1 * N + n] * a[1 * N + n] + b[2 * N + n] * a[2 * N + n]; + c[1 * N + n] = b[0 * N + n] * a[3 * N + n] + b[1 * N + n] * a[4 * N + n] + b[2 * N + n] * a[5 * N + n]; + c[2 * N + n] = b[3 * N + n] * a[3 * N + n] + b[4 * N + n] * a[4 * N + n] + b[5 * N + n] * a[5 * N + n]; + } + } + + inline void RotateResidualsOnPlane(const MPlex2H& R, //prj + const MPlexHV& A, //res_glo + MPlex2V& B) //res_loc + { + // typedef float T; + // const idx_t N = NN; + + // const T* a = A.fArray; + // ASSUME_ALIGNED(a, 64); + // T* b = B.fArray; + // ASSUME_ALIGNED(b, 64); + // const T* r = R.fArray; + // ASSUME_ALIGNED(r, 64); + +#pragma omp simd + for (int n = 0; n < NN; ++n) { + B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0); + B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0); + } + } + inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) { // HTG = rot * res_loc // C = A * B @@ -304,6 +388,93 @@ namespace { } } + inline void KalmanHTG(const MPlex2H& A, const MPlex2S& B, MPlexH2& C) { + // HTG = prj^T * res_loc + // C = A^T * B + + /* + A^T 0 3 + 1 4 + 2 5 + B 0 1 + 1 2 + C 0 1 + 2 3 + 4 5 + */ + + typedef float T; + const idx_t N = NN; + + const T* a = A.fArray; + ASSUME_ALIGNED(a, 64); + const T* b = B.fArray; + ASSUME_ALIGNED(b, 64); + T* c = C.fArray; + ASSUME_ALIGNED(c, 64); + +#pragma omp simd + for (int n = 0; n < N; ++n) { + c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[3 * N + n] * b[1 * N + n]; + c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[3 * N + n] * b[2 * N + n]; + c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n]; + c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n]; + c[4 * N + n] = a[2 * N + n] * b[0 * N + n] + a[5 * N + n] * b[1 * N + n]; + c[5 * N + n] = a[2 * N + n] * b[1 * N + n] + a[5 * N + n] * b[2 * N + n]; + } + } + + inline void KalmanGain(const MPlexLS& A, const MPlexH2& B, MPlexL2& C) { + // C = A * B, C is 6x2, A is 6x6 sym , B is 3x2 (6x2 but half of it is zeros) + + /* + A 0 1 3 6 10 15 + 1 2 4 7 11 16 + 3 4 5 8 12 17 + 6 7 8 9 13 18 + 10 11 12 13 14 19 + 15 16 17 18 19 20 + B 0 1 + 2 3 + 4 5 + X X with X=0, so not even included in B + X X + X X + C 0 1 + 2 3 + 4 5 + 6 7 + 8 9 + 10 11 + */ + + typedef float T; + const idx_t N = NN; + + const T* a = A.fArray; + ASSUME_ALIGNED(a, 64); + const T* b = B.fArray; + ASSUME_ALIGNED(b, 64); + T* c = C.fArray; + ASSUME_ALIGNED(c, 64); + +#pragma omp simd + for (int n = 0; n < N; ++n) { + c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[2 * N + n] + a[3 * N + n] * b[4 * N + n]; + c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[5 * N + n]; + c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[2 * N + n] + a[4 * N + n] * b[4 * N + n]; + c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[5 * N + n]; + c[4 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n]; + c[5 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[5 * N + n]; + c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n]; + c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[5 * N + n]; + c[8 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[2 * N + n] + a[12 * N + n] * b[4 * N + n]; + c[9 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[5 * N + n]; + c[10 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[2 * N + n] + a[17 * N + n] * b[4 * N + n]; + c[11 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[5 * N + n]; + } + } + inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) { // C is transformed to align along y after rotation and rotated back @@ -367,6 +538,77 @@ namespace { KHMult_imp(A, B00, B01, C, 0, NN); } + inline void KHMult(const MPlexL2& A, const MPlex2H& B, MPlexLL& C) { + // C = A * B, C is 6x6, A is 6x2 , B is 2x3 (2x6 but half of it made of zeros) + + /* + A 0 1 + 2 3 + 4 5 + 6 7 + 8 9 + 10 11 + B 0 1 2 X X X with X=0 so not included in B + 3 4 5 X X X + C 0 1 2 3 4 5 + 6 7 8 9 10 11 + 12 13 14 15 16 17 + 18 19 20 21 22 23 + 24 25 26 27 28 29 + 30 31 32 33 34 34 + */ + + // typedef float T; + // const idx_t N = NN; + + // const T* a = A.fArray; + // ASSUME_ALIGNED(a, 64); + // const T* b = B.fArray; + // ASSUME_ALIGNED(b, 64); + // T* c = C.fArray; + // ASSUME_ALIGNED(c, 64); + +#pragma omp simd + for (int n = 0; n < NN; ++n) { + C(n, 0, 0) = A(n, 0, 0) * B(n, 0, 0) + A(n, 0, 1) * B(n, 1, 0); + C(n, 0, 1) = A(n, 0, 0) * B(n, 0, 1) + A(n, 0, 1) * B(n, 1, 1); + C(n, 0, 2) = A(n, 0, 0) * B(n, 0, 2) + A(n, 0, 1) * B(n, 1, 2); + C(n, 0, 3) = 0; + C(n, 0, 4) = 0; + C(n, 0, 5) = 0; + C(n, 0, 6) = A(n, 1, 0) * B(n, 0, 0) + A(n, 1, 1) * B(n, 1, 0); + C(n, 0, 7) = A(n, 1, 0) * B(n, 0, 1) + A(n, 1, 1) * B(n, 1, 1); + C(n, 0, 8) = A(n, 1, 0) * B(n, 0, 2) + A(n, 1, 1) * B(n, 1, 2); + C(n, 0, 9) = 0; + C(n, 0, 10) = 0; + C(n, 0, 11) = 0; + C(n, 0, 12) = A(n, 2, 0) * B(n, 0, 0) + A(n, 2, 1) * B(n, 1, 0); + C(n, 0, 13) = A(n, 2, 0) * B(n, 0, 1) + A(n, 2, 1) * B(n, 1, 1); + C(n, 0, 14) = A(n, 2, 0) * B(n, 0, 2) + A(n, 2, 1) * B(n, 1, 2); + C(n, 0, 15) = 0; + C(n, 0, 16) = 0; + C(n, 0, 17) = 0; + C(n, 0, 18) = A(n, 3, 0) * B(n, 0, 0) + A(n, 3, 1) * B(n, 1, 0); + C(n, 0, 19) = A(n, 3, 0) * B(n, 0, 1) + A(n, 3, 1) * B(n, 1, 1); + C(n, 0, 20) = A(n, 3, 0) * B(n, 0, 2) + A(n, 3, 1) * B(n, 1, 2); + C(n, 0, 21) = 0; + C(n, 0, 22) = 0; + C(n, 0, 23) = 0; + C(n, 0, 24) = A(n, 4, 0) * B(n, 0, 0) + A(n, 4, 1) * B(n, 1, 0); + C(n, 0, 25) = A(n, 4, 0) * B(n, 0, 1) + A(n, 4, 1) * B(n, 1, 1); + C(n, 0, 26) = A(n, 4, 0) * B(n, 0, 2) + A(n, 4, 1) * B(n, 1, 2); + C(n, 0, 27) = 0; + C(n, 0, 28) = 0; + C(n, 0, 29) = 0; + C(n, 0, 30) = A(n, 5, 0) * B(n, 0, 0) + A(n, 5, 1) * B(n, 1, 0); + C(n, 0, 31) = A(n, 5, 0) * B(n, 0, 1) + A(n, 5, 1) * B(n, 1, 1); + C(n, 0, 32) = A(n, 5, 0) * B(n, 0, 2) + A(n, 5, 1) * B(n, 1, 2); + C(n, 0, 33) = 0; + C(n, 0, 34) = 0; + C(n, 0, 35) = 0; + } + } + inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) { // C = A * B, C is 6x6, A is 6x6 , B is 6x6 sym @@ -622,6 +864,30 @@ namespace mkfit { #ifdef DEBUG { dmutex_guard; + printf("res_glo:\n"); + for (int i = 0; i < 3; ++i) { + printf("%8f ", res_glo.At(0, i, 0)); + } + printf("\n"); + printf("resErr_glo:\n"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", resErr_glo.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("res_loc:\n"); + for (int i = 0; i < 2; ++i) { + printf("%8f ", res_loc.At(0, i, 0)); + } + printf("\n"); + printf("tempHH:\n"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", tempHH.At(0, i, j)); + printf("\n"); + } + printf("\n"); printf("resErr_loc:\n"); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 2; ++j) @@ -663,10 +929,10 @@ namespace mkfit { KalmanGain(psErrLoc, tempHH, K); MultResidualsAdd(K, psPar, res_loc, outPar); - MPlexLL tempLL; squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi| + MPlexLL tempLL; KHMult(K, rotT00, rotT01, tempLL); KHC(tempLL, psErrLoc, outErr); outErr.subtract(psErrLoc, outErr); @@ -683,16 +949,330 @@ namespace mkfit { } printf("\n"); } - printf("res_glo:\n"); + printf("resErr_loc (Inv):\n"); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) + printf("%8f ", resErr_loc.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("tempHH:\n"); for (int i = 0; i < 3; ++i) { - printf("%8f ", res_glo.At(0, i, 0)); + for (int j = 0; j < 3; ++j) + printf("%8f ", tempHH.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("K:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", K.At(0, i, j)); + printf("\n"); } printf("\n"); - printf("res_loc:\n"); + printf("tempLL:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + printf("%8f ", tempLL.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("outPar:\n"); + for (int i = 0; i < 6; ++i) { + printf("%8f ", outPar.At(0, i, 0)); + } + printf("\n"); + printf("outErr:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + printf("%8f ", outErr.At(0, i, j)); + printf("\n"); + } + printf("\n"); + } +#endif + } + } + + //============================================================================== + // Kalman operations - Plane + //============================================================================== + + void kalmanUpdatePlane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + const int N_proc) { + kalmanOperationPlane( + KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, plNrm, plDir, outErr, outPar, dummy_chi2, N_proc); + } + + void kalmanPropagateAndUpdatePlane(const MPlexLS& psErr, + const MPlexLV& psPar, + MPlexQI& Chg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& propFlags, + const bool propToHit) { + if (propToHit) { + MPlexLS propErr; + MPlexLV propPar; + propagateHelixToPlaneMPlex(psErr, psPar, Chg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags); + + kalmanOperationPlane(KFO_Update_Params | KFO_Local_Cov, + propErr, + propPar, + msErr, + msPar, + plNrm, + plDir, + outErr, + outPar, + dummy_chi2, + N_proc); + } else { + kalmanOperationPlane(KFO_Update_Params | KFO_Local_Cov, + psErr, + psPar, + msErr, + msPar, + plNrm, + plDir, + outErr, + outPar, + dummy_chi2, + N_proc); + } + for (int n = 0; n < NN; ++n) { + if (outPar.At(n, 3, 0) < 0) { + Chg.At(n, 0, 0) = -Chg.At(n, 0, 0); + outPar.At(n, 3, 0) = -outPar.At(n, 3, 0); + } + } + } + + //------------------------------------------------------------------------------ + + void kalmanComputeChi2Plane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexQI& inChg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexQF& outChi2, + const int N_proc) { + kalmanOperationPlane( + KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc); + } + + void kalmanPropagateAndComputeChi2Plane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexQI& inChg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexQF& outChi2, + MPlexLV& propPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& propFlags, + const bool propToHit) { + propPar = psPar; + if (propToHit) { + MPlexLS propErr; + propagateHelixToPlaneMPlex(psErr, psPar, inChg, msPar, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags); + + kalmanOperationPlane( + KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc); + } else { + kalmanOperationPlane( + KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, plNrm, plDir, dummy_err, dummy_par, outChi2, N_proc); + } + } + + //------------------------------------------------------------------------------ + + void kalmanOperationPlane(const int kfOp, + const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQF& outChi2, + const int N_proc) { +#ifdef DEBUG + { + dmutex_guard; + printf("psPar:\n"); + for (int i = 0; i < 6; ++i) { + printf("%8f ", psPar.constAt(0, 0, i)); + printf("\n"); + } + printf("\n"); + printf("psErr:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + printf("%8f ", psErr.constAt(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("msPar:\n"); + for (int i = 0; i < 3; ++i) { + printf("%8f ", msPar.constAt(0, 0, i)); + printf("\n"); + } + printf("\n"); + printf("msErr:\n"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", msErr.constAt(0, i, j)); + printf("\n"); + } + printf("\n"); + } +#endif + + // Rotate global point on tangent plane to cylinder + // Tangent point is half way between hit and propagate position + + // Rotation matrix + // D0 D1 D2 + // X0 X1 X2 + // N0 N1 N2 + // where D is the strip direction vector plDir, N is the normal plNrm, and X is the cross product between the two + + MPlex2H prj; + for (int n = 0; n < NN; ++n) { + prj(n, 0, 0) = plDir(n, 0, 0); + prj(n, 0, 1) = plDir(n, 1, 0); + prj(n, 0, 2) = plDir(n, 2, 0); + prj(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0); + prj(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0); + prj(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0); + } + + MPlexHV res_glo; //position residual in global coordinates + SubtractFirst3(msPar, psPar, res_glo); + + MPlexHS resErr_glo; //covariance sum in global position coordinates + AddIntoUpperLeft3x3(psErr, msErr, resErr_glo); + + MPlex2V res_loc; //position residual in local coordinates + RotateResidualsOnPlane(prj, res_glo, res_loc); + MPlex2S resErr_loc; //covariance sum in local position coordinates + MPlex2H temp2H; + ProjectResErr(prj, resErr_glo, temp2H); + ProjectResErrTransp(prj, temp2H, resErr_loc); + +#ifdef DEBUG + { + dmutex_guard; + printf("prj:\n"); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", prj.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("res_glo:\n"); + for (int i = 0; i < 3; ++i) { + printf("%8f ", res_glo.At(0, i, 0)); + } + printf("\n"); + printf("resErr_glo:\n"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", resErr_glo.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("res_loc:\n"); + for (int i = 0; i < 2; ++i) { + printf("%8f ", res_loc.At(0, i, 0)); + } + printf("\n"); + printf("temp2H:\n"); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) + printf("%8f ", temp2H.At(0, i, j)); + printf("\n"); + } + printf("\n"); + printf("resErr_loc:\n"); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) + printf("%8f ", resErr_loc.At(0, i, j)); + printf("\n"); + } + printf("\n"); + } +#endif + + //invert the 2x2 matrix + Matriplex::invertCramerSym(resErr_loc); + + if (kfOp & KFO_Calculate_Chi2) { + Chi2Similarity(res_loc, resErr_loc, outChi2); + +#ifdef DEBUG + { + dmutex_guard; + printf("resErr_loc (Inv):\n"); for (int i = 0; i < 2; ++i) { - printf("%8f ", res_loc.At(0, i, 0)); + for (int j = 0; j < 2; ++j) + printf("%8f ", resErr_loc.At(0, i, j)); + printf("\n"); } printf("\n"); + printf("chi2: %8f\n", outChi2.At(0, 0, 0)); + } +#endif + } + + if (kfOp & KFO_Update_Params) { + MPlexLS psErrLoc = psErr; + + MPlexH2 tempH2; + MPlexL2 K; // kalman gain, fixme should be L2 + KalmanHTG(prj, resErr_loc, tempH2); // intermediate term to get kalman gain (H^T*G) + KalmanGain(psErrLoc, tempH2, K); + + MultResidualsAdd(K, psPar, res_loc, outPar); + + squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi| + + MPlexLL tempLL; + KHMult(K, prj, tempLL); + KHC(tempLL, psErrLoc, outErr); + outErr.subtract(psErrLoc, outErr); + +#ifdef DEBUG + { + dmutex_guard; + if (kfOp & KFO_Local_Cov) { + printf("psErrLoc:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + printf("% 8e ", psErrLoc.At(0, i, j)); + printf("\n"); + } + printf("\n"); + } printf("resErr_loc (Inv):\n"); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 2; ++j) @@ -700,13 +1280,27 @@ namespace mkfit { printf("\n"); } printf("\n"); + printf("tempH2:\n"); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) + printf("%8f ", tempH2.At(0, i, j)); + printf("\n"); + } + printf("\n"); printf("K:\n"); for (int i = 0; i < 6; ++i) { - for (int j = 0; j < 3; ++j) + for (int j = 0; j < 2; ++j) printf("%8f ", K.At(0, i, j)); printf("\n"); } printf("\n"); + printf("tempLL:\n"); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + printf("%8f ", tempLL.At(0, i, j)); + printf("\n"); + } + printf("\n"); printf("outPar:\n"); for (int i = 0; i < 6; ++i) { printf("%8f ", outPar.At(0, i, 0)); diff --git a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h index 67291ebf06ce8..746e1b05a6f9b 100644 --- a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h +++ b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h @@ -114,5 +114,67 @@ namespace mkfit { MPlexQF& outChi2, const int N_proc); + //------------------------------------------------------------------------------ + + void kalmanUpdatePlane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + const int N_proc); + + void kalmanPropagateAndUpdatePlane(const MPlexLS& psErr, + const MPlexLV& psPar, + MPlexQI& Chg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& propFlags, + const bool propToHit); + + void kalmanComputeChi2Plane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexQI& inChg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexQF& outChi2, + const int N_proc); + + void kalmanPropagateAndComputeChi2Plane(const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexQI& inChg, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexQF& outChi2, + MPlexLV& propPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& propFlags, + const bool propToHit); + + void kalmanOperationPlane(const int kfOp, + const MPlexLS& psErr, + const MPlexLV& psPar, + const MPlexHS& msErr, + const MPlexHV& msPar, + const MPlexHV& plNrm, + const MPlexHV& plDir, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQF& outChi2, + const int N_proc); + } // end namespace mkfit #endif diff --git a/RecoTracker/MkFitCore/src/Matrix.h b/RecoTracker/MkFitCore/src/Matrix.h index 809b438964e5e..ba426f59acdcc 100644 --- a/RecoTracker/MkFitCore/src/Matrix.h +++ b/RecoTracker/MkFitCore/src/Matrix.h @@ -53,6 +53,10 @@ namespace mkfit { typedef Matriplex::Matriplex MPlexHV; typedef Matriplex::MatriplexSym MPlexHS; + typedef Matriplex::Matriplex MPlex55; + typedef Matriplex::Matriplex MPlex56; + typedef Matriplex::Matriplex MPlex65; + typedef Matriplex::Matriplex MPlex22; typedef Matriplex::Matriplex MPlex2V; typedef Matriplex::MatriplexSym MPlex2S; @@ -61,6 +65,8 @@ namespace mkfit { typedef Matriplex::Matriplex MPlexHL; typedef Matriplex::Matriplex MPlexL2; + typedef Matriplex::Matriplex MPlexH2; + typedef Matriplex::Matriplex MPlex2H; typedef Matriplex::Matriplex MPlexQF; typedef Matriplex::Matriplex MPlexQI; diff --git a/RecoTracker/MkFitCore/src/MkBuilder.cc b/RecoTracker/MkFitCore/src/MkBuilder.cc index 97c0c6dd06f01..2c984991519b8 100644 --- a/RecoTracker/MkFitCore/src/MkBuilder.cc +++ b/RecoTracker/MkFitCore/src/MkBuilder.cc @@ -1098,6 +1098,11 @@ namespace mkfit { // from intra-layer to inter-layer. // mkfndr->copyOutParErr(eoccs.refCandidates_nc(), end - itrack, true); + // For prop-to-plane propagate from the last hit, not layer center. + if (Config::usePropToPlane) { + mkfndr->inputTracksAndHitIdx(eoccs.refCandidates(), seed_cand_idx, itrack, end, false); + } + dprint("make new candidates"); cloner.begin_iteration(); @@ -1118,7 +1123,7 @@ namespace mkfit { mkfndr->inputTracksAndHits(eoccs.refCandidates(), layer_of_hits, seed_cand_update_idx, itrack, end, true); - mkfndr->updateWithLoadedHit(end - itrack, fnd_foos); + mkfndr->updateWithLoadedHit(end - itrack, layer_of_hits, fnd_foos); // copy_out the updated track params, errors only (hit-idcs and chi2 already set) mkfndr->copyOutParErr(eoccs.refCandidates_nc(), end - itrack, false); diff --git a/RecoTracker/MkFitCore/src/MkFinder.cc b/RecoTracker/MkFitCore/src/MkFinder.cc index ecfc46c6dfdac..ee9726a8eb485 100644 --- a/RecoTracker/MkFitCore/src/MkFinder.cc +++ b/RecoTracker/MkFitCore/src/MkFinder.cc @@ -168,6 +168,10 @@ namespace mkfit { m_CandIdx(imp, 0, 0) = idxs[i].cand_idx; m_SeedOriginIdx[imp] = tracks[idxs[i].seed_idx].seed_origin_index(); + // Reuse selectHitIndices() arrays -- used also in packModuleNormDir() + m_XHitArr(imp, 0, 0) = idxs[i].hit_idx; + m_XHitSize(imp, 0, 0) = 1; + const Hit &hit = layer_of_hits.refHit(idxs[i].hit_idx); m_msErr.copyIn(imp, hit.errArray()); m_msPar.copyIn(imp, hit.posArray()); @@ -220,6 +224,23 @@ namespace mkfit { } } + void MkFinder::packModuleNormDir( + const LayerOfHits &layer_of_hits, int hit_cnt, MPlexHV &norm, MPlexHV &dir, int N_proc) const { + for (int itrack = 0; itrack < N_proc; ++itrack) { + if (hit_cnt < m_XHitSize[itrack]) { + const auto &hit = layer_of_hits.refHit(m_XHitArr.constAt(itrack, hit_cnt, 0)); + unsigned int mid = hit.detIDinLayer(); + const ModuleInfo &mi = layer_of_hits.layer_info()->module_info(mid); + norm.At(itrack, 0, 0) = mi.zdir[0]; + norm.At(itrack, 1, 0) = mi.zdir[1]; + norm.At(itrack, 2, 0) = mi.zdir[2]; + dir.At(itrack, 0, 0) = mi.xdir[0]; + dir.At(itrack, 1, 0) = mi.xdir[1]; + dir.At(itrack, 2, 0) = mi.xdir[2]; + } + } + } + //============================================================================== // getHitSelDynamicWindows //============================================================================== @@ -1245,17 +1266,37 @@ namespace mkfit { MPlexQF outChi2; MPlexLV propPar; clearFailFlag(); - (*fnd_foos.m_compute_chi2_foo)(m_Err[iP], - m_Par[iP], - m_Chg, - m_msErr, - m_msPar, - outChi2, - propPar, - m_FailFlag, - N_proc, - m_prop_config->finding_intra_layer_pflags, - m_prop_config->finding_requires_propagation_to_hit_pos); + + if (Config::usePropToPlane) { + // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint + MPlexHV norm, dir; + packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc); + kalmanPropagateAndComputeChi2Plane(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + norm, + dir, + outChi2, + propPar, + m_FailFlag, + N_proc, + m_prop_config->finding_intra_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } else { + (*fnd_foos.m_compute_chi2_foo)(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + outChi2, + propPar, + m_FailFlag, + N_proc, + m_prop_config->finding_intra_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } // Now update the track parameters with this hit (note that some // calculations are already done when computing chi2, to be optimized). @@ -1492,17 +1533,37 @@ namespace mkfit { MPlexQF outChi2; MPlexLV propPar; clearFailFlag(); - (*fnd_foos.m_compute_chi2_foo)(m_Err[iP], - m_Par[iP], - m_Chg, - m_msErr, - m_msPar, - outChi2, - propPar, - m_FailFlag, - N_proc, - m_prop_config->finding_intra_layer_pflags, - m_prop_config->finding_requires_propagation_to_hit_pos); + + if (Config::usePropToPlane) { + // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint + MPlexHV norm, dir; + packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc); + kalmanPropagateAndComputeChi2Plane(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + norm, + dir, + outChi2, + propPar, + m_FailFlag, + N_proc, + m_prop_config->finding_intra_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } else { + (*fnd_foos.m_compute_chi2_foo)(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + outChi2, + propPar, + m_FailFlag, + N_proc, + m_prop_config->finding_intra_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } //#pragma omp simd // DOES NOT VECTORIZE AS IT IS NOW for (int itrack = 0; itrack < N_proc; ++itrack) { @@ -1646,21 +1707,39 @@ namespace mkfit { // UpdateWithLoadedHit //============================================================================== - void MkFinder::updateWithLoadedHit(int N_proc, const FindingFoos &fnd_foos) { + void MkFinder::updateWithLoadedHit(int N_proc, const LayerOfHits &layer_of_hits, const FindingFoos &fnd_foos) { // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here // for propagation to the hit. clearFailFlag(); - (*fnd_foos.m_update_param_foo)(m_Err[iP], - m_Par[iP], - m_Chg, - m_msErr, - m_msPar, - m_Err[iC], - m_Par[iC], - m_FailFlag, - N_proc, - m_prop_config->finding_inter_layer_pflags, - m_prop_config->finding_requires_propagation_to_hit_pos); + if (Config::usePropToPlane) { + MPlexHV norm, dir; + packModuleNormDir(layer_of_hits, 0, norm, dir, N_proc); + kalmanPropagateAndUpdatePlane(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + norm, + dir, + m_Err[iC], + m_Par[iC], + m_FailFlag, + N_proc, + m_prop_config->finding_inter_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } else { + (*fnd_foos.m_update_param_foo)(m_Err[iP], + m_Par[iP], + m_Chg, + m_msErr, + m_msPar, + m_Err[iC], + m_Par[iC], + m_FailFlag, + N_proc, + m_prop_config->finding_inter_layer_pflags, + m_prop_config->finding_requires_propagation_to_hit_pos); + } // PROP-FAIL-ENABLE The following to be enabled when propagation failure // detection is properly implemented in propagate-to-R/Z. diff --git a/RecoTracker/MkFitCore/src/MkFinder.h b/RecoTracker/MkFitCore/src/MkFinder.h index 17da11ad8aa9d..7a9e0f3c96500 100644 --- a/RecoTracker/MkFitCore/src/MkFinder.h +++ b/RecoTracker/MkFitCore/src/MkFinder.h @@ -113,6 +113,8 @@ namespace mkfit { HitOnTrack bestHitLastHoT(int itrack) const { return m_HoTArrs[itrack][m_NHits(itrack, 0, 0) - 1]; } + void packModuleNormDir(const LayerOfHits &layer_of_hits, int hit_cnt, MPlexHV &norm, MPlexHV &dir, int N_proc) const; + //---------------------------------------------------------------------------- void getHitSelDynamicWindows( @@ -141,7 +143,7 @@ namespace mkfit { const int N_proc, const FindingFoos &fnd_foos); - void updateWithLoadedHit(int N_proc, const FindingFoos &fnd_foos); + void updateWithLoadedHit(int N_proc, const LayerOfHits &layer_of_hits, const FindingFoos &fnd_foos); void copyOutParErr(std::vector &seed_cand_vec, int N_proc, bool outputProp) const; diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc index f5bdff96e3180..bc8057ee81129 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.cc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc @@ -12,7 +12,7 @@ // propagateLineToRMPlex //============================================================================== -using namespace Matriplex; +//using namespace Matriplex; namespace mkfit { @@ -25,7 +25,7 @@ namespace mkfit { const int N_proc) { // XXX Regenerate parts below with a script. - const idx_t N = NN; + const Matriplex::idx_t N = NN; #pragma omp simd for (int n = 0; n < NN; ++n) { @@ -97,7 +97,7 @@ namespace { // C = A * B typedef float T; - const idx_t N = NN; + const Matriplex::idx_t N = NN; const T* a = A.fArray; ASSUME_ALIGNED(a, 64); @@ -113,7 +113,7 @@ namespace { // C = B * AT; typedef float T; - const idx_t N = NN; + const Matriplex::idx_t N = NN; const T* a = A.fArray; ASSUME_ALIGNED(a, 64); @@ -129,7 +129,7 @@ namespace { // C = A * B typedef float T; - const idx_t N = NN; + const Matriplex::idx_t N = NN; const T* a = A.fArray; ASSUME_ALIGNED(a, 64); @@ -145,7 +145,7 @@ namespace { // C = B * AT; typedef float T; - const idx_t N = NN; + const Matriplex::idx_t N = NN; const T* a = A.fArray; ASSUME_ALIGNED(a, 64); @@ -161,7 +161,7 @@ namespace { // C = A * B typedef float T; - const idx_t N = NN; + const Matriplex::idx_t N = NN; const T* a = A.fArray; ASSUME_ALIGNED(a, 64); @@ -226,7 +226,6 @@ namespace { c[35 * N + n] = a[32 * N + n] * b[17 * N + n] + a[35 * N + n]; } -#ifdef UNUSED // this version does not assume to know which elements are 0 or 1, so it does the full multiplication void MultHelixPropFull(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) { #pragma omp simd @@ -241,29 +240,30 @@ namespace { } } - // this version does not assume to know which elements are 0 or 1, so it does the full multiplication - void MultHelixPropFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) { + // this version does not assume to know which elements are 0 or 1, so it does the full mupltiplication + void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) { #pragma omp simd for (int n = 0; n < NN; ++n) { for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j) { C(n, i, j) = 0.; for (int k = 0; k < 6; ++k) - C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j); + C(n, i, j) += B.constAt(n, i, k) * A.constAt(n, j, k); } } } } - // this version does not assume to know which elements are 0 or 1, so it does the full mupltiplication - void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) { +#ifdef UNUSED + // this version does not assume to know which elements are 0 or 1, so it does the full multiplication + void MultHelixPropFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) { #pragma omp simd for (int n = 0; n < NN; ++n) { for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j) { C(n, i, j) = 0.; for (int k = 0; k < 6; ++k) - C(n, i, j) += B.constAt(n, i, k) * A.constAt(n, j, k); + C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j); } } } @@ -488,7 +488,19 @@ namespace mkfit { errorProp.setVal(0.f); outFailFlag.setVal(0.f); + //helixAtRFromIterativeCCS_impl_new(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags); + helixAtRFromIterativeCCS_impl(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags); + /* + //float nv = errorProp(0,0,0); + + outPar = inPar; + errorProp.setVal(0.f); + outFailFlag.setVal(0.f); + helixAtRFromIterativeCCS_impl(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags); + //float ov = errorProp(0,0,0); + assert(0); + */ } void propagateHelixToRMPlex(const MPlexLS& inErr, @@ -557,18 +569,27 @@ namespace mkfit { const float r = msRad(n, 0, 0); propSign(n, 0, 0) = (r > r0 ? 1. : -1.); } - applyMaterialEffects(hitsRl, hitsXi, propSign, outErr, outPar, N_proc, true); + MPlexHV plNrm; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + plNrm(n, 0, 0) = std::cos(outPar.constAt(n, 4, 0)); + plNrm(n, 1, 0) = std::sin(outPar.constAt(n, 4, 0)); + plNrm(n, 2, 0) = 0.f; + } + applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc); } squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi| - // Matriplex version of: - // result.errors = ROOT::Math::Similarity(errorProp, outErr); - // MultHelixProp can be optimized for CCS coordinates, see GenMPlexOps.pl MPlexLL temp; MultHelixProp(errorProp, outErr, temp); MultHelixPropTransp(errorProp, temp, outErr); + // MultHelixPropFull(errorProp, outErr, temp); + // MultHelixPropTranspFull(errorProp, temp, outErr); + + // Matriplex version of: + // result.errors = ROOT::Math::Similarity(errorProp, outErr); /* // To be used with: MPT_DIM = 1 @@ -613,11 +634,18 @@ namespace mkfit { MPlexLL errorProp; + //helixAtZ_new(inPar, inChg, msZ, outPar, errorProp, outFailFlag, N_proc, pflags); helixAtZ(inPar, inChg, msZ, outPar, errorProp, outFailFlag, N_proc, pflags); #ifdef DEBUG if (debug && g_debug) { for (int kk = 0; kk < N_proc; ++kk) { + dprintf("inPar %d\n", kk); + for (int i = 0; i < 6; ++i) { + dprintf("%8f ", inPar.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("inErr %d\n", kk); for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j) @@ -637,6 +665,20 @@ namespace mkfit { } #endif +#ifdef DEBUG + if (debug && g_debug) { + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("outErr %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", outErr.constAt(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + } + } +#endif + if (pflags.apply_material) { MPlexQF hitsRl; MPlexQF hitsXi; @@ -657,9 +699,39 @@ namespace mkfit { } const float zout = msZ.constAt(n, 0, 0); const float zin = inPar.constAt(n, 2, 0); - propSign(n, 0, 0) = (std::abs(zout) > std::abs(zin) ? 1. : -1.); + propSign(n, 0, 0) = (std::abs(zout) > std::abs(zin) ? 1.f : -1.f); + } + MPlexHV plNrm; +#pragma omp simd + for (int n = 0; n < NN; ++n) { + plNrm(n, 0, 0) = 0.f; + plNrm(n, 1, 0) = 0.f; + plNrm(n, 2, 0) = 1.f; + } + applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc); +#ifdef DEBUG + if (debug && g_debug) { + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("propSign %d\n", kk); + for (int i = 0; i < 1; ++i) { + dprintf("%8f ", propSign.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("plNrm %d\n", kk); + for (int i = 0; i < 3; ++i) { + dprintf("%8f ", plNrm.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("outErr(after material) %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", outErr.constAt(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + } } - applyMaterialEffects(hitsRl, hitsXi, propSign, outErr, outPar, N_proc, false); +#endif } squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi| @@ -669,6 +741,8 @@ namespace mkfit { MPlexLL temp; MultHelixPropEndcap(errorProp, outErr, temp); MultHelixPropTranspEndcap(errorProp, temp, outErr); + // MultHelixPropFull(errorProp, outErr, temp); + // MultHelixPropTranspFull(errorProp, temp, outErr); // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the // state to input when propagation fails -- as was the default before. @@ -680,33 +754,6 @@ namespace mkfit { } } // } - - // This dump is now out of its place as similarity is done with matriplex ops. - /* -#ifdef DEBUG - { - dmutex_guard; - for (int kk = 0; kk < N_proc; ++kk) - { - dprintf("outErr %d\n", kk); - for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j) - dprintf("%8f ", outErr.At(kk,i,j)); printf("\n"); - } dprintf("\n"); - - dprintf("outPar %d\n", kk); - for (int i = 0; i < 6; ++i) { - dprintf("%8f ", outPar.At(kk,i,0)); printf("\n"); - } dprintf("\n"); - if (std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0)) > 0.0001) { - float pt = 1.0f / inPar.constAt(kk,3,0); - dprint_np(kk, "DID NOT GET TO Z, dZ=" << std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0)) - << " z=" << msZ.constAt(kk, 0, 0) << " zin=" << inPar.constAt(kk,2,0) << " zout=" << outPar.At(kk,2,0) << std::endl - << "pt=" << pt << " pz=" << pt/std::tan(inPar.constAt(kk,5,0))); - } - } - } -#endif - */ } void helixAtZ(const MPlexLV& inPar, @@ -718,7 +765,9 @@ namespace mkfit { const int N_proc, const PropagationFlags& pflags) { errorProp.setVal(0.f); + outFailFlag.setVal(0.f); + // debug = true; #pragma omp simd for (int n = 0; n < NN; ++n) { //initialize erroProp to identity matrix, except element 2,2 which is zero @@ -773,7 +822,23 @@ namespace mkfit { << " inPar.constAt(n, 2, 0)=" << std::setprecision(9) << inPar.constAt(n, 2, 0) << " inPar.constAt(n, 3, 0)=" << std::setprecision(9) << inPar.constAt(n, 3, 0) << " inPar.constAt(n, 4, 0)=" << std::setprecision(9) << inPar.constAt(n, 4, 0) - << " inPar.constAt(n, 5, 0)=" << std::setprecision(9) << inPar.constAt(n, 5, 0)); + << " inPar.constAt(n, 5, 0)=" << std::setprecision(9) << inPar.constAt(n, 5, 0) + << " inChg.constAt(n, 0, 0)=" << std::setprecision(9) << inChg.constAt(n, 0, 0)); + } +#pragma omp simd + for (int n = 0; n < NN; ++n) { + dprint_np(n, + "propagation start, dump parameters" + << std::endl + << "pos = " << inPar.constAt(n, 0, 0) << " " << inPar.constAt(n, 1, 0) << " " + << inPar.constAt(n, 2, 0) << std::endl + << "mom (cart) = " << std::cos(inPar.constAt(n, 4, 0)) / inPar.constAt(n, 3, 0) << " " + << std::sin(inPar.constAt(n, 4, 0)) / inPar.constAt(n, 3, 0) << " " + << 1. / (inPar.constAt(n, 3, 0) * tan(inPar.constAt(n, 5, 0))) << " r=" + << std::sqrt(inPar.constAt(n, 0, 0) * inPar.constAt(n, 0, 0) + + inPar.constAt(n, 1, 0) * inPar.constAt(n, 1, 0)) + << " pT=" << 1. / std::abs(inPar.constAt(n, 3, 0)) << " q=" << inChg.constAt(n, 0, 0) + << " targetZ=" << msZ.constAt(n, 0, 0) << std::endl); } float pt[NN]; @@ -818,15 +883,7 @@ namespace mkfit { pxin[n] = cosP[n] * pt[n]; pyin[n] = sinP[n] * pt[n]; } -#pragma omp simd - for (int n = 0; n < NN; ++n) { - //fixme, make this printout useful for propagation to z - dprint_np(n, - std::endl - << "k=" << std::setprecision(9) << k[n] << " pxin=" << std::setprecision(9) << pxin[n] - << " pyin=" << std::setprecision(9) << pyin[n] << " cosP=" << std::setprecision(9) << cosP[n] - << " sinP=" << std::setprecision(9) << sinP[n] << " pt=" << std::setprecision(9) << pt[n]); - } + float deltaZ[NN]; float alpha[NN]; #pragma omp simd @@ -870,6 +927,7 @@ namespace mkfit { cosa[n] = 1.f - 2.f * sinah[n] * sinah[n]; sina[n] = 2.f * sinah[n] * cosah[n]; } + //update parameters #pragma omp simd for (int n = 0; n < NN; ++n) { @@ -882,9 +940,14 @@ namespace mkfit { #pragma omp simd for (int n = 0; n < NN; ++n) { dprint_np(n, - std::endl - << "outPar.At(n, 0, 0)=" << outPar.At(n, 0, 0) << " outPar.At(n, 1, 0)=" << outPar.At(n, 1, 0) - << " pxin=" << pxin[n] << " pyin=" << pyin[n]); + "propagation to Z end (OLD), dump parameters\n" + << " pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r=" + << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl + << " mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl + << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " + << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " + << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) + << std::endl); } float pxcaMpysa[NN]; @@ -933,7 +996,7 @@ namespace mkfit { "propagation end, dump parameters" << std::endl << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl - << "mom = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " " + << "mom (cart) = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " " << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " " << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0))) << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0)) @@ -1015,30 +1078,234 @@ namespace mkfit { #endif } + void helixAtPlane(const MPlexLV& inPar, + const MPlexQI& inChg, + const MPlexHV& plPnt, + const MPlexHV& plNrm, + MPlexQF& pathL, + MPlexLV& outPar, + MPlexLL& errorProp, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& pflags) { + errorProp.setVal(0.f); + outFailFlag.setVal(0.f); + + helixAtPlane_impl(inPar, inChg, plPnt, plNrm, pathL, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags); + } + + void propagateHelixToPlaneMPlex(const MPlexLS& inErr, + const MPlexLV& inPar, + const MPlexQI& inChg, + const MPlexHV& plPnt, + const MPlexHV& plNrm, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& pflags, + const MPlexQI* noMatEffPtr) { + // debug = true; + + outErr = inErr; + outPar = inPar; + + MPlexQF pathL; + MPlexLL errorProp; + + helixAtPlane(inPar, inChg, plPnt, plNrm, pathL, outPar, errorProp, outFailFlag, N_proc, pflags); + + for (int n = 0; n < NN; ++n) { + dprint_np( + n, + "propagation to plane end, dump parameters\n" + //<< " D = " << s[n] << " alpha = " << s[n] * std::sin(inPar(n, 5, 0)) * inPar(n, 3, 0) * kinv[n] << " kinv = " << kinv[n] << std::endl + << " pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r=" + << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl + << " mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl + << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " + << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) + << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl); + } + +#ifdef DEBUG + if (debug && g_debug) { + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("inPar %d\n", kk); + for (int i = 0; i < 6; ++i) { + dprintf("%8f ", inPar.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("inErr %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", inErr.constAt(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("plNrm %d\n", kk); + for (int j = 0; j < 3; ++j) + dprintf("%8f ", plNrm.constAt(kk, 0, j)); + } + dprintf("\n"); + + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("pathL %d\n", kk); + for (int j = 0; j < 1; ++j) + dprintf("%8f ", pathL.constAt(kk, 0, j)); + } + dprintf("\n"); + + dprintf("errorProp %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", errorProp.At(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + } + } +#endif + + // Matriplex version of: + // result.errors = ROOT::Math::Similarity(errorProp, outErr); + MPlexLL temp; + MultHelixPropFull(errorProp, outErr, temp); + MultHelixPropTranspFull(errorProp, temp, outErr); + +#ifdef DEBUG + if (debug && g_debug) { + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("outErr %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", outErr.constAt(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + } + } +#endif + + if (pflags.apply_material) { + MPlexQF hitsRl; + MPlexQF hitsXi; + MPlexQF propSign; + + const TrackerInfo& tinfo = *pflags.tracker_info; + +#pragma omp simd + for (int n = 0; n < NN; ++n) { + if (n >= N_proc || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0))) { + hitsRl(n, 0, 0) = 0.f; + hitsXi(n, 0, 0) = 0.f; + } else { + const float hypo = std::hypot(outPar(n, 0, 0), outPar(n, 1, 0)); + auto mat = tinfo.material_checked(std::abs(outPar(n, 2, 0)), hypo); + hitsRl(n, 0, 0) = mat.radl; + hitsXi(n, 0, 0) = mat.bbxi; + } + propSign(n, 0, 0) = (pathL(n, 0, 0) > 0.f ? 1.f : -1.f); + } + applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc); +#ifdef DEBUG + if (debug && g_debug) { + for (int kk = 0; kk < N_proc; ++kk) { + dprintf("propSign %d\n", kk); + for (int i = 0; i < 1; ++i) { + dprintf("%8f ", propSign.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("plNrm %d\n", kk); + for (int i = 0; i < 3; ++i) { + dprintf("%8f ", plNrm.constAt(kk, i, 0)); + } + dprintf("\n"); + dprintf("outErr(after material) %d\n", kk); + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) + dprintf("%8f ", outErr.constAt(kk, i, j)); + dprintf("\n"); + } + dprintf("\n"); + } + } +#endif + } + + squashPhiMPlex(outPar, N_proc); // ensure phi is between |pi| + + // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the + // state to input when propagation fails -- as was the default before. + // if (pflags.copy_input_state_on_fail) { + for (int i = 0; i < N_proc; ++i) { + if (outFailFlag(i, 0, 0)) { + outPar.copySlot(i, inPar); + outErr.copySlot(i, inErr); + } + } + // } + + // This dump is now out of its place as similarity is done with matriplex ops. + /* +#ifdef DEBUG + { + dmutex_guard; + for (int kk = 0; kk < N_proc; ++kk) + { + dprintf("outErr %d\n", kk); + for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j) + dprintf("%8f ", outErr.At(kk,i,j)); printf("\n"); + } dprintf("\n"); + + dprintf("outPar %d\n", kk); + for (int i = 0; i < 6; ++i) { + dprintf("%8f ", outPar.At(kk,i,0)); printf("\n"); + } dprintf("\n"); + if (std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0)) > 0.0001) { + float pt = 1.0f / inPar.constAt(kk,3,0); + dprint_np(kk, "DID NOT GET TO Z, dZ=" << std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0)) + << " z=" << msZ.constAt(kk, 0, 0) << " zin=" << inPar.constAt(kk,2,0) << " zout=" << outPar.At(kk,2,0) << std::endl + << "pt=" << pt << " pz=" << pt/std::tan(inPar.constAt(kk,5,0))); + } + } + } +#endif + */ + } + //============================================================================== void applyMaterialEffects(const MPlexQF& hitsRl, const MPlexQF& hitsXi, const MPlexQF& propSign, + const MPlexHV& plNrm, MPlexLS& outErr, MPlexLV& outPar, - const int N_proc, - const bool isBarrel) { + const int N_proc) { #pragma omp simd for (int n = 0; n < NN; ++n) { float radL = hitsRl.constAt(n, 0, 0); if (radL < 1e-13f) continue; //ugly, please fixme const float theta = outPar.constAt(n, 5, 0); - const float pt = 1.f / outPar.constAt(n, 3, 0); //fixme, make sure it is positive? + // const float pt = 1.f / outPar.constAt(n, 3, 0); //fixme, make sure it is positive? + const float ipt = outPar.constAt(n, 3, 0); + const float pt = 1.f / ipt; //fixme, make sure it is positive? + const float ipt2 = ipt * ipt; const float p = pt / std::sin(theta); + const float pz = p * std::cos(theta); const float p2 = p * p; constexpr float mpi = 0.140; // m=140 MeV, pion constexpr float mpi2 = mpi * mpi; // m=140 MeV, pion const float beta2 = p2 / (p2 + mpi2); const float beta = std::sqrt(beta2); //radiation lenght, corrected for the crossing angle (cos alpha from dot product of radius vector and momentum) - const float invCos = (isBarrel ? p / pt : 1.f / std::abs(std::cos(theta))); + const float invCos = + p / std::abs(pt * std::cos(outPar.constAt(n, 4, 0)) * plNrm.constAt(n, 0, 0) + + pt * std::sin(outPar.constAt(n, 4, 0)) * plNrm.constAt(n, 1, 0) + pz * plNrm.constAt(n, 2, 0)); radL = radL * invCos; //fixme works only for barrel geom // multiple scattering //vary independently phi and theta by the rms of the planar multiple scattering angle @@ -1049,9 +1316,15 @@ namespace mkfit { // const float thetaMSC2 = thetaMSC*thetaMSC; const float thetaMSC = 0.0136f * (1.f + 0.038f * std::log(radL)) / (beta * p); // eq 32.15 const float thetaMSC2 = thetaMSC * thetaMSC * radL; - outErr.At(n, 4, 4) += thetaMSC2; - // outErr.At(n, 4, 5) += thetaMSC2; - outErr.At(n, 5, 5) += thetaMSC2; + if (Config::usePtMultScat) { + outErr.At(n, 3, 3) += thetaMSC2 * pz * pz * ipt2 * ipt2; + outErr.At(n, 3, 5) -= thetaMSC2 * pz * ipt2; + outErr.At(n, 4, 4) += thetaMSC2 * p2 * ipt2; + outErr.At(n, 5, 5) += thetaMSC2; + } else { + outErr.At(n, 4, 4) += thetaMSC2; + outErr.At(n, 5, 5) += thetaMSC2; + } //std::cout << "beta=" << beta << " p=" << p << std::endl; //std::cout << "multiple scattering thetaMSC=" << thetaMSC << " thetaMSC2=" << thetaMSC2 << " radL=" << radL << std::endl; // energy loss diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.h b/RecoTracker/MkFitCore/src/PropagationMPlex.h index 5d1b0034ad400..3522365538b13 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.h +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.h @@ -80,13 +80,35 @@ namespace mkfit { const int N_proc, const PropagationFlags& pflags); + void helixAtPlane(const MPlexLV& inPar, + const MPlexQI& inChg, + const MPlexHV& plPnt, + const MPlexHV& plNrm, + MPlexQF& pathL, + MPlexLV& outPar, + MPlexLL& errorProp, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& pflags); + + void propagateHelixToPlaneMPlex(const MPlexLS& inErr, + const MPlexLV& inPar, + const MPlexQI& inChg, + const MPlexHV& plPnt, + const MPlexHV& plNrm, + MPlexLS& outErr, + MPlexLV& outPar, + MPlexQI& outFailFlag, + const int N_proc, + const PropagationFlags& pflags, + const MPlexQI* noMatEffPtr = nullptr); + void applyMaterialEffects(const MPlexQF& hitsRl, const MPlexQF& hitsXi, const MPlexQF& propSign, + const MPlexHV& plNrm, MPlexLS& outErr, MPlexLV& outPar, - const int N_proc, - const bool isBarrel); - + const int N_proc); } // end namespace mkfit #endif diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc index 8cb581401ad88..bb13d0e75ad91 100644 --- a/RecoTracker/MkFitCore/src/PropagationMPlex.icc +++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc @@ -2,6 +2,753 @@ /// helixAtRFromIterativeCCS_impl /////////////////////////////////////////////////////////////////////////////// +//#define DEBUG +//#include "Debug.h" + +template +static inline void parsFromPathL_impl(const Tf& __restrict__ inPar, + TfLL1& __restrict__ outPar, + const float* kinv, + const Tf1& __restrict__ s, + const int nmin, + const int nmax) { + float alpha[nmax - nmin]; + for (int n = nmin; n < nmax; ++n) { + alpha[n - nmin] = s[n - nmin] * std::sin(inPar(n, 5, 0)) * inPar(n, 3, 0) * kinv[n - nmin]; + } + + float cosah[nmax - nmin]; + float sinah[nmax - nmin]; + if constexpr (Config::useTrigApprox) { +#if !defined(__INTEL_COMPILER) +#pragma omp simd +#endif + for (int n = nmin; n < nmax; ++n) { + sincos4(alpha[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]); + } + } else { +#if !defined(__INTEL_COMPILER) +#pragma omp simd +#endif + for (int n = nmin; n < nmax; ++n) { + cosah[n - nmin] = std::cos(alpha[n - nmin] * 0.5f); + sinah[n - nmin] = std::sin(alpha[n - nmin] * 0.5f); + } + } + + for (int n = nmin; n < nmax; ++n) { + outPar(n, 0, 0) = + inPar(n, 0, 0) + 2.f * sinah[n - nmin] * + (std::cos(inPar(n, 4, 0)) * cosah[n - nmin] - std::sin(inPar(n, 4, 0)) * sinah[n - nmin]) / + (inPar(n, 3, 0) * kinv[n - nmin]); + outPar(n, 1, 0) = + inPar(n, 1, 0) + 2.f * sinah[n - nmin] * + (std::sin(inPar(n, 4, 0)) * cosah[n - nmin] + std::cos(inPar(n, 4, 0)) * sinah[n - nmin]) / + (inPar(n, 3, 0) * kinv[n - nmin]); + outPar(n, 2, 0) = inPar(n, 2, 0) + alpha[n - nmin] / kinv[n - nmin] * std::cos(inPar(n, 5, 0)) / + (inPar(n, 3, 0) * std::sin(inPar(n, 5, 0))); + outPar(n, 3, 0) = inPar(n, 3, 0); + outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[n - nmin]; + outPar(n, 5, 0) = inPar(n, 5, 0); + } +} + +//should kinv and D be templated??? +template +static inline void parsAndErrPropFromPathL_impl(const Tf& __restrict__ inPar, + const Ti& __restrict__ inChg, + TfLL1& __restrict__ outPar, + const float* kinv, + const Tf1& __restrict__ s, + TfLLL& __restrict__ errorProp, + const int nmin, + const int nmax, + const int N_proc, + const PropagationFlags& pf) { + //iteration should return the path length s, then update parameters and compute errors + + parsFromPathL_impl(inPar, outPar, kinv, s, nmin, nmax); + + float cosPin[nmax - nmin]; + float sinPin[nmax - nmin]; + float cosPout[nmax - nmin]; + float sinPout[nmax - nmin]; + float cosT[nmax - nmin]; + float sinT[nmax - nmin]; + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosPin[n - nmin] = std::cos(inPar(n, 4, 0)); + sinPin[n - nmin] = std::sin(inPar(n, 4, 0)); + cosPout[n - nmin] = std::cos(outPar(n, 4, 0)); + sinPout[n - nmin] = std::sin(outPar(n, 4, 0)); + cosT[n - nmin] = std::cos(inPar(n, 5, 0)); + sinT[n - nmin] = std::sin(inPar(n, 5, 0)); + } + + // use code from AnalyticalCurvilinearJacobian::computeFullJacobian for error propagation in curvilinear coordinates, then convert to CCS + // main difference from the above function is that we assume that the magnetic field is purely along z (which also implies that there is no change in pz) + // this simplifies significantly the code + + MPlex55 errorPropCurv; + for (int n = nmin; n < nmax; ++n) { + const float qbp = inChg(n, 0, 0) * sinT[n - nmin] * inPar(n, 3, 0); + // calculate transport matrix + // Origin: TRPRFN + const float t11 = cosPin[n - nmin] * sinT[n - nmin]; + const float t12 = sinPin[n - nmin] * sinT[n - nmin]; + const float t21 = cosPout[n - nmin] * sinT[n - nmin]; + const float t22 = sinPout[n - nmin] * sinT[n - nmin]; + const float cosl1 = 1.f / sinT[n - nmin]; + // define average magnetic field and gradient + // at initial point - inlike TRPRFN + const float bF = + (pf.use_param_b_field + ? 0.01f * Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), hipo(inPar(n, 0, 0), inPar(n, 1, 0))) + : 0.01f * Const::sol * Config::Bfield); + const float q = -bF * qbp; + const float theta = q * s[n - nmin]; + //float sint, cost; + //vdt::fast_sincos(theta, sint, cost); + const float sint = std::sin(theta); + const float cost = std::cos(theta); + const float dx1 = inPar(n, 0, 0) - outPar(n, 0, 0); + const float dx2 = inPar(n, 1, 0) - outPar(n, 1, 0); + const float dx3 = inPar(n, 2, 0) - outPar(n, 2, 0); + float au = 1.f / sqrt(t11 * t11 + t12 * t12); + const float u11 = -au * t12; + const float u12 = au * t11; + const float v11 = -cosT[n - nmin] * u12; + const float v12 = cosT[n - nmin] * u11; + const float v13 = t11 * u12 - t12 * u11; + au = 1.f / sqrt(t21 * t21 + t22 * t22); + const float u21 = -au * t22; + const float u22 = au * t21; + const float v21 = -cosT[n - nmin] * u22; + const float v22 = cosT[n - nmin] * u21; + const float v23 = t21 * u22 - t22 * u21; + // now prepare the transport matrix + const float omcost = 1.f - cost; + const float tmsint = theta - sint; + // 1/p - doesn't change since |p1| = |p2| + errorPropCurv(n, 0, 0) = 1.f; + for (auto i = 1; i < 5; ++i) + errorPropCurv(n, 0, i) = 0.f; + // lambda + errorPropCurv(n, 1, 0) = 0.f; + errorPropCurv(n, 1, 1) = + cost * (v11 * v21 + v12 * v22 + v13 * v23) + sint * (-v12 * v21 + v11 * v22) + omcost * v13 * v23; + errorPropCurv(n, 1, 2) = (cost * (u11 * v21 + u12 * v22) + sint * (-u12 * v21 + u11 * v22)) * sinT[n - nmin]; + errorPropCurv(n, 1, 3) = 0.f; + errorPropCurv(n, 1, 4) = 0.f; + // phi + errorPropCurv(n, 2, 0) = bF * v23 * (t21 * dx1 + t22 * dx2 + cosT[n - nmin] * dx3) * cosl1; + errorPropCurv(n, 2, 1) = (cost * (v11 * u21 + v12 * u22) + sint * (-v12 * u21 + v11 * u22) + + v23 * (-sint * (v11 * t21 + v12 * t22 + v13 * cosT[n - nmin]) + + omcost * (-v11 * t22 + v12 * t21) - tmsint * cosT[n - nmin] * v13)) * + cosl1; + errorPropCurv(n, 2, 2) = (cost * (u11 * u21 + u12 * u22) + sint * (-u12 * u21 + u11 * u22) + + v23 * (-sint * (u11 * t21 + u12 * t22) + omcost * (-u11 * t22 + u12 * t21))) * + cosl1 * sinT[n - nmin]; + errorPropCurv(n, 2, 3) = -q * v23 * (u11 * t21 + u12 * t22) * cosl1; + errorPropCurv(n, 2, 4) = -q * v23 * (v11 * t21 + v12 * t22 + v13 * cosT[n - nmin]) * cosl1; + // yt + float cutCriterion = fabs(s[n - nmin] * sinT[n - nmin] * inPar(n, 3, 0)); + const float limit = 5.f; // valid for propagations with effectively float precision + if (cutCriterion > limit) { + const float pp = 1.f / qbp; + errorPropCurv(n, 3, 0) = pp * (u21 * dx1 + u22 * dx2); + errorPropCurv(n, 4, 0) = pp * (v21 * dx1 + v22 * dx2 + v23 * dx3); + } else { + const float temp1 = -t12 * u21 + t11 * u22; + const float s2 = s[n - nmin] * s[n - nmin]; + const float secondOrder41 = -0.5f * bF * temp1 * s2; + const float temp2 = -t11 * u21 - t12 * u22; + const float s3 = s2 * s[n - nmin]; + const float s4 = s3 * s[n - nmin]; + const float h2 = bF * bF; + const float h3 = h2 * bF; + const float qbp2 = qbp * qbp; + const float thirdOrder41 = 1.f / 3 * h2 * s3 * qbp * temp2; + const float fourthOrder41 = 1.f / 8 * h3 * s4 * qbp2 * temp1; + errorPropCurv(n, 3, 0) = secondOrder41 + (thirdOrder41 + fourthOrder41); + const float temp3 = -t12 * v21 + t11 * v22; + const float secondOrder51 = -0.5f * bF * temp3 * s2; + const float temp4 = -t11 * v21 - t12 * v22 - cosT[n - nmin] * v23; + const float thirdOrder51 = 1.f / 3 * h2 * s3 * qbp * temp4; + const float fourthOrder51 = 1.f / 8 * h3 * s4 * qbp2 * temp3; + errorPropCurv(n, 4, 0) = secondOrder51 + (thirdOrder51 + fourthOrder51); + } + errorPropCurv(n, 3, 1) = (sint * (v11 * u21 + v12 * u22) + omcost * (-v12 * u21 + v11 * u22)) / q; + errorPropCurv(n, 3, 2) = (sint * (u11 * u21 + u12 * u22) + omcost * (-u12 * u21 + u11 * u22)) * sinT[n - nmin] / q; + errorPropCurv(n, 3, 3) = (u11 * u21 + u12 * u22); + errorPropCurv(n, 3, 4) = (v11 * u21 + v12 * u22); + // zt + errorPropCurv(n, 4, 1) = + (sint * (v11 * v21 + v12 * v22 + v13 * v23) + omcost * (-v12 * v21 + v11 * v22) + tmsint * v23 * v13) / q; + errorPropCurv(n, 4, 2) = (sint * (u11 * v21 + u12 * v22) + omcost * (-u12 * v21 + u11 * v22)) * sinT[n - nmin] / q; + errorPropCurv(n, 4, 3) = (u11 * v21 + u12 * v22); + errorPropCurv(n, 4, 4) = (v11 * v21 + v12 * v22 + v13 * v23); + + } //end loop over n + +//debug = true; +#ifdef DEBUG + for (int n = nmin; n < nmax; ++n) { + if (debug && g_debug && n < N_proc) { + dmutex_guard; + std::cout << n << ": errorPropCurv" << std::endl; + printf("%5f %5f %5f %5f %5f\n", + errorPropCurv(n, 0, 0), + errorPropCurv(n, 0, 1), + errorPropCurv(n, 0, 2), + errorPropCurv(n, 0, 3), + errorPropCurv(n, 0, 4)); + printf("%5f %5f %5f %5f %5f\n", + errorPropCurv(n, 1, 0), + errorPropCurv(n, 1, 1), + errorPropCurv(n, 1, 2), + errorPropCurv(n, 1, 3), + errorPropCurv(n, 1, 4)); + printf("%5f %5f %5f %5f %5f\n", + errorPropCurv(n, 2, 0), + errorPropCurv(n, 2, 1), + errorPropCurv(n, 2, 2), + errorPropCurv(n, 2, 3), + errorPropCurv(n, 2, 4)); + printf("%5f %5f %5f %5f %5f\n", + errorPropCurv(n, 3, 0), + errorPropCurv(n, 3, 1), + errorPropCurv(n, 3, 2), + errorPropCurv(n, 3, 3), + errorPropCurv(n, 3, 4)); + printf("%5f %5f %5f %5f %5f\n", + errorPropCurv(n, 4, 0), + errorPropCurv(n, 4, 1), + errorPropCurv(n, 4, 2), + errorPropCurv(n, 4, 3), + errorPropCurv(n, 4, 4)); + printf("\n"); + } + } +#endif + + //now we need jacobians to convert to/from curvilinear and CCS + // code from TrackState::jacobianCCSToCurvilinear + MPlex56 jacCCS2Curv; + for (int n = nmin; n < nmax; ++n) { + for (int ii = 0; ii < 5; ii++) { + for (int jj = 0; jj < 6; jj++) { + jacCCS2Curv(n, ii, jj) = 0.f; + } + } + jacCCS2Curv(n, 0, 3) = inChg(n, 0, 0) * sinT[n - nmin]; + jacCCS2Curv(n, 0, 5) = inChg(n, 0, 0) * cosT[n - nmin] * inPar(n, 3, 0); + jacCCS2Curv(n, 1, 5) = -1.f; + jacCCS2Curv(n, 2, 4) = 1.f; + jacCCS2Curv(n, 3, 0) = -sinPin[n - nmin]; + jacCCS2Curv(n, 3, 1) = cosPin[n - nmin]; + jacCCS2Curv(n, 4, 0) = -cosPin[n - nmin] * cosT[n - nmin]; + jacCCS2Curv(n, 4, 1) = -sinPin[n - nmin] * cosT[n - nmin]; + jacCCS2Curv(n, 4, 2) = sinT[n - nmin]; + } + + // code from TrackState::jacobianCurvilinearToCCS + MPlex65 jacCurv2CCS; + for (int n = nmin; n < nmax; ++n) { + for (int ii = 0; ii < 6; ii++) { + for (int jj = 0; jj < 5; jj++) { + jacCurv2CCS(n, ii, jj) = 0.f; + } + } + + jacCurv2CCS(n, 0, 3) = -sinPout[n - nmin]; + jacCurv2CCS(n, 0, 4) = -cosT[n - nmin] * cosPout[n - nmin]; + jacCurv2CCS(n, 1, 3) = cosPout[n - nmin]; + jacCurv2CCS(n, 1, 4) = -cosT[n - nmin] * sinPout[n - nmin]; + jacCurv2CCS(n, 2, 4) = sinT[n - nmin]; + jacCurv2CCS(n, 3, 0) = inChg(n, 0, 0) / sinT[n - nmin]; + jacCurv2CCS(n, 3, 1) = outPar(n, 3, 0) * cosT[n - nmin] / sinT[n - nmin]; + jacCurv2CCS(n, 4, 2) = 1.f; + jacCurv2CCS(n, 5, 1) = -1.f; + } + + //need to compute errorProp = jacCurv2CCS*errorPropCurv*jacCCS2Curv + Matriplex::MPlex tmp; + Matriplex::multiplyGeneral(jacCurv2CCS, errorPropCurv, tmp); + Matriplex::multiplyGeneral(tmp, jacCCS2Curv, errorProp); +} + +// from P.Avery's notes (http://www.phys.ufl.edu/~avery/fitting/transport.pdf eq. 5) +inline float getS(float delta0, + float delta1, + float delta2, + float eta0, + float eta1, + float eta2, + float sinP, + float cosP, + float sinT, + float cosT, + float pt, + int q, + float kinv) { + float A = delta0 * eta0 + delta1 * eta1 + delta2 * eta2; + float ip = sinT / pt; + float p0[3] = {pt * cosP, pt * sinP, cosT / ip}; + float B = (p0[0] * eta0 + p0[1] * eta1 + p0[2] * eta2) * ip; + float rho = kinv * ip; + float C = (eta0 * p0[1] - eta1 * p0[0]) * rho * 0.5f * ip; + float sqb2m4ac = std::sqrt(B * B - 4.f * A * C); + float s1 = (-B + sqb2m4ac) * 0.5f / C; + float s2 = (-B - sqb2m4ac) * 0.5f / C; +#ifdef DEBUG + if (debug) + std::cout << "A=" << A << " B=" << B << " C=" << C << " s1=" << s1 << " s2=" << s2 << std::endl; +#endif + //take the closest + return (std::abs(s1) > std::abs(s2) ? s2 : s1); +} + +template +static inline void helixAtPlane_impl(const Tf& __restrict__ inPar, + const Ti& __restrict__ inChg, + const Tf11& __restrict__ plPnt, + const Tf11& __restrict__ plNrm, + Tf1& __restrict__ s, + TfLL1& __restrict__ outPar, + TfLLL& __restrict__ errorProp, + Ti& __restrict__ outFailFlag, // expected to be initialized to 0 + const int nmin, + const int nmax, + const int N_proc, + const PropagationFlags& pf) { + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "input parameters" + << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)=" + << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9) + << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0) + << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0) + << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0)); + } + + float kinv[nmax - nmin]; + if (pf.use_param_b_field) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + kinv[n - nmin] = inChg(n, 0, 0) * 0.01f * + (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), hipo(inPar(n, 0, 0), inPar(n, 1, 0)))); + } + } else { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + kinv[n - nmin] = inChg(n, 0, 0) * 0.01f * (-Const::sol * Config::Bfield); + } + } + + float delta0[nmax - nmin]; + float delta1[nmax - nmin]; + float delta2[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + delta0[n - nmin] = inPar(n, 0, 0) - plPnt(n, 0, 0); + delta1[n - nmin] = inPar(n, 1, 0) - plPnt(n, 1, 0); + delta2[n - nmin] = inPar(n, 2, 0) - plPnt(n, 2, 0); + } + + float sinP[nmax - nmin]; + float cosP[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sinP[n - nmin] = std::sin(inPar(n, 4, 0)); + cosP[n - nmin] = std::cos(inPar(n, 4, 0)); + } + + // determine solution for straight line + float sl[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //sl[n - nmin] = - ( plNrm(n, 0, 0)*delta0[n - nmin] + plNrm(n, 1, 0)*delta1[n - nmin] + plNrm(n, 2, 0)*delta2[n - nmin] ) / ( plNrm(n, 0, 0)*cosP[n - nmin]/inPar(n,3,0) + plNrm(n, 1, 0)*sinP[n - nmin]/inPar(n,3,0) + plNrm(n, 2, 0)*std::cos(inPar(n,5,0))/std::sin(inPar(n,5,0))/inPar(n,3,0) ); + sl[n - nmin] = + -(plNrm(n, 0, 0) * delta0[n - nmin] + plNrm(n, 1, 0) * delta1[n - nmin] + plNrm(n, 2, 0) * delta2[n - nmin]) / + (plNrm(n, 0, 0) * cosP[n - nmin] * std::sin(inPar(n, 5, 0)) + + plNrm(n, 1, 0) * sinP[n - nmin] * std::sin(inPar(n, 5, 0)) + plNrm(n, 2, 0) * std::cos(inPar(n, 5, 0))); + } + + //float s[nmax - nmin]; + //first iteration outside the loop +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + s[n - nmin] = (std::abs(plNrm(n, 2, 0)) < 1.f + ? getS(delta0[n - nmin], + delta1[n - nmin], + delta2[n - nmin], + plNrm(n, 0, 0), + plNrm(n, 1, 0), + plNrm(n, 2, 0), + sinP[n - nmin], + cosP[n - nmin], + std::sin(inPar(n, 5, 0)), + std::cos(inPar(n, 5, 0)), + inPar(n, 3, 0), + inChg(n, 0, 0), + kinv[n - nmin]) + : (plPnt.constAt(n, 2, 0) - inPar.constAt(n, 2, 0)) / std::cos(inPar.constAt(n, 5, 0))); + } + + MPlexLV outParTmp; + + CMS_UNROLL_LOOP_COUNT(Config::Niter - 1) + for (int i = 0; i < Config::Niter - 1; ++i) { + parsFromPathL_impl(inPar, outParTmp, kinv, s, nmin, nmax); + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + delta0[n - nmin] = outParTmp(n, 0, 0) - plPnt(n, 0, 0); + delta1[n - nmin] = outParTmp(n, 1, 0) - plPnt(n, 1, 0); + delta2[n - nmin] = outParTmp(n, 2, 0) - plPnt(n, 2, 0); + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sinP[n - nmin] = std::sin(outParTmp(n, 4, 0)); + cosP[n - nmin] = std::cos(outParTmp(n, 4, 0)); + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + s[n - nmin] += (std::abs(plNrm(n, 2, 0)) < 1.f ? getS(delta0[n - nmin], + delta1[n - nmin], + delta2[n - nmin], + plNrm(n, 0, 0), + plNrm(n, 1, 0), + plNrm(n, 2, 0), + sinP[n - nmin], + cosP[n - nmin], + std::sin(inPar(n, 5, 0)), + std::cos(inPar(n, 5, 0)), + inPar(n, 3, 0), + inChg(n, 0, 0), + kinv[n - nmin]) + : (plPnt.constAt(n, 2, 0) - outParTmp.constAt(n, 2, 0)) / + std::cos(outParTmp.constAt(n, 5, 0))); + } + } //end Niter-1 + + // use linear approximation if s did not converge (for very high pT tracks) + for (int n = nmin; n < nmax; ++n) { +#ifdef DEBUG + if (debug) + std::cout << "s[n - nmin]=" << s[n - nmin] << " sl[n - nmin]=" << sl[n - nmin] + << " std::isnan(s[n - nmin])=" << std::isnan(s[n - nmin]) + << " std::isfinite(s[n - nmin])=" << std::isfinite(s[n - nmin]) + << " std::isnormal(s[n - nmin])=" << std::isnormal(s[n - nmin]) << std::endl; +#endif + if ((std::abs(sl[n - nmin]) > std::abs(s[n - nmin])) || std::isnormal(s[n - nmin]) == false) + s[n - nmin] = sl[n - nmin]; + } + +#ifdef DEBUG + if (debug) + std::cout << "s=" << s[0] << std::endl; +#endif + parsAndErrPropFromPathL_impl(inPar, inChg, outPar, kinv, s, errorProp, nmin, nmax, N_proc, pf); +} + +/* +// this function just calculates the path length (using the iterative approach as before) +// and then calls parsAndErrPropFromPathL_impl for error propagation +template +static inline void helixAtRFromIterativeCCS_impl_new(const Tf& __restrict__ inPar, + const Ti& __restrict__ inChg, + const Tf11& __restrict__ msRad, + TfLL1& __restrict__ outPar, + TfLLL& __restrict__ errorProp, + Ti& __restrict__ outFailFlag, // expected to be initialized to 0 + const int nmin, + const int nmax, + const int N_proc, + const PropagationFlags& pf) { + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //initialize erroProp to identity matrix + errorProp(n, 0, 0) = 1.f; + errorProp(n, 1, 1) = 1.f; + errorProp(n, 2, 2) = 1.f; + errorProp(n, 3, 3) = 1.f; + errorProp(n, 4, 4) = 1.f; + errorProp(n, 5, 5) = 1.f; + } + float r0[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //initialize erroProp to identity matrix + r0[n - nmin] = hipo(inPar(n, 0, 0), inPar(n, 1, 0)); + } + float k[nmax - nmin]; + if (pf.use_param_b_field) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), r0[n - nmin])); + } + } else { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield); + } + } + float r[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + r[n - nmin] = msRad(n, 0, 0); + } + float xin[nmax - nmin]; + float yin[nmax - nmin]; + float ipt[nmax - nmin]; + float phiin[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + xin[n - nmin] = inPar(n, 0, 0); + yin[n - nmin] = inPar(n, 1, 0); + ipt[n - nmin] = inPar(n, 3, 0); + phiin[n - nmin] = inPar(n, 4, 0); + } + + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "input parameters" + << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)=" + << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9) + << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0) + << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0) + << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0)); + } + + float kinv[nmax - nmin]; + float pt[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + kinv[n - nmin] = 1.f / k[n - nmin]; + pt[n - nmin] = 1.f / ipt[n - nmin]; + } + float D[nmax - nmin]; + float cosa[nmax - nmin]; + float sina[nmax - nmin]; + float cosah[nmax - nmin]; + float sinah[nmax - nmin]; + float id[nmax - nmin]; + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + D[n - nmin] = 0.; + } + + //no trig approx here, phi can be large + float cosPorT[nmax - nmin]; + float sinPorT[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosPorT[n - nmin] = std::cos(phiin[n - nmin]); + } +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + sinPorT[n - nmin] = std::sin(phiin[n - nmin]); + } + + float pxin[nmax - nmin]; + float pyin[nmax - nmin]; +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + pxin[n - nmin] = cosPorT[n - nmin] * pt[n - nmin]; + pyin[n - nmin] = sinPorT[n - nmin] * pt[n - nmin]; + } + + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "k=" << std::setprecision(9) << k[n - nmin] << " pxin=" << std::setprecision(9) << pxin[n - nmin] + << " pyin=" << std::setprecision(9) << pyin[n - nmin] << " cosPorT=" << std::setprecision(9) + << cosPorT[n - nmin] << " sinPorT=" << std::setprecision(9) << sinPorT[n - nmin] + << " pt=" << std::setprecision(9) << pt[n - nmin]); + } + + float oodotp[nmax - nmin]; + float pxinold[nmax - nmin]; + + CMS_UNROLL_LOOP_COUNT(Config::Niter) + for (int i = 0; i < Config::Niter; ++i) { +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //compute distance and path for the current iteration + r0[n - nmin] = hipo(xin[n - nmin], yin[n - nmin]); + } + + // Use one over dot product of transverse momentum and radial + // direction to scale the step. Propagation is prevented from reaching + // too close to the apex (dotp > 0.2). + // - Can / should we come up with a better approximation? + // - Can / should take +/- curvature into account? + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + oodotp[n - nmin] = + r0[n - nmin] * pt[n - nmin] / (pxin[n - nmin] * xin[n - nmin] + pyin[n - nmin] * yin[n - nmin]); + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + if (oodotp[n - nmin] > 5.0f || oodotp[n - nmin] < 0) // 0.2 is 78.5 deg + { + outFailFlag(n, 0, 0) = 1; + oodotp[n - nmin] = 0.0f; + } else if (r[n - nmin] - r0[n - nmin] < 0.0f && pt[n - nmin] < 1.0f) { + // Scale down the correction for low-pT ingoing tracks. + oodotp[n - nmin] = 1.0f + (oodotp[n - nmin] - 1.0f) * pt[n - nmin]; + } + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + // Can we come up with a better approximation? + // Should take +/- curvature into account. + id[n - nmin] = (r[n - nmin] - r0[n - nmin]) * oodotp[n - nmin]; + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + D[n - nmin] += id[n - nmin]; + } + + if constexpr (Config::useTrigApprox) { +#if !defined(__INTEL_COMPILER) +#pragma omp simd +#endif + for (int n = nmin; n < nmax; ++n) { + sincos4(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]); + } + } else { +#if !defined(__INTEL_COMPILER) +#pragma omp simd +#endif + for (int n = nmin; n < nmax; ++n) { + cosah[n - nmin] = std::cos(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f); + sinah[n - nmin] = std::sin(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f); + } + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + cosa[n - nmin] = 1.f - 2.f * sinah[n - nmin] * sinah[n - nmin]; + sina[n - nmin] = 2.f * sinah[n - nmin] * cosah[n - nmin]; + } + + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "Attempt propagation from r=" + << r0[n - nmin] << " to r=" << r[n - nmin] << std::endl + << " x=" << xin[n - nmin] << " y=" << yin[n - nmin] << " z=" << inPar(n, 2, 0) + << " px=" << pxin[n - nmin] << " py=" << pyin[n - nmin] + << " pz=" << pt[n - nmin] / std::tan(inPar(n, 5, 0)) << " q=" << inChg(n, 0, 0) << std::endl + << " r=" << std::setprecision(9) << r[n - nmin] << " r0=" << std::setprecision(9) << r0[n - nmin] + << " id=" << std::setprecision(9) << id[n - nmin] << " dr=" << std::setprecision(9) + << r[n - nmin] - r0[n - nmin] << " cosa=" << cosa[n - nmin] << " sina=" << sina[n - nmin] + << " dir_cos(rad,pT)=" << 1.0f / oodotp[n - nmin]); + } + +#pragma omp simd + for (int n = nmin; n < nmax; ++n) { + //update parameters + xin[n - nmin] = xin[n - nmin] + 2.f * k[n - nmin] * sinah[n - nmin] * + (pxin[n - nmin] * cosah[n - nmin] - pyin[n - nmin] * sinah[n - nmin]); + yin[n - nmin] = yin[n - nmin] + 2.f * k[n - nmin] * sinah[n - nmin] * + (pyin[n - nmin] * cosah[n - nmin] + pxin[n - nmin] * sinah[n - nmin]); + pxinold[n - nmin] = pxin[n - nmin]; //copy before overwriting + pxin[n - nmin] = pxin[n - nmin] * cosa[n - nmin] - pyin[n - nmin] * sina[n - nmin]; + pyin[n - nmin] = pyin[n - nmin] * cosa[n - nmin] + pxinold[n - nmin] * sina[n - nmin]; + } + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) + << " pxin=" << pxin[n - nmin] << " pyin=" << pyin[n - nmin]); + } + } // iteration loop + + //float s[nmax - nmin]; + MPlexQF s; + for (int n = nmin; n < nmax; ++n) { + //s[n - nmin] = D[n - nmin]/std::sin(inPar(n, 5, 0)); + s(n, 0, 0) = D[n - nmin]/std::sin(inPar(n, 5, 0)); + } + parsAndErrPropFromPathL_impl(inPar, inChg, outPar, kinv, s, errorProp, nmin, nmax, N_proc, pf); + + for (int n = nmin; n < nmax; ++n) { + dprint_np(n, + "propagation to R end (NEW), dump parameters\n" + << " D = " << D[n - nmin] << " alpha = " << D[n - nmin] * inPar(n, 3, 0) * kinv[n - nmin] << " kinv = " << kinv[n - nmin] << std::endl + << " pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r=" + << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl + << " mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl + << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " + << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) + << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl); + } + +#ifdef DEBUG + for (int n = nmin; n < nmax; ++n) { + if (debug && g_debug && n < N_proc) { + dmutex_guard; + std::cout << n << ": jacobian" << std::endl; + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 0, 0), + errorProp(n, 0, 1), + errorProp(n, 0, 2), + errorProp(n, 0, 3), + errorProp(n, 0, 4), + errorProp(n, 0, 5)); + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 1, 0), + errorProp(n, 1, 1), + errorProp(n, 1, 2), + errorProp(n, 1, 3), + errorProp(n, 1, 4), + errorProp(n, 1, 5)); + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 2, 0), + errorProp(n, 2, 1), + errorProp(n, 2, 2), + errorProp(n, 2, 3), + errorProp(n, 2, 4), + errorProp(n, 2, 5)); + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 3, 0), + errorProp(n, 3, 1), + errorProp(n, 3, 2), + errorProp(n, 3, 3), + errorProp(n, 3, 4), + errorProp(n, 3, 5)); + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 4, 0), + errorProp(n, 4, 1), + errorProp(n, 4, 2), + errorProp(n, 4, 3), + errorProp(n, 4, 4), + errorProp(n, 4, 5)); + printf("%5f %5f %5f %5f %5f %5f\n", + errorProp(n, 5, 0), + errorProp(n, 5, 1), + errorProp(n, 5, 2), + errorProp(n, 5, 3), + errorProp(n, 5, 4), + errorProp(n, 5, 5)); + printf("\n"); + } + } +#endif + +} +*/ + template static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, const Ti& __restrict__ inChg, @@ -69,6 +816,7 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, //dprint(std::endl); } + //debug = true; for (int n = nmin; n < nmax; ++n) { dprint_np(n, "input parameters" @@ -240,7 +988,7 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, << " r=" << std::setprecision(9) << r[n - nmin] << " r0=" << std::setprecision(9) << r0[n - nmin] << " id=" << std::setprecision(9) << id[n - nmin] << " dr=" << std::setprecision(9) << r[n - nmin] - r0[n - nmin] << " cosa=" << cosa[n - nmin] << " sina=" << sina[n - nmin] - << " dir_cos(rad,pT)=" << 1.0f / oodotp[n]); + << " dir_cos(rad,pT)=" << 1.0f / oodotp[n - nmin]); } //update derivatives on total distance @@ -457,9 +1205,12 @@ static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar, for (int n = nmin; n < nmax; ++n) { dprint_np(n, "propagation end, dump parameters\n" + << " D = " << D[n - nmin] << " alpha = " << alpha[n - nmin] << " kinv = " << kinv[n - nmin] + << std::endl << " pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r=" << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl - << " mom = " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " + << " mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl + << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl); }