% Options for packages loaded elsewhere
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
\PassOptionsToPackage{dvipsnames,svgnames,x11names}{xcolor}
%
\documentclass[
a4paper,
]{article}
\usepackage{amsmath,amssymb}
\usepackage{iftex}
\ifPDFTeX
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
\usepackage{lmodern}
\ifPDFTeX\else
% xetex/luatex font selection
\fi
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
\usepackage{xcolor}
\usepackage[paperwidth=8.00in,paperheight=10.00in,left=1.25in,textwidth=5.25in,top=1.00in,textheight=8.25in]{geometry}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\setcounter{secnumdepth}{5}
% Make \paragraph and \subparagraph free-standing
\makeatletter
\ifx\paragraph\undefined\else
\let\oldparagraph\paragraph
\renewcommand{\paragraph}{
\@ifstar
\xxxParagraphStar
\xxxParagraphNoStar
}
\newcommand{\xxxParagraphStar}[1]{\oldparagraph*{#1}\mbox{}}
\newcommand{\xxxParagraphNoStar}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
\let\oldsubparagraph\subparagraph
\renewcommand{\subparagraph}{
\@ifstar
\xxxSubParagraphStar
\xxxSubParagraphNoStar
}
\newcommand{\xxxSubParagraphStar}[1]{\oldsubparagraph*{#1}\mbox{}}
\newcommand{\xxxSubParagraphNoStar}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
\makeatother
\providecommand{\tightlist}{%
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\usepackage{longtable,booktabs,array}
\usepackage{calc} % for calculating minipage widths
% Correct order of tables after \paragraph or \subparagraph
\usepackage{etoolbox}
\makeatletter
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
\makeatother
% Allow footnotes in longtable head/foot
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
\makesavenoteenv{longtable}
\usepackage{graphicx}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
% definitions for citeproc citations
\NewDocumentCommand\citeproctext{}{}
\NewDocumentCommand\citeproc{mm}{%
\begingroup\def\citeproctext{#2}\cite{#1}\endgroup}
\makeatletter
% allow citations to break across lines
\let\@cite@ofmt\@firstofone
% avoid brackets around text for \cite:
\def\@biblabel#1{}
\def\@cite#1#2{{#1\if@tempswa , #2\fi}}
\makeatother
\newlength{\cslhangindent}
\setlength{\cslhangindent}{1.5em}
\newlength{\csllabelwidth}
\setlength{\csllabelwidth}{3em}
\newenvironment{CSLReferences}[2] % #1 hanging-indent, #2 entry-spacing
{\begin{list}{}{%
\setlength{\itemindent}{0pt}
\setlength{\leftmargin}{0pt}
\setlength{\parsep}{0pt}
% turn on hanging indent if param 1 is 1
\ifodd #1
\setlength{\leftmargin}{\cslhangindent}
\setlength{\itemindent}{-1\cslhangindent}
\fi
% set entry spacing
\setlength{\itemsep}{#2\baselineskip}}}
{\end{list}}
\usepackage{calc}
\newcommand{\CSLBlock}[1]{\hfill\break\parbox[t]{\linewidth}{\strut\ignorespaces#1\strut}}
\newcommand{\CSLLeftMargin}[1]{\parbox[t]{\csllabelwidth}{\strut#1\strut}}
\newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth - \csllabelwidth}{\strut#1\strut}}
\newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1}
\makeatletter
\@ifpackageloaded{bookmark}{}{\usepackage{bookmark}}
\makeatother
\makeatletter
\@ifpackageloaded{caption}{}{\usepackage{caption}}
\AtBeginDocument{%
\ifdefined\contentsname
\renewcommand*\contentsname{Table of contents}
\else
\newcommand\contentsname{Table of contents}
\fi
\ifdefined\listfigurename
\renewcommand*\listfigurename{List of Figures}
\else
\newcommand\listfigurename{List of Figures}
\fi
\ifdefined\listtablename
\renewcommand*\listtablename{List of Tables}
\else
\newcommand\listtablename{List of Tables}
\fi
\ifdefined\figurename
\renewcommand*\figurename{Figure}
\else
\newcommand\figurename{Figure}
\fi
\ifdefined\tablename
\renewcommand*\tablename{Table}
\else
\newcommand\tablename{Table}
\fi
}
\@ifpackageloaded{float}{}{\usepackage{float}}
\floatstyle{ruled}
\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]}
\floatname{codelisting}{Listing}
\newcommand*\listoflistings{\listof{codelisting}{List of Listings}}
\makeatother
\makeatletter
\makeatother
\makeatletter
\@ifpackageloaded{caption}{}{\usepackage{caption}}
\@ifpackageloaded{subcaption}{}{\usepackage{subcaption}}
\makeatother
\ifLuaTeX
\usepackage{selnolig} % disable illegal ligatures
\fi
\usepackage{bookmark}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
\urlstyle{same} % disable monospaced font for URLs
\hypersetup{
pdftitle={Smart Choices for Measurement Models},
pdfauthor={Pablo Rogers},
colorlinks=true,
linkcolor={blue},
filecolor={Maroon},
citecolor={Blue},
urlcolor={Blue},
pdfcreator={LaTeX via pandoc}}
\title{Smart Choices for Measurement Models}
\usepackage{etoolbox}
\makeatletter
\providecommand{\subtitle}[1]{% add subtitle to \maketitle
\apptocmd{\@title}{\par {\large #1 \par}}{}{}
}
\makeatother
\subtitle{Executable Manuscript Tutorial for your Confirmatory Factor
Analysis in R Environment}
\author{Pablo Rogers}
\date{March 7, 2024}
\begin{document}
\maketitle
\bookmarksetup{startatroot}
\section*{Abstract}\label{abstract}
\markboth{Abstract}{Abstract}
This article aims to accomplish three objectives: first, to compile
guidelines for the application of Confirmatory Factor Analysis (CFA), a
widely utilized technique in applied social sciences; second, to
demonstrate how these guidelines can be practically implemented through
a real-world example; and third, to structure this narrative using tools
that promote reproducibility, replicability, and transparency of
results. To this end, we propose a solution in the form of a tutorial
article wherein the key decisions made in conducting a CFA are validated
through recent literature and presented within a dynamic document
framework. This framework enables readers to access the article's source
code, utilized data, analytical execution codes, and various reading
media. We anticipate that by employing this pedagogical approach,
developed entirely within an open environment
(\href{https://github.com/phdpablo/smart-cfa/}{https://phdpablo.github.io/smart-cfa/}),
researchers proficient in specific statistical techniques relevant to
their domains will adopt and disseminate this proposal, thereby
benefiting their colleagues.
\textbf{Keywords:} Confirmatory Factor Analysis, Structural Equation
Modeling, Internal Structure Validity, Open Science, \emph{lavaan}.
\emph{How to cite this article:}
Rogers, P. (2024, June 10). Smart Choices for Measurement Models:
Executable Manuscript Tutorial for your Confirmatory Factor Analysis in
R Environment. \emph{PsyArXiv Preprints.}
\url{https://doi.org/10.31234/osf.io/2dy3z}
\bookmarksetup{startatroot}
\section{Introduction}\label{introduction}
Confirmatory Factor Analysis (CFA) is a key method for assessing the
validity of a measurement instrument through its internal structure
(Bandalos 2018; Hughes 2018; Sireci and Sukin 2013). Validity is
arguably the most crucial characteristic of a measurement model (Furr
2021), as it addresses the essential question of what measuring
instruments truly assess (Bandalos 2018). This concern is closely linked
with the classical definition of validity: the degree to which a test
measures what it claims to measure (Bandalos 2018; Furr 2021; Sireci and
Sukin 2013; Urbina 2014), aligning with the tripartite model still
embraced by numerous scholars (Widodo 2018).
The tripartite model of validity frames the concept using three
categories of evidence: content, criterion, and construct (Bandalos
2018). Content validity pertains to the adequacy and representativeness
of test items relative to the domain or objective under investigation
(Cohen, Schneider, and Tobin 2022). Criterion validity is the
correlation between test outcomes and a significant external criterion,
such as performance on another measure or future occurrences (Cohen,
Schneider, and Tobin 2022). Construct validity evaluates the test's
capacity to measure the theoretical construct it is intended to assess,
taking into account related hypotheses and empirical data (Cohen,
Schneider, and Tobin 2022).
Introduced in the American Psychological Association (APA) ``Standards
for Educational and Psychological Testing'' in 1966, the tripartite
concept of validity has been a cornerstone in the social sciences for
decades (Bandalos 2018). However, its fragmented and confusing nature
has led to widespread criticism, prompting a shift towards a more
holistic view of validity (Sireci and Sukin 2013). This evolution was
signified by the publication of the 1999 standards (AERA, APA, and NCME
1999), and further by the 2014 standards (AERA, APA, and NCME 2014),
which redefined test validity in terms of the interpretations and uses
of test scores (Furr 2021). Under this new paradigm, validation requires
diverse theoretical and empirical evidence, recognizing validity as a
unified concept -- construct validity -- encompassing various evidence
sources for evaluating potential interpretations of test scores for
specific purposes (Furr 2021; Urbina 2014).
Thus, key authorities in psychological assessment now define validity as
the degree to which evidence and theory support the interpretations of
test scores for their intended purposes (AERA, APA, and NCME 2014).
Validity involves a comprehensive evaluation of how well empirical
evidence and theoretical rationales uphold the conclusions and actions
derived from test scores or other assessment types (Bandalos 2018; Furr
2021; Urbina 2014).
According to APA guidelines (AERA, APA, and NCME 2014), five types of
validity evidence are critical: content, response process, association
with external variables, consequences of test use, and internal
structure. Content validity examines the extent to which test content
accurately and exclusively represents the domain of interest (Furr 2021).
The response process refers to the link between the construct and the
specifics of the examinees' responses (Sireci and Sukin 2013). Validity
based on external variables concerns the test's correlation with other
measures or constructs expected to be related or unrelated to the
evaluated construct (Furr 2021). The implications of test use focus on
the positive or negative effects on the individuals or groups assessed
(Bandalos 2018).
Evidence based on internal structure assesses how well the interactions
among test items and their components align with the theoretical
framework used to explain the outcomes of the measurement instrument
(AERA, APA, and NCME 2014; Rios and Wells 2014). Sources of internal
structural validity evidence may include analyses of reliability,
dimensionality, and measurement invariance.
Reliability is gauged by internal consistency, reflecting i) the
reproducibility of test scores under consistent conditions and ii) the
ratio of true score variance to observed score variance (Rios and Wells
2014). Dimensionality analysis aims to verify if item interrelations
support the inferences made by the measurement model's scores, which are
assumed to be unidimensional (Rios and Wells 2014). Measurement
invariance confirms that item properties remain consistent across
specified groups, such as gender or ethnicity.
CFA facilitates the integration of these diverse sources to substantiate
the validity of the internal structure (Bandalos 2018; Flora and Flake
2017; Hughes 2018; Reeves and Marbach-Ad 2016; Rios and Wells 2014). In
the applied social sciences, researchers often have a theoretical
dimensional structure in mind (Sireci and Sukin 2013), and CFA is
employed to align the structure of the hypothesized measurement model
with the observed data (Rios and Wells 2014).
CFA constitutes a fundamental aspect of the covariance-based Structural
Equation Modeling (SEM) framework (CB-SEM) (Brown 2023; Harrington 2009;
Jackson, Gillaspy, and Purc-Stephenson 2009; Kline 2023; Nye 2022). SEM
is a prevalent statistical approach in the applied social sciences
(Hoyle 2023; Kline 2023), serving as a generalization of multiple
regression and factor analysis (Hoyle 2023). This methodology
facilitates the examination of complex relationships between variables
and the consideration of measurement error, aligning with the
requirements for measurement model validation (Hoyle 2023).
Applications of CFA present significant complexities (Crede and Harms
2019; Jessica K. Flake, Pek, and Hehman 2017; Jessica Kay Flake and
Fried 2020; Jackson, Gillaspy, and Purc-Stephenson 2009; Nye 2022;
Rogers 2024), influenced by data structure, measurement level of items,
research goals, and other factors. CFA can proceed smoothly in scenarios
involving unidimensional measurement models with continuous items and
large samples, but may encounter challenges, such as diminished SEM
flexibility, when dealing with multidimensional models with ordinal
items and small sample sizes (Rogers 2024).
This leads to an important question: Can certain strategies within CFA
applications simplify the process for social scientists seeking evidence
of validity in the internal structure of a measurement model? This
inquiry does not suggest that research objectives should conform to
quantitative methods. Rather, research aims guide scientific inquiry,
defining our learning targets and priorities. Quantitative methods serve
as tools towards these ends, not as objectives themselves. They
represent one among many tools available to researchers, with the
study's purpose dictating method selection (Pilcher and Cortazzi 2023).
However, as the scientific method is an ongoing journey of discovery,
many questions, especially in Psychometrics concerning measurement model
validation, remain open-ended. The lack of consensus on complex and
varied topics suggests researchers should opt for paths offering maximal
analytical flexibility, enabling exploration of diverse methodologies
and solutions while keeping research objectives forefront (Price 2017).
A recurrent topic in Factor Analysis (FA) is how to handle the
measurement level of scale items. Empirical studies (Rhemtulla,
Brosseau-Liard, and Savalei 2012; Robitzsch 2022, 2020) advocate treating
scales with five or more response options as continuous variables, a
practice shown to enhance CFA flexibility and to support validity
evidence for the internal structure. The FA literature acknowledges
methodological dilemmas faced when dealing with binary and/or ordinal
response items with fewer than five options (Rogers 2024, 2022).
For continuous scale items, the maximum likelihood (ML) estimator and
its robust variations are applicable. For non-continuous items,
estimators from the categorical least squares (cat-LS) family are recommended (Nye
2022; Rogers 2024, 2022). Though cat-LS estimators impose fewer
assumptions on data, they require larger sample sizes, more
computational power, and greater researcher expertise (Robitzsch 2020).
Assessing model fit is more challenging with cat-LS estimated models
compared to those estimated by ML, which are better established and more
familiar to researchers (Rhemtulla, Brosseau-Liard, and Savalei 2012).
Despite their increasing popularity, cat-LS models are newer, less
recognized, and seldom available in software (Rhemtulla, Brosseau-Liard,
and Savalei 2012). Handling missing data remains straightforward with ML
models using the Full Information ML (FIML) method but is problematic
with ordinal data (Rogers 2024).
Thus, we can optimize the potential of some of the available software
(Arbuckle 2019; Bentler and Wu 2020; Fox 2022; JASP Team 2023; Jöreskog
and Sörbom 2022; Muthén and Muthén 2023; Neale et al. 2016; Ringle,
Wende, and Becker 2022; Rosseel 2012; The jamovi project 2023) and
overcome many of the limitations for ordinal and nominal data, which are
still present in some of them (Arbuckle 2019; Bentler and Wu 2020; Neale
et al. 2016; Ringle, Wende, and Becker 2022).
This discussion does not intend to oversimplify, digress, or claim
superiority of one software over another. Rather, it underscores a
fundamental statistical principle: transitioning from nominal to ordinal
and then to scalar measurement levels increases the flexibility of
statistical methods. Empirical studies in CFA support these
clarifications (Rhemtulla, Brosseau-Liard, and Savalei 2012; Robitzsch
2022, 2020).
This article assists applied social scientists in decision-making from
selecting a measurement model to comparing and updating models for
enhanced CFA flexibility. It addresses power analysis, data
preprocessing, estimation procedures, and model modification from three
angles: smart choices or recommended practices (Jessica K. Flake, Pek,
and Hehman 2017; Nye 2022; Rogers 2024), pitfalls to avoid (Crede and
Harms 2019; Rogers 2024), and essential reporting elements (Jessica Kay
Flake and Fried 2020; Jackson, Gillaspy, and Purc-Stephenson 2009;
Rogers 2024).
The aim is to guide researchers through CFA to access the underlying
structure of measurement models without falling into common traps at any
stage of the validation process. Early-stage decisions can preempt later
limitations, while missteps may necessitate exploratory research or
additional efforts in subsequent phases.
Practically, this includes an R tutorial utilizing the lavaan package
(Rosseel 2012), adhering to reproducibility, replicability, and
transparency standards of the Open Science movement (Gilroy and Kaplan
2019; Kathawalla, Silverstein, and Syed 2021; Klein et al. 2018).
Tutorial articles, following the FAIR principles (Findable, Accessible,
Interoperable, and Reusable) (Wilkinson et al. 2016), play a vital role
in promoting open science (Martins 2021; Mendes-Da-Silva 2023), by
detailing significant methods or application areas in an accessible yet
comprehensive manner. This encourages adherence to best practices among
researchers, minimizing the impact of positive publication bias.
This tutorial is structured into three sections, beyond the introductory
discussion. It includes a thorough review of CFA recommended practices,
an example of real-world research application in the R ecosystem, and
final considerations, following Martins (2021) format for tutorial
articles. This approach, combined with workflow recommendations for
reproducibility, aims to support the applied social sciences community
in effectively utilizing CFA (Martins 2021; Mendes-Da-Silva 2023).
\bookmarksetup{startatroot}
\section{Smart Choices in CFA}\label{smart-choices-in-cfa}
This paper presents a comprehensive approach to conducting a standard
CFA within the applied social sciences, following the guidelines
outlined by Rogers (2024). According to Rogers (2024), a typical CFA
study seeks to fit a reflective common factor model with a predefined
multifactor structure, established psychometric properties, and a
maximum of five Likert-type response options. This scenario frequently
occurs in research endeavors where the measurement model facilitates the
examination of hypotheses derived from the structural model.
The initial phase in such research involves data preprocessing.
Specifically, for categorical data, Rogers (2024) advises employing
multiple imputation to handle missing data, taking into consideration
the limitations posed by available software and methodologies (Rogers
2024). When a measurement model allows for the treatment of items as
continuous variables, addressing this challenge can be deferred to the
estimation process stage through the selection of an appropriate
estimator (Robitzsch 2022).
This paper reinterprets the insights from Rogers (2024) for CFAs that
accommodate continuous item treatment. Thus, a strategic choice involves
opting for measurement models that permit this approach, thereby
circumventing methodological hurdles (Robitzsch 2022, 2020) associated
with binary and/or ordinal response items with up to four or five
gradations. Such a decision influences various aspects of the research
process, including the choice of software, power analysis, estimation
techniques, criteria for model adjustment, and model comparisons. These
choices, in turn, affect requirements concerning sample size,
computational resources, and the researcher's expertise (Robitzsch
2020).
Subsequent sections delve into themes previously summarized by Rogers
(2024), specifically concerning CFAs with ordinal items. These themes
are explored in terms of recommended practices (Jessica K. Flake, Pek,
and Hehman 2017; Nye 2022; Rogers 2024), pitfalls to avoid (Crede and
Harms 2019; Rogers 2024), and reporting guidelines (Jessica Kay Flake
and Fried 2020; Jackson, Gillaspy, and Purc-Stephenson 2009; Rogers
2024), all within the context of selecting measurement models that
accommodate continuous data interpretation.
Assuming that readers possess a foundational understanding of the topic,
this paper omits certain technical details, directing readers to
authoritative texts (Brown 2015; Kline 2023) and scholarly articles that
provide an introduction to Covariance-Based Structural Equation Modeling
(CB-SEM) (Davvetas et al. 2020; Shek and Yu 2014). The discussion is
framed within the CB-SEM paradigm (Brown 2015; Jackson, Gillaspy, and
Purc-Stephenson 2009; Kline 2023; Nye 2022), with a focus on CFA. The
paper explicitly excludes discussions on measurement model modifications
in Variance-Based SEM (VB-SEM), which are predominantly addressed in the
literature on Partial Least Squares SEM (PLS-SEM) (Hair et al. 2022,
2017; Henseler 2021).
\subsection{Measurement Model
Selection}\label{measurement-model-selection}
Selecting an appropriate measurement model is a critical initial step in
the research process. For robust analysis, it is advisable to prioritize
models that provide five or more ordinal response options. Research has
shown that a higher number of response gradations enhances the ability
to detect inaccurately defined models (Green et al. 1997;
Maydeu-Olivares, Fairchild, and Hall 2017), even when using estimators
designed for ordinal items (Xia and Yang 2018). This strategy also
mitigates some of the methodological challenges associated with the
analysis of ordinal data in CFA (Rhemtulla, Brosseau-Liard, and Savalei
2012; Robitzsch 2022, 2020).
When choosing a measurement scale, it is crucial to select ones that
have been validated in the language of application and with the study's
target audience (Jessica K. Flake, Pek, and Hehman 2017). Avoid scales
that are proprietary or specific to certain professions. An examination
of your country's Psychological Test Assessment System can be an
effective starting point. If the desired scale is not found within these
resources, consider looking into scales developed with the support of
public institutions, non-governmental organizations, research centers,
or universities, as these entities often invest significant resources in
validating measurement models for broader public policy purposes.
An extensive literature review is essential for selecting a suitable
measurement model. This should include consulting specialized journals,
books, technical reports, and academic dissertations or theses.
Schumacker, Wind, and Holmes (2021) provide a detailed guide for
initiating this search. Consideration should also be given to systematic
reviews or meta-analyses focusing on measurement models related to your
topic of interest. It is important to review both the original articles
on the scales and subsequent applications. Kline (2016) offers a useful
checklist for assessing various measurement methods.
Incorporate control questions, such as requiring respondents to select
``strongly agree'' on specific items, and monitor survey response times
to gauge participant engagement (Collier 2020).
Avoid adopting measurement models designed for narrow purposes or those
lacking rigorous psychometric validation (Jessica Kay Flake and Fried
2020; Kline 2016). The mere existence of a scale does not ensure its
validity (Jessica K. Flake, Pek, and Hehman 2017). Also, steer clear of
seldom-used or outdated scales, as they may have compromised
psychometric properties. Translating a scale from another language for
immediate use without thorough translation and back-translation processes
is inadvisable. Be cautious of overlooking alternative factorial
structures (e.g., higher-order or bifactor models) that could
potentially salvage the research if considered thoroughly (Crede and
Harms 2019).
When selecting a scale, justify its choice by highlighting its strong
psychometric properties, including previous empirical evidence of its
application within the target population and its reliability and
validity metrics (Jessica Kay Flake and Fried 2020; Jackson, Gillaspy,
and Purc-Stephenson 2009; Kline 2016). If the scale has multiple
potential factorial structures, provide a rationale for the chosen model
to prevent the misuse of CFA for exploratory purposes (Jackson,
Gillaspy, and Purc-Stephenson 2009).
Clearly specify the selected model and rationalize your choice by
detailing its advantages over other theoretical models. Illustrating the
models under consideration can further clarify your research approach
(Jackson, Gillaspy, and Purc-Stephenson 2009). Finally, identify and
explain any potential cross-loadings based on prior empirical evidence
(Brown 2023; Nye 2022), ensuring a comprehensive and well-justified
methodological foundation for your study.
\subsection{Power Analysis}\label{power-analysis}
When addressing Power Analysis (PA) in CFA and SEM, it's essential to
move beyond general rules of thumb for determining sample sizes.
Commonly cited guidelines suggesting minimum sizes or specific ratios of
observations to parameters (e.g., 50, 100, 200, 300, 400, 500, 1000 for
sample sizes or 20/1, 10/1, 5/1 for observation/parameter ratios) (Kline
2023; Kyriazos 2018) are based on controlled conditions that may not
directly transfer to your study's context.
Reliance on lower-bound sample sizes as a substitute for thorough PA
risks inadequate power for detecting meaningful effects in your model
(Westland 2010; Yilin Andre Wang 2023). Tools like Soper's calculator
(\url{https://www.danielsoper.com/statcalc/}), while popular and
frequently cited (as of 02/20/2024, with almost four years of existence,
it had collected more than 1,000 citations on Google Scholar), should
not replace a tailored PA approach. Such calculators, despite their
utility, may not fully accommodate the complexities and specific
requirements of your research design (Kyriazos 2018; Feng and Hancock
2023; Moshagen and Bader 2023).
A modern perspective on sample size determination emphasizes customizing
power calculations to fit the unique aspects of each study,
incorporating specific research settings and questions (Feng and Hancock
2023; Moshagen and Bader 2023). This approach underscores that there is
no universal sample size or minimum that applies across all research
scenarios (Kline 2023).
Planning for PA should ideally precede data collection, enhancing the
researcher's understanding of the study and facilitating informed
decisions regarding the measurement model based on existing literature
and known population characteristics (Feng and Hancock 2023; Leite,
Bandalos, and Shen 2023). A priori PA not only ensures adequate sample
size for detecting the intended effects, minimizing Type II errors, but
also aids in budgeting for data collection and enhancing overall
research design (Feng and Hancock 2023).
PA in SEM can be approached analytically, using asymptotic theory, or
through simulation methods. Analytical methods require specifying the
effect size in relation to the non-centrality parameter, while simulated
PA leverages a population model to empirically estimate power (Moshagen
and Bader 2023; Feng and Hancock 2023). These approaches are applicable
to assessing both global model fit and specific model parameters.
For CFA, evaluating the power related to the global fit of the
measurement model is recommended (Nye 2022). Although analytical
solutions have their limitations, they can serve as preliminary steps,
complemented by simulation techniques for a more comprehensive PA (Feng
and Hancock 2023; Moshagen and Bader 2023).
Several resources offer analytical solutions for global fit PA,
including ShinyApps by Jak et al. (2021), Moshagen and Bader (2023), Y.
Andre Wang and Rhemtulla (2021), and Zhang and Yuan (2018), with the
last application providing a comprehensive suite for Monte Carlo
simulation (MCS) that accommodates missing data, non-normal
distributions, and facilitates model testing without extensive coding
(Y. Andre Wang and Rhemtulla 2021). For an overview of these solutions
and a discussion of analytical approaches, see Feng and Hancock (2023),
Jak et al. (2021), Nye (2022), and Yilin Andre Wang (2023).
However, it is a smart decision to run an MCS for the PA of your CFA
model using solutions that support the reproducibility and replicability
of the results. Accordingly, even the analytical solutions that the
researcher may use as a starting point are best run in the R environment,
via the semTools package (Jak et al. 2021) or semPower 2 (Jobst, Bader,
and Moshagen 2023; Moshagen and Bader 2023). The first option is
compatible with the lavaan syntax and is usually sufficient; the second,
although it includes MCS in some cases, has a more demanding syntax.
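
As a reproducible starting point, the analytical approach can be run
directly in R. The sketch below is a minimal example using the
\texttt{findRMSEApower()} and \texttt{findRMSEAsamplesize()} functions
from \emph{semTools}; the degrees of freedom, planned sample size, and
the pair of RMSEA values are illustrative assumptions, not
recommendations.

\begin{verbatim}
# Analytical power analysis for global model fit (RMSEA framework).
# All numeric values below are placeholders for illustration only.
library(semTools)

df_model  <- 74    # degrees of freedom implied by the hypothesized CFA
n_planned <- 300   # candidate sample size

# Power to reject close fit (H0: RMSEA = .05) when the true RMSEA is .08
findRMSEApower(rmsea0 = 0.05, rmseaA = 0.08,
               df = df_model, n = n_planned, alpha = 0.05)

# Sample size required for 80% power under the same pair of RMSEA values
findRMSEAsamplesize(rmsea0 = 0.05, rmseaA = 0.08,
                    df = df_model, power = 0.80, alpha = 0.05)
\end{verbatim}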
For detailed and tailored PA, especially in complex models or unique
study designs, the simsem package offers a robust solution, allowing for
the relaxation of traditional assumptions and supporting the use of
robust estimators. This package, which utilizes the familiar lavaan
syntax, simplifies the learning curve for researchers already accustomed
to SEM analyses, providing a user-friendly interface for conducting MCS
(Pornprasertmanit et al. 2022).
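
A minimal Monte Carlo sketch with \emph{simsem} follows. The population
model (standardized loadings of .70, a factor correlation of .30) and
the candidate sample size are hypothetical placeholders; function names
and arguments follow the simsem documentation and should be checked
against the installed version.

\begin{verbatim}
# Monte Carlo power analysis sketch with simsem (lavaan-style syntax).
library(simsem)

pop_model <- "
  F1 =~ 0.7*x1 + 0.7*x2 + 0.7*x3 + 0.7*x4
  F2 =~ 0.7*x5 + 0.7*x6 + 0.7*x7 + 0.7*x8
  F1 ~~ 1*F1
  F2 ~~ 1*F2
  F1 ~~ 0.3*F2
  x1 ~~ 0.51*x1
  x2 ~~ 0.51*x2
  x3 ~~ 0.51*x3
  x4 ~~ 0.51*x4
  x5 ~~ 0.51*x5
  x6 ~~ 0.51*x6
  x7 ~~ 0.51*x7
  x8 ~~ 0.51*x8
"

fit_model <- "
  F1 =~ x1 + x2 + x3 + x4
  F2 =~ x5 + x6 + x7 + x8
"

out <- sim(nRep = 500, model = fit_model, n = 300,
           generate = pop_model, lavaanfun = "cfa")

summary(out)       # convergence rates and fit-index distributions
summaryParam(out)  # relative bias and empirical power per parameter
\end{verbatim}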
Publishing the sampling design and methodology enhances the
reproducibility and replicability of research, contributing to the
scientific community's collective understanding and validation of
measurement models (Jessica K. Flake, Pek, and Hehman 2017; Jessica Kay
Flake et al. 2022; Jessica Kay Flake and Fried 2020; Leite, Bandalos,
and Shen 2023). In the context of CFA, acknowledging the power
limitations of your study can signal potential concerns for the broader
inferences drawn from your research, emphasizing the importance of
external validity and the relevance of the outcomes over mere precision
(Leite, Bandalos, and Shen 2023).
\subsection{Pre-processing}\label{pre-processing}
Upon gathering and tabulating original data, ideally in non-binary
formats such as CSV, TXT, or JSON, the first step in data preprocessing
should be to eliminate responses from participants who have abandoned
the study. This identification often occurs at the end of preprocessing,
where these incomplete responses can offer insights into handling
missing data, outliers, and multicollinearity.
Incorporating control questions and measuring response time allows
researchers to further refine their dataset by excluding participants
who fail control items or complete the survey unusually quickly (Collier
2020). Calculating individual response variability (standard deviation)
can identify respondents who may not have engaged meaningfully with the
survey, indicated by minimal variation in their responses.
These preliminary data cleaning steps are fundamental yet frequently
overlooked in empirical research. They can significantly enhance data
quality before engaging in more complex statistical analyses. Visual and
descriptive examination of measurement model items is implicitly
beneficial for any statistical investigation and should be considered
standard practice.
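
As an illustration, the sketch below applies these screening steps in
base R. It assumes a hypothetical raw data frame \texttt{raw} holding
eight scale items (\texttt{x1}--\texttt{x8}), a completion time in
seconds (\texttt{duration}), and an attention-check item
(\texttt{check1}) whose expected answer is 5; every name and cutoff is a
placeholder to be adapted to the actual instrument.

\begin{verbatim}
# Basic respondent screening (illustrative variable names and cutoffs).
items <- paste0("x", 1:8)

keep <- !is.na(raw$check1) & raw$check1 == 5         # passed control item
keep <- keep & raw$duration >= 120                   # not unusually fast
item_sd <- apply(raw[, items], 1, sd, na.rm = TRUE)  # straight-lining check
keep <- keep & item_sd > 0
keep[is.na(keep)] <- FALSE

dat <- raw[keep, ]
summary(dat[, items])  # visual/descriptive inspection before modeling
\end{verbatim}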
While data transformation methods like linearization or normalization
are available, they are generally not necessary given the robust
estimation processes that can handle non-normal data (Brown 2015).
Parceling items is also discouraged due to its potential to obscure
underlying multidimensional structures (Brown 2015; Crede and Harms
2019).
Addressing missing data, outliers, and multicollinearity is critical.
Single imputation methods should be avoided as they underestimate error
variance and can lead to identification problems in your model (Enders
2023). For missing data under 5\%, the impact may be minimal, but for
higher rates, Full Information ML (FIML) or Multiple Imputation (MI)
should be utilized, with FIML often being the most straightforward and
effective choice for CFA (Brown 2015; Kline 2023).
FIML and MI are preferred for handling missing data because they produce
consistent and efficient parameter estimates under similar conditions
(Enders 2023; Kline 2023). FIML can also be adapted for non-normal data
using robust estimators (Brown 2015).
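
Before choosing between FIML and MI, it helps to quantify the missing
data. The short sketch below reuses the hypothetical \texttt{dat} and
\texttt{items} objects from the screening example above.

\begin{verbatim}
# Proportion of missing values per item and per respondent.
col_miss <- colMeans(is.na(dat[, items]))
row_miss <- rowMeans(is.na(dat[, items]))

round(100 * col_miss, 1)  # % missing by item
mean(row_miss > 0)        # share of respondents with at least one gap
# If missingness is non-trivial, FIML can be requested later in lavaan
# (missing = "fiml"); multiple imputation (e.g., the mice package) is an
# alternative when FIML is unavailable for the chosen estimator.
\end{verbatim}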
Calculating the Variance Inflation Factor (VIF) helps identify items
with problematic multicollinearity (VIF \textgreater{} 10), which should
be addressed to prevent model convergence issues and misinterpretations
(Kline 2016; Whittaker and Schumacker 2022). Reflective constructs in
CFA require some level of item correlation but not to the extent that it
causes statistical or validity concerns.
Consider multivariate outliers rather than univariate ones, identifying
and assessing their exclusion based on sample characteristics. Reporting
all data cleaning processes, including any loss of items and strategies
for assessing respondent engagement, is crucial for transparency.
Additionally, documenting signs of multicollinearity and the software or
packages used (with versions) enhances the reproducibility and
credibility of the research (Jessica Kay Flake and Fried 2020; Jackson,
Gillaspy, and Purc-Stephenson 2009).
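
The sketch below illustrates these checks with base R and the \emph{car}
package, again using the hypothetical \texttt{dat} and \texttt{items}
objects; regressing one item on the remaining items to obtain VIFs, and
the chi-square criterion for Mahalanobis distances, are assumed
conveniences rather than fixed rules.

\begin{verbatim}
# Multicollinearity: VIFs from a regression of one item on the others.
library(car)
vif_check <- lm(x1 ~ ., data = dat[, items])
vif(vif_check)            # values > 10 flag problematic items

# Multivariate outliers: Mahalanobis distances vs. a chi-square criterion.
X  <- na.omit(dat[, items])
d2 <- mahalanobis(X, colMeans(X), cov(X))
cutoff <- qchisq(0.999, df = length(items))
sum(d2 > cutoff)          # candidate outliers to inspect, not auto-delete
\end{verbatim}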
Finally, making raw data public adheres to the principles of open
science, promoting transparency and allowing for independent validation
of research findings (Crede and Harms 2019; Jessica Kay Flake et al.
2022; Jessica Kay Flake and Fried 2020). This practice not only
contributes to the scientific community's collective knowledge base but
also reinforces the integrity and reliability of the research conducted.
\subsection{Estimation Process}\label{estimation-process}
In CFA with ordinal items, such as those involving Likert-type scales
with up to five points, Rogers (2024) advocates for the use of
estimators from the categorical least squares (cat-LS) family. Specifically,
for smaller samples, the recommendation is to utilize the Unweighted
Least Squares (ULS) in its robust form (RULS), and for larger samples,
the Diagonally Weighted Least Squares (DWLS) in its robust version
(RDWLS), citing substantial supporting research.
Despite this, empirical evidence (Rhemtulla, Brosseau-Liard, and Savalei
2012; Robitzsch 2022) and theoretical considerations (Robitzsch 2020)
suggest that treating ordinal data as continuous can yield acceptable
outcomes when the response options number five or more. Particularly
with 6--7 categories, comparisons between methods under various
conditions reveal little difference, and it is recommended to use a
greater number of response alternatives ($\geq 5$) to enhance the power for
detecting model misspecifications (Maydeu-Olivares, Fairchild, and Hall
2017).
The ML estimator, noted for its robustness to minor deviations from
normality (Brown 2015), is further improved by using robust versions
like MLR (employing Huber-White standard errors and Yuan-Bentler scaled
\(\chi^2\)). This adjustment allows for generating robust standard errors
and adjusted test statistics, with MLR offering extensive applicability
including in scenarios of missing data (RFIML) or where data breaches
the independence of observations assumption (Brown 2015; Rosseel 2012).
Comparative empirical studies have supported the effectiveness of MLR
against alternative estimators (Bandalos 2014; Holgado-Tello,
Morata-Ramirez, and García 2016; Li 2016; Nalbantoğlu-Yılmaz 2019; Yang
and Liang 2013; Yang-Wallentin, Jöreskog, and Luo 2010).
Researchers are advised to carefully describe and justify the chosen
estimation method based on the data characteristics and the specific
model being evaluated (Crede and Harms 2019). It is also critical to
report any estimation challenges encountered, such as algorithm
non-convergence or model misidentification (Nye 2022). In case of
estimation difficulties, alternative approaches like MLM estimators
(employing robust standard errors and Satorra-Bentler scaled \(\chi^2\))
or the default ML with non-parametric bootstrapping, as proposed by
Bollen-Stine, can be considered. This latter approach is also capable of
accommodating missing data (Brown 2015; Kline 2023).
Additionally, it is important to clarify how the latent variables were
scaled -- for example, whether a marker-variable loading or the factor
variance was fixed (= 1) (Jackson,
Gillaspy, and Purc-Stephenson 2009), and to provide both standardized
and unstandardized parameter estimates (Nye 2022). These steps are
crucial for ensuring transparency, reproducibility, and the ability to
critically assess the validity of the CFA results.
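
A minimal lavaan sketch of this estimation setup follows, assuming the
hypothetical two-factor structure and the screened data frame
\texttt{dat} from the earlier examples; the MLR and FIML options are
requested through the documented \texttt{estimator} and \texttt{missing}
arguments.

\begin{verbatim}
# CFA estimation with robust ML (MLR) and FIML for missing data.
library(lavaan)

cfa_model <- "
  F1 =~ x1 + x2 + x3 + x4
  F2 =~ x5 + x6 + x7 + x8
"

fit <- cfa(cfa_model, data = dat,
           estimator = "MLR",    # Huber-White SEs, Yuan-Bentler chi-square
           missing   = "fiml",   # full-information ML for missing data
           std.lv    = TRUE)     # scale factors by fixing variances to 1

# Report both unstandardized and standardized estimates.
summary(fit, standardized = TRUE, fit.measures = TRUE)
standardizedSolution(fit)
\end{verbatim}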
\subsection{Model Fit}\label{model-fit}
In conducting CFA with ordinal items, such as Likert-type scales, it's
crucial to approach model evaluation with nuance and avoid reliance on
rigid cutoff values for fit indices. Adhering strictly to traditional
cutoffs -- whether more conservative (e.g., SRMR $\leq$ .06, RMSEA $\leq$ .06, CFI
$\geq$ .95) or less conservative (e.g., RMSEA $\leq$ .08, CFI $\geq$ .90, SRMR $\leq$ .08)
-- should not be the sole criterion for model acceptance (Xia and Yang
2019). The origins of these thresholds are in simulation studies with
specific configurations (up to three factors, fifteen items, factor
loadings between 0.7 and 0.8) (West et al. 2023), and may not
universally apply due to the variance in the number of items, factors,
model degrees of freedom, misfit types, and presence of missing data
(Groskurth, Bluemke, and Lechner 2023; Niemand and Mai 2018; West et al.
2023).
Evaluation of global fit indices (SRMR, RMSEA, CFI) should be done in a
collective manner, rather than fixating on any single index. A deviation
from traditional cutoffs warrants further investigation into whether the
discrepancy is attributable to data characteristics or limitations of
the index, rather than indicating a fundamental model misspecification
(Nye 2022). Interpreting fit indices as effect sizes can offer a more
meaningful assessment of model fit, aligning with their original
conceptualization (McNeish and Wolf 2023a; McNeish 2023b).
The SRMR is noted for its robustness across various conditions,
including non-normality and different measurement levels of items.
Pairing SRMR with CFI can help balance Type I and Type II errors, but
reliance on alternative indices may increase the risk of Type I error
(Mai, Niemand, and Kraus 2021; Niemand and Mai 2018).
Emerging methods like the Dynamic Fit Index (DFI) and Flexible Cutoffs
(FCO) offer tailored approaches to evaluating global fit. DFI, based on
simulation, provides model-specific cutoff points, adjusting simulations
to match the empirical model's characteristics (McNeish 2023a; McNeish
and Wolf 2023b; McNeish and Wolf 2023a). FCO, while not requiring
identification of a misspecified model like DFI, conservatively defines
misfit, shifting focus from approximate to accurate fit (McNeish and
Wolf 2023b).
For those hesitant to delve into simulation-based methods, Equivalence
Testing (EQT) presents an alternative. EQT aligns with the analytical
mindset of PA and incorporates DFI principles, challenging the
conventional hypothesis testing framework by considering model
specification and misspecification size control (Yuan et al. 2016).
When addressing reliability, Cronbach's Alpha should not be the default
measure due to its limitations. Instead, consider McDonald's Omega or
the Greatest Lower Bound (GLB) for a more accurate reliability
assessment within the CFA context (Bell, Chalmers, and Flora 2023; Cho
2022; Dunn, Baguley, and Brunsden 2014; Flora 2020; Goodboy and Martin
2020; Green and Yang 2015; Hayes and Coutts 2020; Kalkbrenner 2023;
McNeish 2018; Trizano-Hermosilla and Alvarado 2016).
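
Within the same lavaan workflow, model-based reliability can be obtained
as sketched below; the call assumes the \texttt{fit} object from the
estimation step, and \texttt{compRelSEM()} may appear as
\texttt{reliability()} in older semTools releases.

\begin{verbatim}
# Model-based (omega-type) composite reliability from the fitted CFA.
library(semTools)
compRelSEM(fit)    # composite reliability per factor
# reliability(fit) # equivalent helper in older semTools versions
\end{verbatim}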
Before modifying the model, first check for Heywood cases, that is,
standardized factor loadings greater than one or negative variances (Nye
2022), and document the chosen cutoffs for evaluation. Tools and
resources like ShinyApp for DFI and the FCO package in R can facilitate
the application of these advanced methodologies (McNeish and Wolf 2023a;
Mai, Niemand, and Kraus 2021; Niemand and Mai 2018). Always report
corrected chi-square and degrees of freedom, alongside a minimum of
three global fit indices (RMSEA, CFI, SRMR) and local fit measures to
provide a comprehensive view of model fit and adjustment decisions
(Crede and Harms 2019; Jessica Kay Flake and Fried 2020).
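
A corresponding sketch of these checks in lavaan is shown below, again
assuming the \texttt{fit} object estimated with MLR; the robust variants
of the indices are requested from \texttt{fitMeasures()}.

\begin{verbatim}
# Global fit: scaled chi-square plus at least three indices (robust forms).
fitMeasures(fit, c("chisq.scaled", "df.scaled", "pvalue.scaled",
                   "cfi.robust", "rmsea.robust", "srmr"))

# Local fit and Heywood cases: residual correlations, loadings > 1,
# and negative variances in the standardized solution.
resid(fit, type = "cor")
std_est <- standardizedSolution(fit)
subset(std_est, op == "=~" & abs(est.std) > 1)
subset(std_est, op == "~~" & lhs == rhs & est.std < 0)
\end{verbatim}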
\subsection{Model Comparisons and
Modifications}\label{model-comparisons-and-modifications}
Researchers embarking on CFA should avoid prematurely committing to a
specific factor structure without thoroughly evaluating and comparing
alternate configurations. It's advisable to consider various potential
structures early in the study design, ensuring the selected model is
based on its merits relative to competing theories (Jackson, Gillaspy,
and Purc-Stephenson 2009). Since models are inherently approximations of
reality, adopting the most effective ``working hypothesis'' is a dynamic
process, contingent on ongoing assessments against emerging alternatives
(Preacher and Yaremych 2023).
Good models are characterized not only by their interpretability,
simplicity, and generalizability but notably by their capacity to
surpass competing models in critical aspects. This competitive advantage
frames the selected theory as the prevailing hypothesis until a more
compelling alternative is identified (Preacher and Yaremych 2023).
The evaluation of model fit should extend beyond isolated assessments
using fit indices. A comprehensive approach involves comparing multiple
models, each grounded in substantiated theories, to discern the most
accurate representation of the underlying structure. This comparative
analysis is preferred over singular model evaluations, fostering a more
holistic understanding of the phenomena under study (Preacher and
Yaremych 2023).
Uniform application of models across the same dataset, utilizing
identical software and sample size, ensures consistency in the
researcher's analytical freedom, mitigating the risk of results
manipulation. This standardized approach underpins a more rigorous and
transparent investigative process (Preacher and Yaremych 2023).
Model selection is instrumental in pinpointing the most effective
explanatory framework for the observed phenomena, enabling the dismissal
of poorer-performing models while retaining promising ones for further
exploration. This methodological flexibility enhances the depth of
analysis, contributing to the advancement of knowledge within the social
sciences (Preacher and Yaremych 2023).
Adjustments to a model, particularly in response to unsatisfactory fit
indices, should be theoretically grounded and reflective of findings
from prior research. Blind adherence to a pre-established model or
making hasty modifications can adversely affect the structural model's
integrity. Thoughtful adjustments, potentially revisiting exploratory
factor analysis (EFA) or considering Exploratory SEM (ESEM) for
cross-loadings representation, are preferable to drastic changes that
might shift the study from confirmatory to exploratory research (Brown
2023; Jessica K. Flake, Pek, and Hehman 2017; Jackson, Gillaspy, and
Purc-Stephenson 2009; Crede and Harms 2019).
All modifications to the measurement model, especially those enhancing
model fit, must be meticulously documented to maintain transparency and
support reproducibility (Jessica Kay Flake and Fried 2020). Openly
reporting these adjustments, including item exclusions and inter-item
correlations, is vital for the scientific integrity of the research (Nye
2022; Jessica Kay Flake et al. 2022).
Regarding model comparison and selection, traditional fit indices (SRMR,
RMSEA, CFI) have limitations for direct model comparisons. Adjusted
chi-square tests and information criteria like AIC and BIC are more
suitable for this purpose, balancing model fit and parsimony. These
criteria, however, should be applied with an understanding of their
constraints and complemented by theoretical judgment to inform model
selection decisions (Preacher and Yaremych 2023; Brown 2015; Huang 2017;
Lai 2020, 2021).
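
The sketch below contrasts the hypothesized two-factor model with an
assumed one-factor rival using the scaled chi-square difference test,
information criteria, and (theory-guided) modification indices; the
rival structure is a placeholder for whatever alternatives the
literature on the chosen scale suggests.

\begin{verbatim}
# Compare the hypothesized two-factor model with a one-factor rival.
rival_model <- "
  G =~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8
"
fit_rival <- cfa(rival_model, data = dat,
                 estimator = "MLR", missing = "fiml", std.lv = TRUE)

lavTestLRT(fit, fit_rival)              # scaled chi-square difference test
fitMeasures(fit,       c("aic", "bic")) # information criteria:
fitMeasures(fit_rival, c("aic", "bic")) #   lower values are preferred

# Inspect (do not mechanically apply) the largest modification indices.
mi <- modindices(fit)
head(mi[order(mi$mi, decreasing = TRUE), ], 10)
\end{verbatim}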
Ultimately, model selection in SEM is a nuanced process, blending
empirical evidence with theoretical insights. Researchers are encouraged
to leverage a range of models based on theoretical foundations, ensuring
that the eventual model selection is not solely determined by
statistical criteria but is also informed by substantive theory and
expertise (Preacher and Yaremych 2023). This balanced approach
underscores the importance of theory-driven research in the social
sciences, guiding the interpretation and application of findings derived
from chosen models.
\bookmarksetup{startatroot}
\section{Executable Manuscript}\label{executable-manuscript}
\subsection{Measurement Model
Selection}\label{measurement-model-selection-1}
\subsection{Power Analysis}\label{power-analysis-1}
\subsection{Pre-processing}\label{pre-processing-1}
\subsection{Estimation Process}\label{estimation-process-1}
\subsection{Model Fit}\label{model-fit-1}
\subsection{Model Comparisons and
Modifications}\label{model-comparisons-and-modifications-1}
\bookmarksetup{startatroot}
\section{Final Considerations}\label{final-considerations}
\bookmarksetup{startatroot}
\section*{References}\label{references}
\addcontentsline{toc}{section}{References}
\markboth{References}{References}
\phantomsection\label{refs}
\begin{CSLReferences}{1}{0}
\bibitem[\citeproctext]{ref-aera1999}
AERA, APA, and NCME. 1999. {``Standards for {Educational} and
{Psychological Testing}.''} Washington: American Educational Research
Association, American Psychological Association, \& National Council on
Measurement in Education.
\bibitem[\citeproctext]{ref-aera2014}
---------. 2014. {``Standards for {Educational} and {Psychological
Testing}.''} Washington: American Educational Research Association,
American Psychological Association \& National Council on Measurement in
Education.
\bibitem[\citeproctext]{ref-arbuckle2019}
Arbuckle, J. L. 2019. {``Amos.''} Chicago: IBM Corp.
\bibitem[\citeproctext]{ref-bandalos2014}
Bandalos, Deborah L. 2014. {``Relative {Performance} of {Categorical
Diagonally Weighted Least Squares} and {Robust Maximum Likelihood
Estimation}.''} \emph{Structural Equation Modeling} 21 (1): 102--16.
\url{https://doi.org/10.1080/10705511.2014.859510}.
\bibitem[\citeproctext]{ref-bandalos2018}
---------. 2018. \emph{Measurement Theory and Applications for the
Social Sciences}. New York: Guilford Press.
\bibitem[\citeproctext]{ref-bell2023}
Bell, Stephanie M., R. Philip Chalmers, and David B. Flora. 2023. {``The
{Impact} of {Measurement Model Misspecification} on {Coefficient Omega
Estimates} of {Composite Reliability}.''} \emph{Educational and
Psychological Measurement}, 1--36.
\url{https://doi.org/10.1177/00131644231155804}.
\bibitem[\citeproctext]{ref-bentler2020}
Bentler, Peter M., and Erik Wu. 2020. {``{EQS} 6.4 for {Windows}.''}
Multivariate Software, Inc. \url{https://mvsoft.com}.
\bibitem[\citeproctext]{ref-brown2015}
Brown, Timothy A. 2015. \emph{Confirmatory {Factor Analysis} for
{Applied Research}}. New York: The Guilford Press.
\bibitem[\citeproctext]{ref-brown2023}
---------. 2023. {``Confirmatory {Factor Analysis}.''} In \emph{Handbook
of {Structural Equation Modeling}}, edited by Rick H. Hoyle. New York:
The Guilford Press.
\bibitem[\citeproctext]{ref-cho2022}
Cho, Eunseong. 2022. {``Reliability and {Omega Hierarchical} in
{Multidimensional Data}: {A Comparison} of {Various Estimators}.''}
\emph{Psychological Methods}. \url{https://doi.org/10.1037/met0000525}.
\bibitem[\citeproctext]{ref-cohen2022}
Cohen, Ronald Jay, Joel W. Schneider, and Renée M. Tobin. 2022.
\emph{Psychological {Testing} and {Assessment}: {An Introduction} to
{Test} and {Measurement}}. New York: McGraw Hill LLC.
\bibitem[\citeproctext]{ref-collier2020}
Collier, Joel E. 2020. \emph{Applied {Structural Equation Modeling Using
AMOS}: {Basic} to {Advanced Techniques}}. New York: Routledge.
\bibitem[\citeproctext]{ref-crede2019}
Crede, Marcus, and Peter Harms. 2019. {``Questionable Research Practices