-
Notifications
You must be signed in to change notification settings - Fork 0
/
terry_reilly_report.Rmd
267 lines (214 loc) · 9.94 KB
/
terry_reilly_report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
---
title: "Terry Reilly Clinics"
author:
- Jeremy Boyd, Ph.D.
- jeremyboyd@pm.me
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
pdf_document:
extra_dependencies: ["booktabs"]
---
```{r setup, include = FALSE}
# YAML info for HTML output
# output:
# html_document:
# theme: spacelab
# toc: true
# toc_float: true
# Packages
library(tidyverse)
library(kableExtra)
library(feather)
# SVG preferred for HTML, but doesn't work for PDF
# knitr::opts_chunk$set(dev = "svg")
# knitr::opts_chunk$set(dev = "png")
knitr::opts_chunk$set(echo = FALSE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
options(tinytex.verbose = TRUE)
```
```{r data}
# Read in 2020 by-provider data
df <- read_feather("Idaho prescribers by provider data.feather") %>%
filter(year == 2020)
# Make sure all addresses have been queried in Google Places API
get_address_names(input_table = df)
# Read in place names
places <- read_feather("Address-name bank.feather")
# Clean up & organize data
df2 <- df %>%
# Join place names to data by dataset address
mutate(
dataset_address = str_squish(
paste(
Prscrbr_St1,
Prscrbr_St2,
Prscrbr_City,
Prscrbr_State_Abrvtn,
Prscrbr_Zip5
)
)) %>%
left_join(places %>% select(dataset_address, name, status),
by = "dataset_address") %>%
rename(Clinic = name) %>%
# Filter to Terry Reilly clinics
filter(str_detect(Clinic, "Terry")) %>%
# Categorize providers into medical & dental
mutate(provider_cat = case_when(
Prscrbr_Type %in%
c("Family Practice", "Physician Assistant", "Nurse Practitioner",
"Certified Clinical Nurse Specialist") ~ "Medical",
Prscrbr_Type %in% c("Dentist") ~ "Dental",
TRUE ~ Prscrbr_Type)) %>%
# Keep only medical & dental providers
filter(provider_cat %in% c("Medical", "Dental")) %>%
# Clean up clinic info. One Boise dentist seems to have given his home
# address, but web searches show he's at the Boise dental clinic. Another
# clinic--simply called "Terry Reilly Health Services"--has a 16th Ave Nampa
# address, so group it with 16th Ave Clinic.
mutate(Clinic = case_when(
dataset_address == "2301 N 26th St Ste 102 Boise ID 83702" ~
"Terry Reilly Health Services - Boise Dental",
Clinic == "Terry Reilly Health Services" ~
"Terry Reilly Health Services - 16th Ave. Clinic",
TRUE ~ Clinic),
# Simplify clinic names & order
Clinic = str_remove(Clinic, ".+ - "),
# Only last 4 digits of prescriber NPI
Prscrbr_NPI = str_extract(Prscrbr_NPI, ".{4}$"),
# claims/1k
claims_1k = Antbtc_Tot_Clms / (Tot_Benes / 1000))
# Compute claims/1k benes & deal with NAs
df2.1 <- df2 %>%
mutate(
# Flag for missing claims_1k
na_claims1k = if_else(is.na(claims_1k), 1L, 0L),
# Impute missing counts as 10
across(.cols = c("Antbtc_Tot_Clms", "Tot_Benes"),
~ if_else(is.na(.x), 10, .x)),
# Recompute claims_1k
claims_1k = Antbtc_Tot_Clms / (Tot_Benes / 1000))
# Create a single table containing two datasets: one with NA values of claims_1k
# removed, and one with them imputed.
df3 <- bind_rows(
df2.1 %>% mutate(dataset = "NAs_imputed"),
df2.1 %>% filter(na_claims1k != 1) %>% mutate(dataset = "NAs_removed"))
```
# Data
- Data are from the 2020 Medicare Part D by-provider dataset.
- Provider addresses were submitted to the Google Places API to retrieve organization names.
- Terry Reilly clinics are organizations with "Terry" in their names. This identified the `r df2.1 %>% count(Clinic) %>% nrow()` different clinics shown in the figures below.
- Providers were categorized as either *medical* or *dental*. Medical providers are medical doctors, physician assistants, nurse practitioners, or certified nurse specialists. Dental providers are dentists. `r df2.1 %>% count(Prscrbr_NPI) %>% nrow()` medical or dental providers were identified.
- Each provider had their number of antibiotic claims per 1K beneficiaries computed. `r df2.1 %>% filter(na_claims1k == 1) %>% nrow()` providers ended up with NA values for claims/1K due to either NAs in their antibiotic claims count, or their beneficiaries count. NA values for these variables indicate counts less than 11.
- In the analyses below, two datasets were used. In one, all NA values for antibiotic claims/1K were removed. In the other, NA values for claims/1K were imputed in the following way: NA values for the antibiotic claims count and the beneficiaries count were imputed as 10. The number of claims per 1K beneficiaries was then computed from these.
See Table 1 below for summary statistics for medical and dental providers.
```{r stat}
# Table of summary stats
df2 %>%
select(Prscrbr_NPI, provider_cat, Antbtc_Tot_Clms, Tot_Benes, claims_1k) %>%
pivot_longer(matches("Clms|Benes|claims"), names_to = "Variable",
values_to = "value") %>%
group_by(provider_cat, Variable) %>%
summarize(n = sum(!is.na(Prscrbr_NPI)),
min = min(value, na.rm = TRUE),
max = max(value, na.rm = TRUE),
mean = mean(value, na.rm = TRUE), .groups = "drop") %>%
mutate(across(min:mean, ~ format(.x, digits = 1, nsmall = 1)),
across(min:mean, ~ str_trim(.x)),
value = paste0(mean, " (", min, "-", max, ")"),
Variable = case_when(
str_detect(Variable, "Antbtc") ~ "Antibiotic Claims Count",
str_detect(Variable, "Benes") ~ "Beneficiary Count",
str_detect(Variable, "1k") ~ "Antibiotic Claims per 1K Beneficiaries",
TRUE ~ "error"),
Variable = fct_relevel(Variable, "Antibiotic Claims Count",
"Beneficiary Count")) %>%
arrange(provider_cat, Variable) %>%
select(`Provider Type` = provider_cat, `N Providers` = n,
Variable , `Mean (Range)` = value) %>%
kable(
format = "latex",
booktabs = TRUE,
caption =
"Table 1. Summary statistics for medical and dental providers.")
# %>%
# collapse_rows()
# Trying to collapse duplicate rows in columns 1 & 2 of table throws error in the latex knitting process.
```
# Antibiotic use by clinic
## All providers
Figure 1 shows mean antibiotic claims per 1K beneficiaries by clinic and provider type. Additionally, the columns show results for the datasets with NAs imputed (left) and removed (right). The value to the right of each bar indicates the number of providers.
```{r clinic, fig.height = 7, fig.cap = "Clinic means for all providers."}
# Compute mean claims_1k by dataset, clinic, provider_cat
clinic_sum <- df3 %>%
group_by(Clinic, provider_cat, dataset) %>%
summarize(n = sum(!is.na(claims_1k)),
mean_claims_1k = mean(claims_1k, na.rm = TRUE),
.groups = "drop") %>%
mutate(Clinic = fct_rev(Clinic))
# Visualize
clinic_sum %>%
ggplot(aes(y = Clinic, x = mean_claims_1k, label = n)) +
geom_col(fill = "lightskyblue", alpha = .6) +
geom_text(nudge_x = 40, size = 3) +
facet_grid(rows = vars(provider_cat),
cols = vars(dataset)) +
labs(x = "Mean Claims per 1K Beneficiaries")
```
## Medical & dental separated
Figures 2 and 3 show the same data as Figure 1, but with either medical providers only, or dental providers only. This makes it easier to see which clinics have higher or lower values of claims/1K.
```{r clinic_med, fig.cap = "Clinic means for medical providers only."}
# Medical providers only
clinic_sum %>%
filter(provider_cat == "Medical") %>%
mutate(Clinic = fct_reorder(Clinic, mean_claims_1k)) %>%
ggplot(aes(y = Clinic, x = mean_claims_1k, label = n)) +
geom_col(fill = "lightskyblue", alpha = .6) +
geom_text(nudge_x = 30, size = 3.5) +
facet_wrap(~ dataset) +
labs(title = "Medical Providers",
x = "Mean Claims per 1K Beneficiaries")
```
```{r clinic_dent, fig.cap = "Clinic means for dental providers only."}
# Medical providers only
clinic_sum %>%
filter(provider_cat == "Dental") %>%
mutate(Clinic = fct_reorder(Clinic, mean_claims_1k)) %>%
ggplot(aes(y = Clinic, x = mean_claims_1k, label = n)) +
geom_col(fill = "lightskyblue", alpha = .6) +
geom_text(nudge_x = 30, size = 3.5) +
facet_wrap(~ dataset) +
labs(title = "Dental Providers",
x = "Mean Claims per 1K Beneficiaries")
```
# Antibiotic use by provider
Figures 4 and 5 give antibiotic claims/1K for individual medical and dental providers, respectively.
- Figures are based on the dataset with no imputation.
- Only the last four digits of Provider NPIs are shown.
- The figures use the same scale to facilitate comparison.
```{r provider_med, fig.height = 8.5, fig.cap = "Claims/1K for individual medical providers."}
df3 %>%
filter(provider_cat == "Medical",
dataset == "NAs_removed") %>%
mutate(Prscrbr_NPI = fct_reorder(Prscrbr_NPI, claims_1k)) %>%
ggplot(aes(x = Prscrbr_NPI, y = claims_1k)) +
geom_col(fill = "lightskyblue", alpha = .6) +
scale_y_continuous(limits = c(0, 1400), breaks = seq(0, 1400, 200)) +
coord_flip() +
labs(title = "Medical Providers",
x = "Provider\nNPI",
y = "Claims per 1K Beneficiaries")
```
```{r provider_dent, fig.height = 4, fig.cap = "Claims/1K for individual dental providers."}
df3 %>%
filter(provider_cat == "Dental",
dataset == "NAs_removed") %>%
mutate(Prscrbr_NPI = fct_reorder(Prscrbr_NPI, claims_1k)) %>%
ggplot(aes(x = Prscrbr_NPI, y = claims_1k)) +
geom_col(fill = "lightskyblue", alpha = .6) +
scale_y_continuous(limits = c(0, 1400), breaks = seq(0, 1400, 200)) +
coord_flip() +
labs(title = "Dental Providers",
x = "Provider\nNPI",
y = "Claims per 1K Beneficiaries")
```