-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcagr.R
146 lines (115 loc) · 4.64 KB
/
cagr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# ---- Load Necessary Libraries ----
if (!requireNamespace("lubridate", quietly = TRUE)) {
install.packages("lubridate")
}
library(lubridate)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}
library(ggplot2)
# ---- Load and Prepare Data ----
# Location of the daily price history to analyse. To analyse the other
# dataset kept alongside it, swap in the commented path below.
csv_file <- "./data/bitcoin_daily_prices.csv"
#csv_file <- "./data/Tartu.csv"

# Read the raw table from disk
price_data <- read.csv(file = csv_file)

# Convert the Date column to Date objects so the rows can be compared and
# ordered chronologically
price_data$Date <- as.Date(x = price_data$Date)

# Put the observations in ascending date order
price_data <- price_data[order(price_data$Date, decreasing = FALSE), ]
# ---- Functions ----
#' Compute the CAGR of every rolling holding period in a price series
#'
#' Every row except the last is tried as a start date. The target end date is
#' the start date plus `period_length` years (lubridate `%m+%`, so month-end
#' dates roll back instead of overflowing). Start dates whose target end lies
#' beyond the last date in `data` are skipped. For the remaining ones the
#' window is all rows dated between the start and the target end (inclusive);
#' the CAGR is computed from the first and last `Close` in that window,
#' annualised by the actual time elapsed between the start date and the last
#' date in the window.
#'
#' @param data Data frame with a `Date` column and a `Close` column.
#' @param period_length Holding period length in years, passed to `years()`.
#' @return Data frame with columns `Start_Date`, `End_Date` and `CAGR`, one
#'   row per start date that yields a window of at least two rows. Zero rows
#'   when no such window exists, including when `data` has fewer than two
#'   rows.
calculate_subperiod_cagrs <- function(data, period_length) {
  # Sort chronologically so window boundaries can be read off by position
  data <- data[order(data$Date), ]
  n_rows <- nrow(data)

  # Typed, zero-row result returned whenever no holding period qualifies
  empty_result <- data.frame(
    Start_Date = as.Date(character()),
    End_Date = as.Date(character()),
    CAGR = numeric(),
    stringsAsFactors = FALSE
  )

  # Fewer than two rows cannot form a holding period. `1:(n_rows - 1)` would
  # count downwards here (1:0 or 1:-1) and crash in the `if` below, so bail
  # out explicitly.
  if (n_rows < 2) {
    return(empty_result)
  }

  dates <- data$Date
  closes <- data$Close
  last_date <- max(dates) # loop-invariant, computed once

  # Preallocate per-start-date bookkeeping instead of growing a data frame
  # with rbind() on every iteration.
  n_starts <- n_rows - 1
  first_idx <- integer(n_starts)
  last_idx <- integer(n_starts)
  cagr <- numeric(n_starts)
  keep <- logical(n_starts)

  for (i in seq_len(n_starts)) {
    start_date <- dates[i]
    target_end <- start_date %m+% years(period_length) # Add precise years

    # Skip if the calculated end date exceeds the maximum date in the data
    if (target_end > last_date) {
      next
    }

    in_window <- which(dates >= start_date & dates <= target_end)
    if (length(in_window) > 1) {
      window_first <- in_window[1]
      window_last <- in_window[length(in_window)]

      # Dates are sorted, so the last row in the window carries its max date
      actual_years <- as.numeric(
        interval(start_date, dates[window_last]) / years(1)
      )

      cagr[i] <- (closes[window_last] / closes[window_first])^(1 / actual_years) - 1
      first_idx[i] <- i
      last_idx[i] <- window_last
      keep[i] <- TRUE
    }
  }

  if (!any(keep)) {
    return(empty_result)
  }

  # Build the result once from the recorded positions
  data.frame(
    Start_Date = dates[first_idx[keep]],
    End_Date = dates[last_idx[keep]],
    CAGR = cagr[keep],
    stringsAsFactors = FALSE
  )
}
# Plot the density of holding-period CAGRs after dropping IQR outliers.
#
# Values outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] are removed, a short
# summary of how many points were dropped is written to the console, and a
# ggplot density plot of the remaining values is returned.
#
# Arguments:
#   cagr_results  - data frame with a CAGR column
#   period_length - holding period in years; used only in the plot title
visualize_subperiod_cagrs <- function(cagr_results, period_length) {
  library(ggplot2)

  cagr_values <- cagr_results$CAGR

  # Tukey fences built from the first and third quartiles
  first_quartile <- quantile(cagr_values, 0.25)
  third_quartile <- quantile(cagr_values, 0.75)
  fence_width <- 1.5 * (third_quartile - first_quartile)
  lower_fence <- first_quartile - fence_width
  upper_fence <- third_quartile + fence_width

  # Keep only the rows whose CAGR lies inside the fences
  inside_fences <- cagr_values >= lower_fence & cagr_values <= upper_fence
  inliers <- cagr_results[inside_fences, ]

  # Bookkeeping for the console report
  n_total <- nrow(cagr_results)
  n_kept <- nrow(inliers)
  n_dropped <- n_total - n_kept
  pct_dropped <- round(n_dropped / n_total * 100, 2)

  cat("\nOutlier Removal Summary:\n")
  cat("- Total data points: ", n_total, "\n")
  cat("- Remaining data points after outlier removal: ", n_kept, "\n")
  cat("- Number of outliers removed: ", n_dropped, "\n")
  cat("- Percentage of data points removed: ", pct_dropped, "%\n")

  # Density of the outlier-free CAGRs; the plot object is the return value
  plot_title <- paste(
    "CAGR Distribution for", period_length, "Year Holding Periods (No Outliers)"
  )
  ggplot(data = inliers, mapping = aes(x = CAGR)) +
    geom_density(fill = "blue", alpha = 0.4) +
    labs(title = plot_title, x = "CAGR", y = "Density") +
    theme_minimal()
}
#' Print summary statistics for a set of holding-period CAGRs
#'
#' Writes the mean, median, minimum, maximum and the deciles (0%, 10%, ...,
#' 100%) of `cagr_results$CAGR` to the console, formatted as percentages with
#' one decimal place. When `cagr_results` has no rows, a short notice is
#' printed instead.
#'
#' @param cagr_results Data frame with a numeric `CAGR` column.
#' @param period_length Holding period in years; used only in the heading.
#' @return `NULL`, invisibly. The function is called for its console output.
summarize_subperiod_cagrs <- function(cagr_results, period_length) {
  if (nrow(cagr_results) == 0) {
    cat("\nNo valid subperiods found for the specified holding period length.\n")
    # Invisible so a top-level call does not auto-print a stray "NULL" after
    # the notice.
    return(invisible(NULL))
  }

  # Summary Statistics
  mean_cagr <- mean(cagr_results$CAGR)
  median_cagr <- median(cagr_results$CAGR)
  min_cagr <- min(cagr_results$CAGR)
  max_cagr <- max(cagr_results$CAGR)
  deciles <- quantile(cagr_results$CAGR, probs = seq(0, 1, by = 0.1))

  # Display textual summary
  cat("\nCAGR Summary Statistics for", period_length, "Year Holding Periods:\n")
  cat("- Mean CAGR: ", scales::percent(mean_cagr, accuracy = 0.1), "\n")
  cat("- Median CAGR: ", scales::percent(median_cagr, accuracy = 0.1), "\n")
  cat("- Minimum CAGR: ", scales::percent(min_cagr, accuracy = 0.1), "\n")
  cat("- Maximum CAGR: ", scales::percent(max_cagr, accuracy = 0.1), "\n")

  cat("\nDecile Analysis:\n")
  # seq_along() rather than 1:length(): safe even for a zero-length vector
  for (i in seq_along(deciles)) {
    cat(paste0(" - ", names(deciles)[i], ": ", scales::percent(deciles[i], accuracy = 0.1)), "\n")
  }

  invisible(NULL)
}
# ---- Call the Functions ----
# Holding period length in years. To ask the user instead of hard-coding the
# value, use the commented readline() line below.
#period_length <- as.numeric(readline(prompt = "Enter the holding period length (in years): "))
period_length <- 4

# Step 1: Calculate the subperiod CAGRs
cagr_results <- calculate_subperiod_cagrs(price_data, period_length)

# Step 2: Visualize the subperiod CAGRs
if (!is.null(cagr_results) && nrow(cagr_results) > 0) {
  # print() the returned ggplot explicitly: relying on top-level auto-printing
  # draws nothing when this script is run through source() with its default
  # arguments. print() returns its argument invisibly, so the plot is still
  # drawn only once where auto-printing is active.
  print(visualize_subperiod_cagrs(cagr_results, period_length))
}

# Step 3: Summarize the subperiod CAGRs
summarize_subperiod_cagrs(cagr_results, period_length)