diff --git a/NAMESPACE b/NAMESPACE index c38c4e2d..570d1b1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand +S3method(print,rfm_table_customer) S3method(print,rfm_table_order) +S3method(rfm_table_customer,default) S3method(rfm_table_order,default) export(rfm_bar_chart) export(rfm_fm_plot) @@ -9,6 +11,7 @@ export(rfm_histograms) export(rfm_order_dist) export(rfm_rf_plot) export(rfm_rm_plot) +export(rfm_table_customer) export(rfm_table_order) importFrom(RColorBrewer,brewer.pal) importFrom(assertthat,are_equal) @@ -51,6 +54,7 @@ importFrom(magrittr,add) importFrom(magrittr,extract) importFrom(magrittr,extract2) importFrom(magrittr,multiply_by) +importFrom(magrittr,set_names) importFrom(magrittr,use_series) importFrom(purrr,prepend) importFrom(rlang,enquo) diff --git a/R/rfm-data.R b/R/rfm-data.R index eadca0e4..f9f57092 100644 --- a/R/rfm-data.R +++ b/R/rfm-data.R @@ -1,4 +1,4 @@ -#' RFM Data Set +#' RFM transaction data #' #' A dataset containing transactions of different customers. #' @@ -10,3 +10,18 @@ #' } #' "rfm_data_orders" + +#' RFM customer data +#' +#' A dataset containing customer level data. +#' +#' @format A tibble with 39,999 rows and 5 variables: +#' \describe{ +#' \item{customer_id}{Customer id.} +#' \item{total_amount}{Total amount of all orders.} +#' \item{most_recent_visit}{Date of the most recent transaction.} +#' \item{number_of_purchases}{Total number of transactions/orders.} +#' \item{purchase_interval}{Number of days since last transaction/order.} +#' } +#' +"rfm_data_customer" diff --git a/R/rfm-table.R b/R/rfm-table.R index 73bb3212..a40e7fad 100644 --- a/R/rfm-table.R +++ b/R/rfm-table.R @@ -1,4 +1,4 @@ -#' RFM table +#' RFM table (transaction data) #' #' Recency, frequency, monetary and RFM score. #' @@ -147,3 +147,138 @@ rfm_table_order.default <- function(data = NULL, customer_id = NULL, order_date print.rfm_table_order <- function(x, ...) 
{ print(x$rfm) } + + +#' RFM table (customer data) +#' +#' Recency, frequency, monetary and RFM score. +#' +#' @param data A \code{data.frame} or \code{tibble}. +#' @param customer_id Unique id of the customer. +#' @param n_transactions Number of transactions/orders. +#' @param recency_days Number of days since the last transaction. +#' @param analysis_date Date of analysis. +#' @param recency_bins Number of bins for recency. +#' @param frequency_bins Number of bins for frequency. +#' @param monetary_bins Number of bins for monetary. +#' @param ... Other arguments. +#' +#' @return \code{rfm_table} returns a tibble with the following columns: +#' +#' \item{customer_id}{Unique id of the customer.} +#' \item{recency_days}{Number of days since the most recent transaction.} +#' \item{transaction_count}{Total number of transactions of the customer.} +#' \item{revenue}{Revenue from the customer.} +#' +#' @importFrom magrittr set_names +#' +#' @examples +#' analysis_date <- lubridate::as_date('2007-01-01', tz = 'UTC') +#' rfm_table_customer(rfm_data_customer, customer_id, number_of_purchases, +#' purchase_interval, total_amount, analysis_date) +#' +#' @export +#' +rfm_table_customer <- function(data = NULL, customer_id = NULL, n_transactions = NULL, + recency_days = NULL, total_revenue = NULL, analysis_date = NULL, recency_bins = 5, + frequency_bins = 5, monetary_bins = 5, ...) UseMethod("rfm_table_customer") + +#' @export +#' +rfm_table_customer.default <- function(data = NULL, customer_id = NULL, n_transactions = NULL, + recency_days = NULL, total_revenue = NULL, analysis_date = NULL, recency_bins = 5, + frequency_bins = 5, monetary_bins = 5, ...) { + + cust_id <- enquo(customer_id) + order_count <- enquo(n_transactions) + n_recency <- enquo(recency_days) + revenues <- enquo(total_revenue) + + result <- + data %>% + select(!! cust_id, !! n_recency, !! order_count, !! 
revenues) %>% + set_names(c("customer_id", "recency_days", "transaction_count", "amount")) + + result$recency_score <- NA + result$frequency_score <- NA + result$monetary_score <- NA + + rscore <- + recency_bins %>% + seq_len() %>% + rev() + + if (length(recency_bins) == 1) { + bins_recency <- bins(result, recency_days, recency_bins) + } else { + bins_recency <- recency_bins + } + lower_recency <- bins_lower(result, recency_days, bins_recency) + upper_recency <- bins_upper(result, recency_days, bins_recency) + + for (i in seq_len(recency_bins)) { + result$recency_score[result$recency_days >= lower_recency[i] & + result$recency_days < upper_recency[i]] <- rscore[i] + } + + fscore <- + frequency_bins %>% + seq_len() %>% + rev() + + if (length(frequency_bins) == 1) { + bins_frequency <- bins(result, transaction_count, frequency_bins) + } else { + bins_frequency <- frequency_bins + } + lower_frequency <- bins_lower(result, transaction_count, bins_frequency) + upper_frequency <- bins_upper(result, transaction_count, bins_frequency) + + for (i in seq_len(frequency_bins)) { + result$frequency_score[result$transaction_count >= lower_frequency[i] & + result$transaction_count < upper_frequency[i]] <- i + } + + mscore <- + monetary_bins %>% + seq_len() %>% + rev() + + if (length(monetary_bins) == 1) { + bins_monetary <- bins(result, amount, monetary_bins) + } else { + bins_monetary <- monetary_bins + } + lower_monetary <- bins_lower(result, amount, bins_monetary) + upper_monetary <- bins_upper(result, amount, bins_monetary) + + for (i in seq_len(monetary_bins)) { + result$monetary_score[result$amount >= lower_monetary[i] & + result$amount < upper_monetary[i]] <- i + } + + result %<>% + mutate( + rfm_score = recency_score * 100 + frequency_score * 10 + monetary_score + ) + + result$transaction_count <- as.numeric(result$transaction_count) + + out <- list( + rfm = result, analysis_date = analysis_date, + frequency_bins = frequency_bins, recency_bins = recency_bins, + 
monetary_bins = monetary_bins + ) + + class(out) <- c("rfm_table_customer", "tibble", "data.frame") + return(out) + +} + + + +#' @export +#' +print.rfm_table_customer <- function(x, ...) { + print(x$rfm) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 78a77b2b..92cf9f93 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,7 @@ reference: contents: - rfm_table_order + - rfm_table_customer - rfm_heatmap - rfm_bar_chart - rfm_histograms @@ -44,3 +45,4 @@ reference: - title: Data contents: - rfm_data_orders + - rfm_data_customer diff --git a/data/rfm_data_customer.rda b/data/rfm_data_customer.rda new file mode 100644 index 00000000..c72243c0 Binary files /dev/null and b/data/rfm_data_customer.rda differ diff --git a/docs/reference/index.html b/docs/reference/index.html index 11388cbc..510d5b02 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -127,7 +127,13 @@
RFM table
RFM table (transaction data)
RFM table (customer data)
RFM Data Set
RFM transaction data
RFM customer data
A dataset containing customer level data.
+ + +rfm_data_customer
+
+ A tibble with 39,999 rows and 5 variables:
Customer id.
Total amount of all orders.
Date of the most recent transaction.
Total number of transactions/orders.
Number of days since last transaction/order.
Recency, frequency, monetary and RFM score.
+ + +rfm_table_customer(data = NULL, customer_id = NULL, n_transactions = NULL, + recency_days = NULL, total_revenue = NULL, analysis_date = NULL, + recency_bins = 5, frequency_bins = 5, monetary_bins = 5, ...)+ +
data | +A |
+
---|---|
customer_id | +Unique id of the customer. |
+
n_transactions | +Number of transactions/orders. |
+
recency_days | +Number of days since the last transaction. |
+
analysis_date | +Date of analysis. |
+
recency_bins | +Number of bins for recency. |
+
frequency_bins | +Number of bins for frequency. |
+
monetary_bins | +Number of bins for monetary. |
+
... | +Other arguments. |
+
rfm_table_customer
returns a tibble with the following columns:
Unique id of the customer.
Number of days since the most recent transaction.
Total number of transactions of the customer.
Revenue from the customer.
+analysis_date <- lubridate::as_date('2007-01-01', tz = 'UTC') +rfm_table_customer(rfm_data_customer, customer_id, number_of_purchases, +purchase_interval, total_amount, analysis_date)#> # A tibble: 39,999 x 8 +#> customer_id recency_days transaction_count amount recency_score +#> <dbl> <dbl> <dbl> <dbl> <int> +#> 1 1.00 229 10.0 1313 2 +#> 2 2.00 467 11.0 1230 1 +#> 3 3.00 143 13.0 1194 3 +#> 4 4.00 222 9.00 794 2 +#> 5 5.00 659 3.00 278 1 +#> 6 6.00 157 9.00 922 3 +#> 7 7.00 195 11.0 961 2 +#> 8 8.00 572 6.00 615 1 +#> 9 9.00 244 9.00 1097 2 +#> 10 10.0 239 12.0 1164 2 +#> # ... with 39,989 more rows, and 3 more variables: frequency_score <int>, +#> # monetary_score <int>, rfm_score <dbl>+
analysis_date <- lubridate::as_date('2006-12-31', tz = 'UTC') +analysis_date <- lubridate::as_date('2006-12-31', tz = 'UTC') rfm_table_order(rfm_data_orders, customer_id, order_date, revenue, analysis_date)#> # A tibble: 995 x 9 #> customer_id date_most_recent recency_days transaction_count amount #> <chr> <date> <dbl> <dbl> <dbl> diff --git a/man/rfm_data_customer.Rd b/man/rfm_data_customer.Rd new file mode 100644 index 00000000..2f88acca --- /dev/null +++ b/man/rfm_data_customer.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rfm-data.R +\docType{data} +\name{rfm_data_customer} +\alias{rfm_data_customer} +\title{RFM customer data} +\format{A tibble with 39,999 rows and 5 variables: +\describe{ + \item{customer_id}{Customer id.} + \item{total_amount}{Total amount of all orders.} + \item{most_recent_visit}{Date of the most recent transaction.} + \item{number_of_purchases}{Total number of transactions/orders.} + \item{purchase_interval}{Number of days since last transaction/order.} +}} +\usage{ +rfm_data_customer +} +\description{ +A dataset containing customer level data. 
+} +\keyword{datasets} diff --git a/man/rfm_data_orders.Rd b/man/rfm_data_orders.Rd index 73994e38..86e7e46b 100644 --- a/man/rfm_data_orders.Rd +++ b/man/rfm_data_orders.Rd @@ -3,7 +3,7 @@ \docType{data} \name{rfm_data_orders} \alias{rfm_data_orders} -\title{RFM Data Set} +\title{RFM transaction data} \format{A tibble with 49.6 rows and 3 variables: \describe{ \item{order_date}{order date} diff --git a/man/rfm_table_customer.Rd b/man/rfm_table_customer.Rd new file mode 100644 index 00000000..f273416a --- /dev/null +++ b/man/rfm_table_customer.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rfm-table.R +\name{rfm_table_customer} +\alias{rfm_table_customer} +\title{RFM table (customer data)} +\usage{ +rfm_table_customer(data = NULL, customer_id = NULL, n_transactions = NULL, + recency_days = NULL, total_revenue = NULL, analysis_date = NULL, + recency_bins = 5, frequency_bins = 5, monetary_bins = 5, ...) +} +\arguments{ +\item{data}{A \code{data.frame} or \code{tibble}.} + +\item{customer_id}{Unique id of the customer.} + +\item{n_transactions}{Number of transactions/orders.} + +\item{recency_days}{Number of days since the last transaction.} + +\item{analysis_date}{Date of analysis.} + +\item{recency_bins}{Number of bins for recency.} + +\item{frequency_bins}{Number of bins for frequency.} + +\item{monetary_bins}{Number of bins for monetary.} + +\item{...}{Other arguments.} +} +\value{ +\code{rfm_table} returns a tibble with the following columns: + +\item{customer_id}{Unique id of the customer.} +\item{recency_days}{Number of days since the most recent transaction.} +\item{transaction_count}{Total number of transactions of the customer.} +\item{revenue}{Revenue from the customer.} +} +\description{ +Recency, frequency, monetary and RFM score. 
+} +\examples{ +analysis_date <- lubridate::as_date('2007-01-01', tz = 'UTC') +rfm_table_customer(rfm_data_customer, customer_id, number_of_purchases, +purchase_interval, total_amount, analysis_date) + +} diff --git a/man/rfm_table_order.Rd b/man/rfm_table_order.Rd index c4c7b8d4..f55b14cc 100644 --- a/man/rfm_table_order.Rd +++ b/man/rfm_table_order.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/rfm-table.R \name{rfm_table_order} \alias{rfm_table_order} -\title{RFM table} +\title{RFM table (transaction data)} \usage{ rfm_table_order(data = NULL, customer_id = NULL, order_date = NULL, revenue = NULL, analysis_date = NULL, recency_bins = 5,