From f3914d2bab0293fed06332be31b7d553ae488ed7 Mon Sep 17 00:00:00 2001 From: Saqib Ansari Date: Tue, 31 Dec 2024 22:23:02 +0530 Subject: [PATCH 1/5] feat: better code completions --- frontend/src2/helpers/index.ts | 20 + .../query/components/ExpressionEditor.vue | 176 ++- .../insights_data_source_v3/ibis/__init__.py | 0 .../insights_data_source_v3/ibis/functions.py | 1019 +++++++++++++++++ .../insights_data_source_v3/ibis/utils.py | 115 ++ .../insights_data_source_v3/ibis_functions.py | 292 ----- .../insights_data_source_v3/ibis_utils.py | 5 +- 7 files changed, 1289 insertions(+), 338 deletions(-) create mode 100644 insights/insights/doctype/insights_data_source_v3/ibis/__init__.py create mode 100644 insights/insights/doctype/insights_data_source_v3/ibis/functions.py create mode 100644 insights/insights/doctype/insights_data_source_v3/ibis/utils.py delete mode 100644 insights/insights/doctype/insights_data_source_v3/ibis_functions.py diff --git a/frontend/src2/helpers/index.ts b/frontend/src2/helpers/index.ts index ced7f8981..51610ef3e 100644 --- a/frontend/src2/helpers/index.ts +++ b/frontend/src2/helpers/index.ts @@ -12,6 +12,7 @@ import { import { FIELDTYPES } from './constants' import { createToast } from './toasts' import { getFormattedDate } from '../query/helpers' +import { call } from 'frappe-ui' export function getUniqueId(length = 8) { return (+new Date() * Math.random()).toString(36).substring(0, length) @@ -431,3 +432,22 @@ function areValidDates(data: string[]) { function isValidDate(value: string) { return !isNaN(new Date(value).getTime()) } + +const fetchCache = new Map() +export function fetchCall(url: string, options: any): Promise { + // a function that makes a fetch call, but also caches the response for the same url & options + const key = JSON.stringify({ url, options }) + if (fetchCache.has(key)) { + return Promise.resolve(fetchCache.get(key)) + } + + return call(url, options) + .then((response: any) => { + fetchCache.set(key, response) + 
return response + }) + .catch((err: Error) => { + fetchCache.delete(key) + throw err + }) +} diff --git a/frontend/src2/query/components/ExpressionEditor.vue b/frontend/src2/query/components/ExpressionEditor.vue index a14c7fb71..8bfc09ef2 100644 --- a/frontend/src2/query/components/ExpressionEditor.vue +++ b/frontend/src2/query/components/ExpressionEditor.vue @@ -1,7 +1,8 @@ diff --git a/insights/insights/doctype/insights_data_source_v3/ibis/__init__.py b/insights/insights/doctype/insights_data_source_v3/ibis/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/insights/insights/doctype/insights_data_source_v3/ibis/functions.py b/insights/insights/doctype/insights_data_source_v3/ibis/functions.py new file mode 100644 index 000000000..ed8373f0f --- /dev/null +++ b/insights/insights/doctype/insights_data_source_v3/ibis/functions.py @@ -0,0 +1,1019 @@ +import math + +import frappe +import ibis +import ibis.expr.types as ir +import ibis.selectors as s +from ibis import _ + + +# aggregate functions +def count(column: ir.Column = None, where: ir.BooleanValue = None): + """ + def count(column=None, where=None) + + Count the number of non-null values in a column. + + Examples: + - count() + - count(user_id) + - count(user_id, status == 'Active') + """ + + if column is None: + query = frappe.flags.current_ibis_query + column = query.columns[0] + column = getattr(query, column) + + return column.count(where=where) + + +def count_if(condition: ir.BooleanValue, column: ir.Column = None): + """ + def count_if(condition) + + Count the number of rows that satisfy a condition. + + Examples: + - count_if(status == 'Active') + """ + + return count(column, where=condition) + + +def min(column: ir.Column, where: ir.BooleanValue = None): + """ + def min(column, where=None) + + Find the minimum value in a column. 
+ + Examples: + - min(column) + - min(column, status == 'Active') + """ + return column.min(where=where) + + +def max(column: ir.Column, where: ir.BooleanValue = None): + """ + def max(column, where=None) + + Find the maximum value in a column. + + Examples: + - max(column) + - max(column, status == 'Active') + """ + return column.max(where=where) + + +def sum(column: ir.NumericColumn, where: ir.BooleanValue = None): + """ + def sum(column, where=None) + + Find the sum of values in a column. + + Examples: + - sum(column) + - sum(column, status == 'Active') + """ + return column.sum(where=where) + + +def avg(column: ir.NumericColumn, where: ir.BooleanValue = None): + """ + def avg(column, where=None) + + Find the average of values in a column. + + Examples: + - avg(column) + - avg(column, status == 'Active') + """ + return column.mean(where=where) + + +def median(column: ir.NumericColumn, where: ir.BooleanValue = None): + """ + def median(column, where=None) + + Find the median value in a column. + + Examples: + - median(column) + - median(column, status == 'Active') + """ + return column.median(where=where) + + +def group_concat(column: ir.Column, sep: str = ",", where: ir.BooleanValue = None): + """ + def group_concat(column, sep=',', where=None) + + Concatenate values of a column into a single string. + + Examples: + - group_concat(column) + - group_concat(column, '-', status == 'Active') + """ + return column.group_concat(sep=sep, where=where) + + +def distinct_count(column: ir.Column, where: ir.BooleanValue = None): + """ + def distinct_count(column, where=None) + + Count the number of unique values in a column. + + Examples: + - distinct_count(column) + - distinct_count(column, status == 'Active') + """ + return column.nunique(where=where) + + +def sum_if(condition: ir.BooleanValue, column: ir.NumericColumn): + """ + def sum_if(condition, column) + + Find the sum of values in a column that satisfy a condition. 
+ + Examples: + - sum_if(status == 'Active', column) + """ + return sum(column, where=condition) + + +def distinct_count_if(condition: ir.BooleanValue, column: ir.Column): + """ + def distinct_count_if(condition, column) + + Count the number of unique values in a column that satisfy a condition. + + Examples: + - distinct_count_if(status == 'Active', column) + """ + return distinct_count(column, where=condition) + + +def is_in(column: ir.Column, *values: tuple[ir.Value, ...]): + """ + def is_in(column, *values) + + Check if value is in a list of values. + + Examples: + - is_in(status, 'Active', 'Inactive') + - is_in(user_id, 1, 2, 3) + """ + return column.isin(values) + + +def is_not_in(column: ir.Column, *values: tuple[ir.Value, ...]): + """ + def is_not_in(column, *values) + + Check if value is not in a list of values. + + Examples: + - is_not_in(status, 'Active', 'Inactive') + - is_not_in(user_id, 1, 2, 3) + """ + return column.notin(values) + + +def is_set(column: ir.Column): + """ + def is_set(column) + + Check if value is not null. + + Examples: + - is_set(email) + """ + return column.notnull() + + +def is_not_set(column: ir.Column): + """ + def is_not_set(column) + + Check if value is null. + + Examples: + - is_not_set(email) + """ + return column.isnull() + + +def is_between(column: ir.Column, start: ir.Value, end: ir.Value): + """ + def is_between(column, start, end) + + Check if value is between start and end. + + Examples: + - is_between(age, 18, 60) + """ + return column.between(start, end) + + +def is_not_between(column: ir.Column, start: ir.Value, end: ir.Value): + """ + def is_not_between(column, start, end) + + Check if value is not between start and end. 
+ + Examples: + - is_not_between(age, 18, 60) + """ + return ~column.between(start, end) + + +# is_within = lambda args, kwargs: None # TODO + + +# conditional functions +def if_else(condition: ir.BooleanValue, true_value: ir.Value, false_value: ir.Value): + """ + def if_else(condition, true_value, false_value) + + Return true_value if condition is true, else return false_value. + + Examples: + - if_else(status == 'Active', 1, 0) + """ + return ibis.case().when(condition, true_value).else_(false_value).end() + + +def case( + condition: ir.BooleanValue, value: ir.Value, *args: tuple[ir.BooleanValue, ir.Value] +): + """ + def case(condition, value, *args) + + Return value if condition is true, else return value of the next condition. + + Examples: + - case(age > 18, 'Eligible', 'Not Eligible') + - case(age > 30, 'Above 30', age > 20, 'Above 20') + """ + case = ibis.case().when(condition, value) + for i in range(0, len(args) - 1, 2): + case = case.when(args[i], args[i + 1]) + + if len(args) % 2 == 1: + return case.else_(args[-1]).end() + else: + return case.end() + + +# number Functions +def abs(column: ir.NumericColumn): + """ + def abs(column) + + Return the absolute value of a column. + + Examples: + - abs(column) + """ + return column.abs() + + +def round(column: ir.NumericColumn, decimals: int = 0): + """ + def round(column, decimals=0) + + Round the values of a column to the given number of decimal places (nearest integer by default). + + Examples: + - round(column) + - round(column, 2) + """ + return column.round(decimals) + + +def floor(column: ir.NumericColumn): + """ + def floor(column) + + Return the floor of a column. + + Examples: + - floor(column) + """ + return column.floor() + + +def ceil(column: ir.NumericColumn): + """ + def ceil(column) + + Return the ceiling of a column. + + Examples: + - ceil(column) + """ + return column.ceil() + + +# String Functions + + +def lower(column: ir.StringColumn): + """ + def lower(column) + + Convert the values of a column to lowercase. 
+ + Examples: + - lower(column) + """ + return column.lower() + + +def upper(column: ir.StringColumn): + """ + def upper(column) + + Convert the values of a column to uppercase. + + Examples: + - upper(column) + """ + return column.upper() + + +def concat(column: ir.StringColumn, *args: tuple[str | ir.Column, ...]): + """ + def concat(column, *args) + + Concatenate values of multiple strings or string columns into one string. + + Examples: + - concat(first_name, ' ', last_name) + """ + return column.concat(*args) + + +def replace(column: ir.StringColumn, old: str, new: str): + """ + def replace(column, old, new) + + Replace a substring with another substring in a column. + + Examples: + - replace(email, '@', ' at ') + """ + return column.replace(old, new) + + +def find(column: ir.StringColumn, sub: str): + """ + def find(column, sub) + + Find the position of a substring in a column. + + Examples: + - find(email, '@') + """ + return column.find(sub) + + +def substring(column: ir.StringColumn, start: int, length: int | None = None): + """ + def substring(column, start, length=None) + + Extract a substring from a column. + + Examples: + - substring(email, 0, 3) + - substring(email, find(email, '@')) + """ + return column.substr(start, length) + + +def contains(column: ir.StringColumn, sub: str): + """ + def contains(column, sub) + + Check if a substring is present in a column. + + Examples: + - contains(email, '@') + - contains(name, first_name) + """ + return column.contains(sub) + + +# not_contains = lambda column, *args, **kwargs: ~column.contains(*args, **kwargs) +def not_contains(column: ir.StringColumn, sub: str): + """ + def not_contains(column, sub) + + Check if a substring is not present in a column. + + Examples: + - not_contains(email, '@') + - not_contains(name, first_name) + """ + return ~column.contains(sub) + + +def starts_with(column: ir.StringColumn, sub: str): + """ + def starts_with(column, sub) + + Check if a column starts with a substring. 
+ + Examples: + - starts_with(email, 'info') + """ + return column.startswith(sub) + + +def ends_with(column: ir.StringColumn, sub: str): + """ + def ends_with(column, sub) + + Check if a column ends with a substring. + + Examples: + - ends_with(email, '.com') + """ + return column.endswith(sub) + + +def length(column: ir.StringColumn): + """ + def length(column) + + Find the length of a column. + + Examples: + - length(column) + """ + return column.length() + + +# date functions +def year(column: ir.DateValue): + """ + def year(column) + + Extract the year from a date column. + + Examples: + - year(order_date) + """ + return column.year() + + +def quarter(column: ir.DateValue): + """ + def quarter(column) + + Extract the quarter from a date column. + + Examples: + - quarter(order_date) + """ + return column.quarter() + + +def month(column: ir.DateValue): + """ + def month(column) + + Extract the month from a date column. + + Examples: + - month(order_date) + """ + return column.month() + + +def week_of_year(column: ir.DateValue): + """ + def week_of_year(column) + + Extract the week of the year from a date column. + + Examples: + - week_of_year(order_date) + """ + return column.week_of_year() + + +def day_of_year(column: ir.DateValue): + """ + def day_of_year(column) + + Extract the day of the year from a date column. + + Examples: + - day_of_year(order_date) + """ + return column.day_of_year() + + +def day_of_week(column: ir.DateValue): + """ + def day_of_week(column) + + Extract the day of the week from a date column. + + Examples: + - day_of_week(order_date) + """ + return column.day_of_week() + + +def day(column: ir.DateValue): + """ + def day(column) + + Extract the day from a date column. + + Examples: + - day(order_date) + """ + return column.day() + + +def hour(column: ir.TimeValue): + """ + def hour(column) + + Extract the hour from a time column. 
+ + Examples: + - hour(time_column) + """ + return column.hour() + + +def minute(column: ir.TimeValue): + """ + def minute(column) + + Extract the minute from a time column. + + Examples: + - minute(time_column) + """ + return column.minute() + + +def second(column: ir.TimeValue): + """ + def second(column) + + Extract the second from a time column. + + Examples: + - second(time_column) + """ + return column.second() + + +def microsecond(column: ir.TimeValue): + """ + def microsecond(column) + + Extract the microsecond from a time column. + + Examples: + - microsecond(time_column) + """ + return column.microsecond() + + +def format_date(column: ir.DateValue, format_str: str): + """ + def format_date(column, format_str) + + Format a date column according to a format string. + + Examples: + - format_date(order_date, '%Y-%m-%d') + """ + return column.strftime(format_str) + + +def date_diff( + column: ir.DateValue, + other: ir.DateValue, + unit: str, +): + """ + def date_diff(column, other, unit) + + Calculate the difference between two date columns. The unit can be year, quarter, month, week, or day. + + Examples: + - date_diff(order_date, delivery_date, 'day') + - date_diff(order_date, delivery_date, 'week') + """ + return column.delta(other, unit) + + +def now(): + """ + def now() + + Get the current timestamp. + + Examples: + - now() + """ + return ibis.now() + + +def today(): + """ + def today() + + Get the current date. + + Examples: + - today() + """ + return ibis.today() + + +# utility functions +def to_inr(curr: ir.StringValue, amount: ir.NumericValue, rate: int = 83): + """ + def to_inr(curr, amount, rate=83) + + Convert an amount from USD to INR. 
+ + Examples: + - to_inr('USD', amount) + - to_inr('USD', amount, 75) + - to_inr('USD', amount, exchange_rate) + """ + return if_else(curr == "USD", amount * rate, amount) + + +def to_usd(curr: ir.StringValue, amount: ir.NumericValue, rate: int = 83): + """ + def to_usd(curr, amount, rate=83) + + Convert an amount from INR to USD. + + Examples: + - to_usd('INR', amount) + - to_usd('INR', amount, 75) + - to_usd('INR', amount, exchange_rate) + """ + return if_else(curr == "INR", amount / rate, amount) + + +def literal(value): + """ + def literal(value) + + Create a literal value. + + Examples: + - literal(1) + - literal('Active') + """ + return ibis.literal(value) + + +def constant(value): + """ + def constant(value) + + Create a constant value. + + Examples: + - constant(1) + - constant('Active') + """ + return ibis.literal(value) + + +def row_number(): + """ + def row_number() + + Assign a unique number to each row. + """ + return ibis.row_number() + + +def sql(query): + """ + def sql(query) + + Execute a SQL query. + + Examples: + - sql('SELECT * FROM table') + """ + return _.sql(query) + + +def coalesce(*args): + """ + def coalesce(*args) + + Return the first non-null value in a list of columns. + + Examples: + - coalesce(column1, column2, column3) + """ + return ibis.coalesce(*args) + + +def if_null(column, value): + """ + def if_null(column, value) + + Replace null values in a column with a default value. + + Examples: + - if_null(email, 'No Email') + """ + return ibis.coalesce(column, value) + + +def asc(column): + """ + def asc(column) + + Sort a column in ascending order. + + Examples: + - asc(column) + """ + return ibis.asc(column) + + +def desc(column): + """ + def desc(column) + + Sort a column in descending order. 
+ + Examples: + - desc(column) + """ + return ibis.desc(column) + + +def previous_value(column: ir.Column, group_by=None, order_by=None, offset=1): + """ + def previous_value(column, group_by=None, order_by=None, offset=1) + + Get the value of a column in the previous row. Provide group_by and order_by columns for partitioning and ordering. + + Examples: + - previous_value(amount) + - previous_value(amount, group_by=user_id, order_by=date) + - previous_value(amount, group_by=[user_id, month(date)], order_by=asc(date)) + """ + return column.lag(offset).over(group_by=group_by, order_by=order_by) + + +def next_value(column: ir.Column, group_by=None, order_by=None, offset=1): + """ + def next_value(column, group_by=None, order_by=None, offset=1) + + Get the value of a column in the next row. Provide group_by and order_by columns for partitioning and ordering. + + Examples: + - next_value(amount) + - next_value(amount, group_by=user_id, order_by=date) + - next_value(amount, group_by=[user_id, month(date)], order_by=asc(date)) + """ + return column.lead(offset).over(group_by=group_by, order_by=order_by) + + +def previous_period_value(column: ir.Column, date_column: ir.DateColumn, offset=1): + """ + def previous_period_value(column, date_column, offset=1) + + Get the value of a column in the previous period. If the date values are at month level then the previous month value will be returned. Similarly, at year level, the previous year value will be returned. 
+ + Examples: + - previous_period_value(amount, date) + - previous_period_value(amount, date, 2) + """ + date_column_name = ( + date_column.get_name() if hasattr(date_column, "get_name") else date_column + ) + return column.lag(offset).over( + group_by=(~s.numeric() & ~s.matches(date_column_name)), + order_by=ibis.asc(date_column_name), + ) + + +def next_period_value(column: ir.Column, date_column: ir.DateColumn, offset=1): + """ + def next_period_value(column, date_column, offset=1) + + Get the value of a column in the next period. If the date values are at month level then the next month value will be returned. Similarly, at year level, the next year value will be returned. + + Examples: + - next_period_value(amount, date) + - next_period_value(amount, date, 2) + """ + date_column_name = ( + date_column.get_name() if hasattr(date_column, "get_name") else date_column + ) + return column.lead(offset).over( + group_by=(~s.numeric() & ~s.matches(date_column_name)), + order_by=ibis.asc(date_column_name), + ) + + +def percentage_change(column: ir.Column, date_column: ir.DateColumn, offset=1): + """ + def percentage_change(column, date_column, offset=1) + + Calculate the percentage change of a column in the previous period. If the date values are at month level then percentage change from the previous month will be calculated. Similarly, at year level, percentage change from the previous year will be calculated. + + Examples: + - percentage_change(amount, date) + - percentage_change(amount, date, 2) + """ + prev_value = previous_period_value(column, date_column, offset) + return ((column - prev_value) * 100) / prev_value + + +def is_first_row(group_by=None, order_by=None, sort_order="asc"): + """ + def is_first_row(group_by=None, order_by=None, sort_order="asc") + + Check if the row is the first row in the group. Provide group_by and order_by columns for partitioning and ordering. 
+ + Examples: + - is_first_row() + - is_first_row(group_by=user_id, order_by=date) + - is_first_row(group_by=[user_id, month(date)], order_by=asc(date)) + """ + _order_by = ibis.asc(order_by) if sort_order == "asc" else ibis.desc(order_by) + index = row_number().over(group_by=group_by, order_by=_order_by) + return if_else(index == 0, 1, 0) + + +def is_last_row(group_by=None, order_by=None, sort_order="asc"): + """ + def is_last_row(group_by=None, order_by=None, sort_order="asc") + + Check if the row is the last row in the group. Provide group_by and order_by columns for partitioning and ordering. + + Examples: + - is_last_row() + - is_last_row(group_by=user_id, order_by=date) + - is_last_row(group_by=[user_id, month(date)], order_by=asc(date)) + """ + _order_by = ibis.desc(order_by) if sort_order == "asc" else ibis.asc(order_by) + index = row_number().over(group_by=group_by, order_by=_order_by) + return if_else(index == 0, 1, 0) + + +def filter_first_row(group_by=None, order_by=None, sort_order="asc"): + """ + def filter_first_row(group_by=None, order_by=None, sort_order="asc") + + Filter to keep only the first row of each group. Provide group_by and order_by columns for partitioning and ordering. + + Examples: + - filter_first_row() + - filter_first_row(group_by=user_id, order_by=date) + - filter_first_row(group_by=[user_id, month(date)], order_by=asc(date)) + """ + _order_by = ibis.asc(order_by) if sort_order == "asc" else ibis.desc(order_by) + index = row_number().over(group_by=group_by, order_by=_order_by) + return index == 0 + + +def create_buckets(column: ir.Column, num_buckets: int): + """ + def create_buckets(column, num_buckets) + + Create buckets based on the values in a column. The number of buckets will be equal to num_buckets. + + Examples: + - create_buckets(age, 3) + -> 0-33, 34-66, 67-100 + """ + query = frappe.flags.current_ibis_query + if query is None: + frappe.throw("Failed to create buckets. 
Query not found") + + values_df = query.select(column).distinct().execute() + values = [v[0] for v in values_df.values.tolist()] + values = sorted(values) + + if not values: + frappe.throw("Failed to create buckets. No data found in the column") + + if len(values) < num_buckets: + num_buckets = len(values) + + bucket_size = math.ceil(len(values) / num_buckets) + buckets = [] + for i in range(0, len(values), bucket_size): + buckets.append(values[i : i + bucket_size]) + + case = ibis.case() + for bucket in buckets: + min_val = bucket[0] + max_val = bucket[-1] + label = f"{min_val}-{max_val}" + case = case.when(is_in(column, *bucket), label) + + return case.else_(None).end() + + +def week_start(column: ir.DateValue): + """ + def week_start(column) + + Get the start date of the week for a given date. + + Examples: + - week_start(order_date) + """ + + week_start_day = ( + frappe.db.get_single_value("Insights Settings", "week_starts_on") or "Monday" + ) + days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + week_starts_on = days.index(week_start_day) + day_of_week = column.day_of_week.index().cast("int32") + adjusted_week_start = (day_of_week - week_starts_on + 7) % 7 + week_start = column - adjusted_week_start.as_interval(unit="D") + return week_start + + +def get_retention_data(date_column: ir.DateValue, id_column: ir.Column, unit: str): + """ + def get_retention_data(date_column, id_column, unit) + + Calculate retention data based on the cohort analysis. The unit can be day, week, month, or year. + + Examples: + - get_retention_data(date, user_id, 'day') + """ + + query = frappe.flags.current_ibis_query + if query is None: + frappe.throw("Query not found") + + if isinstance(date_column, str): + date_column = getattr(query, date_column) + + if isinstance(id_column, str): + id_column = getattr(query, id_column) + + if not date_column.type().is_date(): + frappe.throw(f"Invalid date column. 
Expected date, got {date_column.type()}") + + unit_start = { + "day": lambda column: column.strftime("%Y-%m-%d").cast("date"), + "week": week_start, + "month": lambda column: column.strftime("%Y-%m-01").cast("date"), + "year": lambda column: column.strftime("%Y-01-01").cast("date"), + }[unit] + + query = query.mutate( + cohort_start=unit_start(date_column).min().over(group_by=id_column) + ) + + query = query.mutate( + cohort_size=id_column.nunique().over(group_by=query.cohort_start) + ) + + query = query.mutate(offset=date_column.delta(query.cohort_start, unit)) + + zero_padded_offset = (query.offset < 10).ifelse( + literal("0").concat(query.offset.cast("string")), query.offset.cast("string") + ) + query = query.mutate( + offset_label=ibis.literal(f"{unit}_").concat(zero_padded_offset) + ) + + query = query.group_by(["cohort_start", "cohort_size", "offset_label"]).aggregate( + unique_ids=id_column.nunique() + ) + + query = query.mutate(retention=(query.unique_ids / query.cohort_size) * 100) + + return query diff --git a/insights/insights/doctype/insights_data_source_v3/ibis/utils.py b/insights/insights/doctype/insights_data_source_v3/ibis/utils.py new file mode 100644 index 000000000..4abc1a4dc --- /dev/null +++ b/insights/insights/doctype/insights_data_source_v3/ibis/utils.py @@ -0,0 +1,115 @@ +import frappe +from ibis import selectors as s +from jedi import Script + + +def get_functions(): + import insights.insights.doctype.insights_data_source_v3.ibis.functions as functions + + context = frappe._dict() + exclude_keys = [ + "frappe", + "ibis", + "ir", + "math", + "s", + ] + for key in dir(functions): + if not key.startswith("_") and key not in exclude_keys: + context[key] = getattr(functions, key) + + selectors = frappe._dict() + for key in get_whitelisted_selectors(): + selectors[key] = getattr(s, key) + + context["s"] = selectors + context["selectors"] = selectors + + return context + + +def get_whitelisted_selectors(): + # all the selectors that are decorated 
with @public + # are added to __all__ in the selectors module + # check: ibis.selectors.py & public.py + try: + whitelisted_selectors = s.__dict__["__all__"] + except KeyError: + whitelisted_selectors = [] + return whitelisted_selectors + + +@frappe.whitelist() +def get_code_completions(code: str, columns: list): + import_statement = ( + "from insights.insights.doctype.insights_data_source_v3.ibis.functions import *" + ) + column_definitions = "\n".join([f"{column} = 0" for column in columns]) + code = f"{import_statement}\n\n{column_definitions}\n\n{code}" + + cursor_pos = code.find("|") + line_pos = code.count("\n", 0, cursor_pos) + column_pos = cursor_pos - code.rfind("\n", 0, cursor_pos) - 1 + code = code.replace("|", "") + + completions = [] + current_function = None + + script = Script(code) + completion_items = script.complete(line_pos + 1, column_pos) + + functions_module = ( + "insights.insights.doctype.insights_data_source_v3.ibis.functions" + ) + for c in completion_items: + if ( + c.in_builtin_module() + or c.name.startswith("_") + or c.type not in ["function", "statement"] + or (c.type == "function" and c.module_name != functions_module) + ): + continue + completions.append( + { + "name": c.name, + "type": "column" if c.type == "statement" else c.type, + "completion": c.complete + "()" if c.type == "function" else c.complete, + } + ) + + signature_items = script.get_signatures(line_pos + 1, column_pos) + for sig in signature_items: + description = sig.docstring() + # check if description is empty or only contains a single line + if not description or "\n" not in description: + current_function = {"name": sig.name} + continue + + # remove the standard definition from the description, i.e first line + description = description.split("\n", 1)[1].strip() + + # use custom definition, if "def " is present in the docstring + definition = "" + if "def " in description: + definition = description.split("\n", 1)[0].strip() + description = 
description.replace(definition, "").strip() + definition = definition.replace("def ", "") + + current_function = { + "name": sig.name, + "definition": definition, + "description": description, + "params": [ + {"name": param.name, "description": param.description} + for param in sig.params + ], + } + if sig.index is not None: + current_param = sig.params[sig.index] + current_function["current_param"] = current_param.name + current_function["current_param_description"] = current_param.description + + return { + "completions": completions, + "current_function": current_function, + } diff --git a/insights/insights/doctype/insights_data_source_v3/ibis_functions.py b/insights/insights/doctype/insights_data_source_v3/ibis_functions.py deleted file mode 100644 index 73420e653..000000000 --- a/insights/insights/doctype/insights_data_source_v3/ibis_functions.py +++ /dev/null @@ -1,292 +0,0 @@ -import math - -import frappe -import ibis -from ibis import _ -from ibis import selectors as s - -# from ibis.expr.types.numeric import NumericValue -# from ibis.expr.types.strings import StringValue -# from ibis.expr.types.temporal import DateValue, TimestampValue - - -# aggregate functions -def f_count(column=None, where=None): - if column is None: - query = frappe.flags.current_ibis_query - column = query.columns[0] - column = getattr(query, column) - - return column.count(where=where) - - -def f_count_if(condition, column=None): - if column is None: - query = frappe.flags.current_ibis_query - column = query.columns[0] - column = getattr(query, column) - - return f_count(column, where=condition) - - -f_min = lambda column, *args, **kwargs: column.min(*args, **kwargs) -f_max = lambda column, *args, **kwargs: column.max(*args, **kwargs) -f_sum = lambda column, *args, **kwargs: column.sum(*args, **kwargs) -f_avg = lambda column, *args, **kwargs: column.mean(*args, **kwargs) -f_group_concat = lambda column, *args, **kwargs: column.group_concat(*args, **kwargs) -f_distinct_count = lambda 
column: column.nunique() -f_sum_if = lambda condition, column: f_sum(column, where=condition) -f_distinct_count_if = lambda condition, column: column.nunique(where=condition) - -# boolean functions -f_is_in = lambda column, *values: column.isin(values) -f_is_not_in = lambda column, *values: column.notin(values) -f_is_set = lambda column: column.notnull() -f_is_not_set = lambda column: column.isnull() -f_is_between = lambda column, start, end: column.between(start, end) -f_is_not_between = lambda column, start, end: ~column.between(start, end) -f_is_within = lambda args, kwargs: None # TODO - -# conditional functions -f_if_else = ( - lambda condition, true_value, false_value: ibis.case() - .when(condition, true_value) - .else_(false_value) - .end() -) - - -def f_case(*args): - # args = [condition1, value1, condition2, value2, ..., default_value] - if len(args) % 2 == 0: - raise ValueError("Odd number of arguments expected") - - case = ibis.case() - for i in range(0, len(args) - 1, 2): - case = case.when(args[i], args[i + 1]) - - return case.else_(args[-1]).end() - - -# number Functions -f_abs = lambda column, *args, **kwargs: column.abs(*args, **kwargs) -f_round = lambda column, *args, **kwargs: column.round(*args, **kwargs) -f_floor = lambda column, *args, **kwargs: column.floor(*args, **kwargs) -f_ceil = lambda column, *args, **kwargs: column.ceil(*args, **kwargs) - -# String Functions -f_lower = lambda column, *args, **kwargs: column.lower(*args, **kwargs) -f_upper = lambda column, *args, **kwargs: column.upper(*args, **kwargs) -f_concat = lambda column, *args, **kwargs: column.concat(*args, **kwargs) -f_replace = lambda column, *args, **kwargs: column.replace(*args, **kwargs) -f_substring = lambda column, *args, **kwargs: column.substr(*args, **kwargs) -f_contains = lambda column, *args, **kwargs: column.contains(*args, **kwargs) -f_not_contains = lambda column, *args, **kwargs: ~column.contains(*args, **kwargs) -f_starts_with = lambda column, *args, **kwargs: 
column.startswith(*args, **kwargs) -f_ends_with = lambda column, *args, **kwargs: column.endswith(*args, **kwargs) -f_length = lambda column, *args, **kwargs: column.length(*args, **kwargs) - -# date functions -f_year = lambda column: column.year() -f_quarter = lambda column: column.quarter() -f_month = lambda column: column.month() -f_week_of_year = lambda column: column.week_of_year() -f_day_of_year = lambda column: column.day_of_year() -f_day_of_week = lambda column: column.day_of_week() -f_day = lambda column: column.day() -f_hour = lambda column: column.hour() -f_minute = lambda column: column.minute() -f_second = lambda column: column.second() -f_microsecond = lambda column: column.microsecond() -f_format_date = lambda column, *args, **kwargs: column.strftime(*args, **kwargs) -f_date_diff = lambda column, *args, **kwargs: column.delta(*args, **kwargs) -f_now = ibis.now -f_today = ibis.today -f_start_of = lambda unit, date: None # TODO - -# utility functions -f_to_inr = lambda curr, amount, rate=83: f_if_else(curr == "USD", amount * rate, amount) -f_to_usd = lambda curr, amount, rate=83: f_if_else(curr == "INR", amount / rate, amount) -f_literal = ibis.literal -f_row_number = ibis.row_number -f_sql = lambda query: _.sql(query) -f_coalesce = ibis.coalesce -f_if_null = ibis.coalesce -f_asc = ibis.asc -f_desc = ibis.desc - - -def f_previous_value(column, group_by, order_by, offset=1): - return column.lag(offset).over(group_by=group_by, order_by=order_by) - - -def f_next_value(column, group_by, order_by, offset=1): - return column.lead(offset).over(group_by=group_by, order_by=order_by) - - -def f_previous_period_value(column, date_column, offset=1): - date_column_name = ( - date_column.get_name() if hasattr(date_column, "get_name") else date_column - ) - return column.lag(offset).over( - group_by=(~s.numeric() & ~s.matches(date_column_name)), - order_by=ibis.asc(date_column_name), - ) - - -def f_next_period_value(column, date_column, offset=1): - date_column_name 
= ( - date_column.get_name() if hasattr(date_column, "get_name") else date_column - ) - return column.lead(offset).over( - group_by=(~s.numeric() & ~s.matches(date_column_name)), - order_by=ibis.asc(date_column_name), - ) - - -def f_percentage_change(column, date_column, offset=1): - prev_value = f_previous_period_value(column, date_column, offset) - return ((column - prev_value) * 100) / prev_value - - -def f_is_first_row(group_by, order_by, sort_order="asc"): - _order_by = ibis.asc(order_by) if sort_order == "asc" else ibis.desc(order_by) - row_number = f_row_number().over(group_by=group_by, order_by=_order_by) - return f_if_else(row_number == 1, 1, 0) - - -def f_filter_first_row(group_by, order_by, sort_order="asc"): - _order_by = ibis.asc(order_by) if sort_order == "asc" else ibis.desc(order_by) - row_number = f_row_number().over(group_by=group_by, order_by=_order_by) - return row_number == 1 - - -def f_create_buckets(column, num_buckets): - query = frappe.flags.current_ibis_query - if query is None: - frappe.throw("Failed to create buckets. Query not found") - - values_df = query.select(column).distinct().execute() - values = [v[0] for v in values_df.values.tolist()] - values = sorted(values) - - if not values: - frappe.throw("Failed to create buckets. 
No data found in the column") - - if len(values) < num_buckets: - num_buckets = len(values) - - bucket_size = math.ceil(len(values) / num_buckets) - buckets = [] - for i in range(0, len(values), bucket_size): - buckets.append(values[i : i + bucket_size]) - - case = ibis.case() - for bucket in buckets: - min_val = bucket[0] - max_val = bucket[-1] - label = f"{min_val}-{max_val}" - case = case.when(f_is_in(column, *bucket), label) - - return case.else_(None).end() - - -def f_week_start(column): - week_start_day = ( - frappe.db.get_single_value("Insights Settings", "week_starts_on") or "Monday" - ) - days = [ - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", - "Sunday", - ] - week_starts_on = days.index(week_start_day) - day_of_week = column.day_of_week.index().cast("int32") - adjusted_week_start = (day_of_week - week_starts_on + 7) % 7 - week_start = column - adjusted_week_start.as_interval(unit="D") - return week_start - - -def f_get_retention_data(date_column, id_column, unit="week"): - query = frappe.flags.current_ibis_query - if query is None: - frappe.throw("Query not found") - - if isinstance(date_column, str): - date_column = getattr(query, date_column) - - if isinstance(id_column, str): - id_column = getattr(query, id_column) - - if not date_column.type().is_date(): - frappe.throw(f"Invalid date column. 
Expected date, got {date_column.type()}") - - unit_start = { - "day": lambda column: column.strftime("%Y-%m-%d").cast("date"), - "week": f_week_start, - "month": lambda column: column.strftime("%Y-%m-01").cast("date"), - "year": lambda column: column.strftime("%Y-01-01").cast("date"), - }[unit] - - query = query.mutate( - cohort_start=unit_start(date_column).min().over(group_by=id_column) - ) - - query = query.mutate( - cohort_size=id_column.nunique().over(group_by=query.cohort_start) - ) - - query = query.mutate(offset=date_column.delta(query.cohort_start, unit)) - - zero_padded_offset = (query.offset < 10).ifelse( - f_literal("0").concat(query.offset.cast("string")), query.offset.cast("string") - ) - query = query.mutate( - offset_label=ibis.literal(f"{unit}_").concat(zero_padded_offset) - ) - - query = query.group_by(["cohort_start", "cohort_size", "offset_label"]).aggregate( - unique_ids=id_column.nunique() - ) - - query = query.mutate(retention=(query.unique_ids / query.cohort_size) * 100) - - return query - - -def get_functions(): - context = frappe._dict() - - functions = globals() - for key in functions: - if key.startswith("f_"): - context[key[2:]] = functions[key] - - selectors = frappe._dict() - for key in get_whitelisted_selectors(): - selectors[key] = getattr(s, key) - - context["s"] = selectors - context["selectors"] = selectors - - return context - - -@frappe.whitelist() -def get_function_list(): - return [key for key in get_functions() if not key.startswith("_")] - - -def get_whitelisted_selectors(): - # all the selectors that are decorated with @public - # are added to __all__ in the selectors module - # check: ibis.selectors.py & public.py - try: - whitelisted_selectors = s.__dict__["__all__"] - except KeyError: - whitelisted_selectors = [] - return whitelisted_selectors diff --git a/insights/insights/doctype/insights_data_source_v3/ibis_utils.py b/insights/insights/doctype/insights_data_source_v3/ibis_utils.py index 9fd8b248a..a7d78ee13 100644 
--- a/insights/insights/doctype/insights_data_source_v3/ibis_utils.py +++ b/insights/insights/doctype/insights_data_source_v3/ibis_utils.py @@ -24,7 +24,8 @@ from insights.utils import create_execution_log from insights.utils import deep_convert_dict_to_dict as _dict -from .ibis_functions import f_week_start, get_functions +from .ibis.functions import week_start +from .ibis.utils import get_functions class IbisQueryBuilder: @@ -491,7 +492,7 @@ def apply_aggregate(self, column, aggregate_function): def apply_granularity(self, column, granularity): if granularity == "week": - return f_week_start(column).strftime("%Y-%m-%d").name(column.get_name()) + return week_start(column).strftime("%Y-%m-%d").name(column.get_name()) if granularity == "quarter": year = column.year() quarter = column.quarter() From fc92159c1b2ec3664cc7e69cb0e6c01c88cf32bd Mon Sep 17 00:00:00 2001 From: Saqib Ansari Date: Wed, 1 Jan 2025 16:35:40 +0530 Subject: [PATCH 2/5] refactor: use codemirror's autocomplete & show function signature separately --- frontend/src2/helpers/index.ts | 2 +- frontend/src2/index.css | 4 + .../query/components/ExpressionEditor.vue | 198 ++++++++++-------- frontend/src2/styles/codemirror.css | 12 +- .../insights_data_source_v3/ibis/functions.py | 4 +- .../insights_data_source_v3/ibis/utils.py | 37 +--- 6 files changed, 136 insertions(+), 121 deletions(-) diff --git a/frontend/src2/helpers/index.ts b/frontend/src2/helpers/index.ts index 51610ef3e..8d70612cc 100644 --- a/frontend/src2/helpers/index.ts +++ b/frontend/src2/helpers/index.ts @@ -434,7 +434,7 @@ function isValidDate(value: string) { } const fetchCache = new Map() -export function fetchCall(url: string, options: any): Promise { +export function fetchCall(url: string, options?: any): Promise { // a function that makes a fetch call, but also caches the response for the same url & options const key = JSON.stringify({ url, options }) if (fetchCache.has(key)) { diff --git a/frontend/src2/index.css 
b/frontend/src2/index.css index c3f571e87..ef959e916 100644 --- a/frontend/src2/index.css +++ b/frontend/src2/index.css @@ -2,6 +2,10 @@ @import 'frappe-ui/src/style.css'; @import './styles/codemirror.css'; +body { + @apply text-base; +} + .tnum { font-feature-settings: 'tnum'; } diff --git a/frontend/src2/query/components/ExpressionEditor.vue b/frontend/src2/query/components/ExpressionEditor.vue index 8bfc09ef2..cc9002a88 100644 --- a/frontend/src2/query/components/ExpressionEditor.vue +++ b/frontend/src2/query/components/ExpressionEditor.vue @@ -1,6 +1,6 @@ diff --git a/frontend/src2/styles/codemirror.css b/frontend/src2/styles/codemirror.css index 1d61920a4..e26be07aa 100644 --- a/frontend/src2/styles/codemirror.css +++ b/frontend/src2/styles/codemirror.css @@ -44,11 +44,7 @@ outline: none !important; } .cm-tooltip-autocomplete { - border: 1px solid #fafafa !important; - padding: 0.25rem; - background-color: #fff !important; - border-radius: 0.375rem; - filter: drop-shadow(0 4px 3px rgb(0 0 0 / 0.07)) drop-shadow(0 2px 2px rgb(0 0 0 / 0.06)); + @apply !rounded-lg !shadow-md !bg-white !p-1.5 !border-none; } .cm-tooltip-autocomplete > ul { font-family: 'Inter' !important; @@ -57,3 +53,9 @@ @apply !rounded !bg-gray-200/80; color: #000 !important; } +.cm-completionLabel { + margin-right: 1rem !important; +} +.cm-completionDetail { + margin-left: auto !important; +} diff --git a/insights/insights/doctype/insights_data_source_v3/ibis/functions.py b/insights/insights/doctype/insights_data_source_v3/ibis/functions.py index ed8373f0f..0abd7ce76 100644 --- a/insights/insights/doctype/insights_data_source_v3/ibis/functions.py +++ b/insights/insights/doctype/insights_data_source_v3/ibis/functions.py @@ -733,12 +733,12 @@ def coalesce(*args) def if_null(column, value): """ - def ifnull(column, value) + def if_null(column, value) Replace null values in a column with a default value. 
Examples: - - ifnull(email, 'No Email') + - if_null(email, 'No Email') """ return ibis.coalesce(column, value) diff --git a/insights/insights/doctype/insights_data_source_v3/ibis/utils.py b/insights/insights/doctype/insights_data_source_v3/ibis/utils.py index 4abc1a4dc..1de7591ba 100644 --- a/insights/insights/doctype/insights_data_source_v3/ibis/utils.py +++ b/insights/insights/doctype/insights_data_source_v3/ibis/utils.py @@ -40,43 +40,23 @@ def get_whitelisted_selectors(): @frappe.whitelist() -def get_code_completions(code: str, columns: list): - import_statement = ( - "from insights.insights.doctype.insights_data_source_v3.ibis.functions import *" - ) - column_definitions = "\n".join([f"{column} = 0" for column in columns]) - code = f"{import_statement}\n\n{column_definitions}\n\n{code}" +def get_function_list(): + return [key for key in get_functions() if not key.startswith("_")] + + +@frappe.whitelist() +def get_code_completions(code: str): + import_statement = """from insights.insights.doctype.insights_data_source_v3.ibis.functions import *\nfrom ibis import selectors as s""" + code = f"{import_statement}\n\n{code}" cursor_pos = code.find("|") line_pos = code.count("\n", 0, cursor_pos) column_pos = cursor_pos - code.rfind("\n", 0, cursor_pos) - 1 code = code.replace("|", "") - completions = [] current_function = None script = Script(code) - completion_items = script.complete(line_pos + 1, column_pos) - - functions_module = ( - "insights.insights.doctype.insights_data_source_v3.ibis.functions" - ) - for c in completion_items: - if ( - c.in_builtin_module() - or c.name.startswith("_") - or c.type not in ["function", "statement"] - or (c.type == "function" and c.module_name != functions_module) - ): - continue - completions.append( - { - "name": c.name, - "type": "column" if c.type == "statement" else c.type, - "completion": c.complete + "()" if c.type == "function" else c.complete, - } - ) - signature_items = script.get_signatures(line_pos + 1, column_pos) for 
sig in signature_items: description = sig.docstring() @@ -110,6 +90,5 @@ def get_code_completions(code: str, columns: list): current_function["current_param_description"] = current_param.description return { - "completions": completions, "current_function": current_function, } From 59d97310aea257f7882677e548628057c6cce76e Mon Sep 17 00:00:00 2001 From: Saqib Ansari Date: Wed, 1 Jan 2025 16:40:14 +0530 Subject: [PATCH 3/5] chore: remove unused variables --- .../query/components/ExpressionEditor.vue | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/frontend/src2/query/components/ExpressionEditor.vue b/frontend/src2/query/components/ExpressionEditor.vue index cc9002a88..0dc472d12 100644 --- a/frontend/src2/query/components/ExpressionEditor.vue +++ b/frontend/src2/query/components/ExpressionEditor.vue @@ -60,10 +60,10 @@ function getFunctionMatches(word: string) { const codeEditor = ref(null) const codeContainer = ref(null) -const suggestionElement = ref(null) +const signatureElement = ref(null) onMounted(() => { - // fix clipping of tooltip & suggestion element because of dialog styling + // fix clipping of tooltip & signature element because of dialog styling const dialogElement = codeContainer.value?.closest('.my-8.overflow-hidden.rounded-xl') if (!dialogElement) { return @@ -72,12 +72,6 @@ onMounted(() => { dialogElement.children[0]?.classList.add('rounded-xl') }) -type Completion = { - name: string - type: string - completion: string -} -const completions = ref([]) type FunctionSignature = { name: string definition: string @@ -89,7 +83,7 @@ type FunctionSignature = { const currentFunctionSignature = ref() const fetchCompletions = debounce(() => { if (!codeEditor.value) { - completions.value = [] + currentFunctionSignature.value = undefined return } @@ -103,7 +97,6 @@ const fetchCompletions = debounce(() => { code, }) .then((res: any) => { - completions.value = res.completions currentFunctionSignature.value = 
res.current_function // if there is a current_param, then we need to update the definition // add & underline tags before and after the current_param value in the definition @@ -125,14 +118,14 @@ const fetchCompletions = debounce(() => { }) }, 1000) -function setSuggestionElementPosition() { +function setSignatureElementPosition() { setTimeout(() => { const containerRect = codeContainer.value?.getBoundingClientRect() const tooltipElement = codeContainer.value?.querySelector('.cm-tooltip-autocomplete') const cursorElement = codeContainer.value?.querySelector('.cm-cursor.cm-cursor-primary') if (!containerRect) return - if (!suggestionElement.value) return + if (!signatureElement.value) return let left = 0, top = 0 @@ -151,8 +144,8 @@ function setSuggestionElementPosition() { return } - suggestionElement.value.style.left = `${left}px` - suggestionElement.value.style.top = `${top}px` + signatureElement.value.style.left = `${left}px` + signatureElement.value.style.top = `${top}px` }, 100) } @@ -166,11 +159,11 @@ function setSuggestionElementPosition() { v-model="expression" :placeholder="placeholder" :completions="getCompletions" - @view-update="() => (fetchCompletions(), setSuggestionElementPosition())" + @view-update="() => (fetchCompletions(), setSignatureElementPosition())" >
From de7f1a0bc07ebf0e226684ec379d8cf45dd78655 Mon Sep 17 00:00:00 2001 From: Saqib Ansari Date: Wed, 1 Jan 2025 16:52:25 +0530 Subject: [PATCH 4/5] refactor: use expression editor in filter expressions --- frontend/src2/query/components/FiltersSelector.vue | 9 ++++++--- frontend/src2/styles/codemirror.css | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/frontend/src2/query/components/FiltersSelector.vue b/frontend/src2/query/components/FiltersSelector.vue index 21916fc50..4a65cb442 100644 --- a/frontend/src2/query/components/FiltersSelector.vue +++ b/frontend/src2/query/components/FiltersSelector.vue @@ -4,8 +4,8 @@ import { computed, reactive } from 'vue' import { copy, flattenOptions } from '../../helpers' import { ColumnOption, FilterGroupArgs, GroupedColumnOption } from '../../types/query.types' import { column, expression } from '../helpers' +import ExpressionEditor from './ExpressionEditor.vue' import FilterRule from './FilterRule.vue' -import InlineExpression from './InlineExpression.vue' import { isFilterExpressionValid, isFilterValid } from './filter_utils' const props = defineProps<{ @@ -90,9 +90,12 @@ const areFiltersUpdated = computed(() => { {{ filterGroup.logical_operator.toLowerCase() }}
- Date: Wed, 1 Jan 2025 16:56:25 +0530 Subject: [PATCH 5/5] fix: inline-expression css --- frontend/src2/styles/codemirror.css | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/frontend/src2/styles/codemirror.css b/frontend/src2/styles/codemirror.css index 5f6a009bd..02b2f1d5a 100644 --- a/frontend/src2/styles/codemirror.css +++ b/frontend/src2/styles/codemirror.css @@ -60,16 +60,13 @@ margin-left: auto !important; } - -.inline-expression { - .cm-content { - padding: 0 !important; - line-height: 26px !important; - } - .cm-placeholder { - line-height: 26px !important; - } - .cm-gutters { - line-height: 26px !important; - } +.inline-expression .cm-content { + padding: 0 !important; + line-height: 26px !important; +} +.inline-expression .cm-placeholder { + line-height: 26px !important; +} +.inline-expression .cm-gutters { + line-height: 26px !important; }