import calendar
from datetime import timedelta
from decimal import Decimal
from typing import Iterable

from dateutil.relativedelta import relativedelta
from django.utils import timezone

from core.models import Subject, Transaction

# Average calendar lengths; Decimal keeps the modulo arithmetic below exact.
days_per_year = Decimal("365.25")
days_per_month = days_per_year / Decimal("12")
# Maximum distance (in days) between the average gap and a whole number of
# months for the gap to still be treated as "every n months".
monthly_distance_threshold = 3
# Maximum spread (in days) of the booking days-of-month for the prediction to
# be pinned to one specific day of the month.
certain_day_of_month_threshold = 3
# Upper bound on the number of dates predicted per subject.
max_predicted_dates = 10


def _group_follow_up_objects(objects):
    """
    Creates a list of each follow-up pair of objects within a given list, like this:
    [1, 2, 3] -> [(1, 2), (2, 3)]

    Fewer than two objects yield an empty list.
    """
    items = list(objects)
    return list(zip(items, items[1:]))


def _project_dates(start, step, horizon):
    """
    Return the dates start + 1*step, start + 2*step, ... until a date reaches
    `horizon` (that date is still included) or `max_predicted_dates` is hit.

    Every date is computed from `start` instead of from the previous result, so
    end-of-month clipping of month steps (Jan 31 -> Feb 28) does not carry over
    into the following dates.
    """
    projected = []
    current = start
    while current < horizon and len(projected) < max_predicted_dates:
        current = start + step * (len(projected) + 1)
        projected.append(current)
    return projected


def _with_day_of_month(date, day):
    """
    Return `date` moved to the given day of its month, clamped to the last
    day of that month (e.g. day 31 in February becomes the 28th/29th).
    """
    last_day = calendar.monthrange(date.year, date.month)[1]
    return date.replace(day=min(day, last_day))


def predict_amount(subject: Subject):
    """
    Return the amount of the subject's latest transaction, or None if the
    subject has no transactions.
    """
    if subject.transactions.exists():
        return subject.transactions.latest().amount
    return None


def predict_booking_dates(subject: Subject):
    """
    Predict upcoming booking dates from the gaps between existing transactions.

    Returns a tuple `(dates, recurring_days, recurring_months, day_of_month)`:
    - `dates`: predicted dates (empty if fewer than two transactions exist or
      no positive interval can be derived)
    - `recurring_days`: interval in days when no monthly pattern was found
    - `recurring_months`: interval in months when a monthly pattern was found
    - `day_of_month`: the day predictions were pinned to, if the existing
      bookings cluster around one day of the month
    Values that do not apply are None.
    """
    dates, recurring_days, recurring_months, day_of_month = [], None, None, None

    if subject.transactions.count() < 2:
        return dates, recurring_days, recurring_months, day_of_month

    existing_transactions = subject.transactions.order_by("booking_date")
    last_date = existing_transactions.latest().booking_date
    one_year_later = last_date + relativedelta(years=1)

    transaction_tuples = _group_follow_up_objects(existing_transactions)
    date_deltas = [second.booking_date - first.booking_date for first, second in transaction_tuples]
    average_delta_in_days = Decimal(sum(delta.days for delta in date_deltas)) / Decimal(len(date_deltas))
    average_delta_in_months = average_delta_in_days / days_per_month

    # Distance of the average gap to the nearest whole multiple of a month.
    days_per_month_mod = average_delta_in_days % days_per_month
    distance_from_days_per_month = min(days_per_month_mod, days_per_month - days_per_month_mod)
    rounded_months = round(average_delta_in_months)

    # Gaps of only a few days are also "close" to a multiple of a month (zero
    # months). A zero-month step would never advance, so require >= 1 month.
    if distance_from_days_per_month <= monthly_distance_threshold and rounded_months >= 1:
        # transactions can be considered to happen every n months
        recurring_months = rounded_months
        dates = _project_dates(last_date, relativedelta(months=recurring_months), one_year_later)

        days_of_month = [t.booking_date.day for t in existing_transactions]
        if max(days_of_month) - min(days_of_month) <= certain_day_of_month_threshold:
            # since transactions occurred in a close range around a certain day of month, we can
            # improve our prediction
            day_of_month = round(sum(days_of_month) / len(days_of_month))
            # Clamp to the month length; a plain replace(day=31) raises for short months.
            dates = [_with_day_of_month(date, day_of_month) for date in dates]
    else:
        # there is no monthly pattern, just add the average delta to determine new dates
        rounded_days = round(average_delta_in_days)
        # A zero-day interval (all bookings on practically the same day) cannot
        # be projected forward, so no dates are predicted in that case.
        if rounded_days >= 1:
            recurring_days = rounded_days
            dates = _project_dates(last_date, relativedelta(days=recurring_days), one_year_later)

    return dates, recurring_days, recurring_months, day_of_month


def predict_transactions(subject: Subject):
    """
    Analyze existing transactions of a given subject and predict future transactions, up to one
    year in advance.

    Returns `(transactions, prediction_info)`. `transactions` is a list of
    Transaction instances built from the predicted amount and dates (they are
    only instantiated here, not saved). `prediction_info` is a dict with the
    keys "recurring_days", "recurring_months" and "day_of_month". If no amount
    or no dates could be predicted, the result is `([], None)`.
    """
    transactions, prediction_info = [], None
    amount = predict_amount(subject)
    dates, rec_days, rec_months, day_of_month = predict_booking_dates(subject)

    # NOTE: a falsy amount (None or zero) yields no predictions.
    if amount and dates:
        transactions = [Transaction(amount=amount, booking_date=date, subject=subject) for date in dates]
        prediction_info = {
            "recurring_days": rec_days,
            "recurring_months": rec_months,
            "day_of_month": day_of_month,
        }

    return transactions, prediction_info


def predict_all(subjects: Iterable[Subject], past_days=30, future_days=60):
    """
    Collect existing and predicted transactions per subject within a window of
    `past_days` before and `future_days` after today.

    Returns a list of `(subject, transactions, prediction_info)` tuples, where
    `transactions` holds the subject's existing transactions inside the window
    (ordered by booking date) followed by the predicted ones inside the window.
    """
    today = timezone.now().date()
    past_lookup_bound = today - timedelta(days=past_days)
    future_lookup_bound = today + timedelta(days=future_days)
    lookup_bounds = (past_lookup_bound, future_lookup_bound)

    prediction_list = []
    for subject in subjects:
        transactions = list(
            subject.transactions.filter(booking_date__range=lookup_bounds).order_by("booking_date"))
        predicted_transactions, prediction_info = predict_transactions(subject)

        if predicted_transactions:
            first_predicted_date = predicted_transactions[0].booking_date
            if first_predicted_date >= past_lookup_bound:
                # if the first predicted transaction lies more than `past_days` in the past,
                # the subject is considered done and is left out
                for transaction in predicted_transactions:
                    if past_lookup_bound <= transaction.booking_date <= future_lookup_bound:
                        transactions.append(transaction)
                # NOTE(review): nesting reconstructed from a whitespace-mangled source; the
                # subject is assumed to be listed only when its predictions are still
                # current. Confirm against the original file.
                prediction_list.append((subject, transactions, prediction_info))

    return prediction_list