diff --git a/conf/solr_cron b/conf/solr_cron new file mode 100644 index 00000000000..1cecaac7804 --- /dev/null +++ b/conf/solr_cron @@ -0,0 +1,7 @@ +PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + + +#Updating Solr's trending scores. +0 * * * * su openlibrary -c "python scripts/calculate_trending_scores_hourly.py" + +5 0 * * * su openlibrary -c "python scripts/calculate_trending_scores_daily.py" diff --git a/scripts/calculate_trending_scores_daily.py b/scripts/calculate_trending_scores_daily.py index 5edeb3c9698..3813a802c02 100644 --- a/scripts/calculate_trending_scores_daily.py +++ b/scripts/calculate_trending_scores_daily.py @@ -42,11 +42,10 @@ def form_inplace_updates(work_id: str, current_day: int, new_value: int): load_config(ol_config) current_day = datetime.datetime.now().weekday() work_data = fetch_works(current_day) - print(work_data) + request_body = [ form_inplace_updates(work_id, current_day, work_data[work_id]) for work_id in work_data ] - print(request_body) + resp = get_solr().update_in_place(request_body) - print(resp) diff --git a/scripts/calculate_trending_scores_hourly.py b/scripts/calculate_trending_scores_hourly.py index 4558c6c9099..4383d1bf954 100644 --- a/scripts/calculate_trending_scores_hourly.py +++ b/scripts/calculate_trending_scores_hourly.py @@ -63,8 +63,18 @@ def fetch_works(current_hour: int): return doc_data +# If the arithmetic mean is below 10/7 (i.e. there have been +# fewer than 10 reading log events for the work in the past week), +# it should not show up on trending. +AVERAGE_DAILY_EVENTS_FLOOR = 10 / 7 + + +# This function calculates how many standard deviations the value for the last +# 24 hours is away from the mean. def get_z_score(solr_doc: dict, count: int, current_hour: int): arith_mean = sum([solr_doc[f'trending_score_daily_{i}'] for i in range(7)]) / 7 + if arith_mean < AVERAGE_DAILY_EVENTS_FLOOR: + return 0 last_24_hours_value = ( solr_doc['trending_score_hourly_sum'] + count