From 3ec13b75f2f96cb0ee1c0d9167a3dc27503eca26 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 16 Aug 2024 12:12:23 -0700
Subject: [PATCH 01/22] 1. Introduce load_viz_notebook_inferred_data(),
 filter_inferred_trips() and expand_inferredlabels() for processing,
 filtering and expanding inferred labels. 2. map_trip_data() to extract the
 mapping functionality.

---
 viz_scripts/scaffolding.py | 99 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index e2abc572..e7e73a4a 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -78,6 +78,15 @@ def filter_labeled_trips(mixed_trip_df):
     disp.display(labeled_ct.head())
     return labeled_ct
 
+def filter_inferred_trips(mixed_trip_df):
+    # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+    if len(mixed_trip_df) == 0:
+        return mixed_trip_df
+    inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))]
+    print("After filtering, found %s inferred trips" % len(inferred_ct))
+    disp.display(inferred_ct.head())
+    return inferred_ct
+
 def expand_userinputs(labeled_ct):
     '''
     param: labeled_ct: a dataframe of confirmed trips, some of which have labels
@@ -105,6 +114,27 @@ def expand_userinputs(labeled_ct):
     disp.display(expanded_ct.head())
     return expanded_ct
 
+def expand_inferredlabels(inferred_ct):
+    if len(inferred_ct) == 0:
+        return inferred_ct
+
+    max_labels_list = []
+    max_p_list = []
+
+    for item in inferred_ct.inferred_labels:
+        max_entry = max(item, key=lambda x: x['p'])
+        max_labels_list.append(max_entry['labels'])
+        max_p_list.append(max_entry['p'])
+
+    inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index)
+    disp.display(inferred_only_labels)
+    inferred_only_p = pd.DataFrame(max_p_list, index=inferred_ct.index, columns=['p'])
+    disp.display(inferred_only_p)
+    expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels, inferred_only_p], axis=1)
+    expanded_inferred_ct.reset_index(drop=True, inplace=True)
+    disp.display(expanded_inferred_ct.head())
+    return expanded_inferred_ct
+
 # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
 unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0
 trip_label_count = lambda s, df: len(df[s].dropna()) if s in df.columns else 0
@@ -176,6 +206,75 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic
 
     return expanded_ct, file_suffix, quality_text, debug_df
 
+def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur):
+    # Change meters to miles
+    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+    if "distance" in df.columns:
+        unit_conversions(df)
+
+    # Map new mode labels with translations dictionary from dynamic_labels
+    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+    if "mode_confirm" in df.columns:
+        if (len(dynamic_labels)):
+            dic_mode_mapping = mapping_labels(dynamic_labels, "MODE")
+            df['Mode_confirm'] = df['mode_confirm'].map(dic_mode_mapping)
+        else:
+            df['Mode_confirm'] = df['mode_confirm'].map(dic_re)
+    if study_type == 'program':
+        # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+        if 'replaced_mode' in df.columns:
+            if (len(dynamic_labels)):
+                dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE")
+                df['Replaced_mode'] = df['replaced_mode'].map(dic_replaced_mapping)
+            else:
+                df['Replaced_mode'] = df['replaced_mode'].map(dic_re)
+        else:
+            print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping")
+    else:
+            print("This is a study, not expecting any replaced modes.")
+
+    # Trip purpose mapping
+    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
+    if dic_pur is not None and "purpose_confirm" in df.columns:
+        if (len(dynamic_labels)):
+             dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE")
+             df['Trip_purpose'] = df['purpose_confirm'].map(dic_purpose_mapping)
+        else:
+            df['Trip_purpose'] = df['purpose_confirm'].map(dic_pur)
+    return df
+
+def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False):
+    """ Inputs:
+    year/month/program/study_type = parameters from the visualization notebook
+    dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose
+
+    Pipeline to load and process the data before use in visualization notebooks.
+    """
+    # Access database
+    tq = get_time_query(year, month)
+    participant_ct_df = load_all_participant_trips(program, tq, include_test_users)
+    inferred_ct = filter_inferred_trips(participant_ct_df)
+    expanded_it = expand_inferredlabels(inferred_ct)
+    expanded_it = map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur)
+
+    # Document data quality
+    file_suffix = get_file_suffix(year, month, program)
+    quality_text = get_quality_text(participant_ct_df, expanded_it, None, include_test_users)
+
+    debug_df = pd.DataFrame.from_dict({
+            "year": year,
+            "month": month,
+            "Registered_participants": len(get_participant_uuids(program, include_test_users)),
+            "Participants_with_at_least_one_trip": unique_users(participant_ct_df),
+            "Participant_with_at_least_one_inferred_trip": unique_users(inferred_ct),
+            "Trips_with_at_least_one_inferred_label": len(inferred_ct),
+            "Trips_with_mode_confirm_inferred_label": trip_label_count("Mode_confirm", expanded_it),
+            "Trips_with_trip_purpose_inferred_label": trip_label_count("Trip_purpose", expanded_it)
+            },
+        orient='index', columns=["value"])
+
+    return expanded_it, file_suffix, quality_text, debug_df
+
 # Function to map the "MODE", "REPLACED_MODE", "PURPOSE" to respective en-translations
 # Input: dynamic_labels, label_type: MODE, REPLACED_MODE, PURPOSE
 # Return: Dictionary mapping between the label type and its english translation.

From 9897c76ca43127d526ebd2a97faa061f45dd4155 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 16 Aug 2024 12:37:47 -0700
Subject: [PATCH 02/22] Utilize map_trip_data() for common trip mapping
 functionality in load_viz_notebook_data() for refactor.

---
 viz_scripts/scaffolding.py | 36 +-----------------------------------
 1 file changed, 1 insertion(+), 35 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index e7e73a4a..ef2200db 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -152,41 +152,7 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic
     labeled_ct = filter_labeled_trips(participant_ct_df)
     expanded_ct = expand_userinputs(labeled_ct)
     expanded_ct = data_quality_check(expanded_ct)
-
-    # Change meters to miles
-    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if "distance" in expanded_ct.columns:
-        unit_conversions(expanded_ct)
-    
-    # Map new mode labels with translations dictionary from dynamic_labels
-    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if "mode_confirm" in expanded_ct.columns:
-        if (len(dynamic_labels)):
-            dic_mode_mapping = mapping_labels(dynamic_labels, "MODE")
-            expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_mode_mapping)
-        else:
-            expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_re)
-    if study_type == 'program':
-        # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-        if 'replaced_mode' in expanded_ct.columns:
-            if (len(dynamic_labels)):
-                dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE")
-                expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_replaced_mapping)
-            else:
-                expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_re)
-        else:
-            print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping")
-    else:
-            print("This is a study, not expecting any replaced modes.")
-
-    # Trip purpose mapping
-    # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if dic_pur is not None and "purpose_confirm" in expanded_ct.columns:
-        if (len(dynamic_labels)):
-             dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE")
-             expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_purpose_mapping)
-        else:
-            expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_pur)
+    expanded_ct = map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur)
 
     # Document data quality
     file_suffix = get_file_suffix(year, month, program)

From b4d704b9281139e9be6c75d83a981e6935df583f Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Sun, 18 Aug 2024 13:41:43 -0700
Subject: [PATCH 03/22] Add load_viz_notebook_inferred_data() function for
 inferred metrics, and incorporate inferred label for Distribution of modes.

---
 viz_scripts/generic_metrics.ipynb | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 6ced8fc7..21bee814 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -140,6 +140,23 @@
     "                                                                            sensed_algo_prefix)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c26ff5f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n",
+    "                                                                            month,\n",
+    "                                                                            program,\n",
+    "                                                                            study_type,\n",
+    "                                                                            dynamic_labels,\n",
+    "                                                                            dic_re,\n",
+    "                                                                            dic_pur=dic_pur,\n",
+    "                                                                            include_test_users=include_test_users)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -171,9 +188,13 @@
     "labeled_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total  trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text)\n",
     "# labeled_match\n",
     "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n",
+    "\n",
+    "inferred_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total  trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text_inferred)\n",
+    "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n",
+    "\n",
     "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
     "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
-    "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed"
+    "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed, stacked_bar_quality_text_inferred"
    ]
   },
   {
@@ -203,14 +224,16 @@
     "plot_title_no_quality= \"Number of trips for each mode\"\n",
     "\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
     "    # We will have text results corresponding to the axes for simplicity and consistency\n",
-    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    \n",
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
     "    \n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",

From add0e507307f86fabc97b61fce71a86cee8ff3ea Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 10:13:56 -0700
Subject: [PATCH 04/22] 1. Introduce markdown for collecting data from database
 for Inferred Metrics 2. Update quality_text, fig, ax, text_results and
 introduce new plot_and_text_stacked_bar_chart() for all Stacked Bar Charts to
 represent inferred labels bar in generic_metrics notebook

---
 viz_scripts/generic_metrics.ipynb | 53 +++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 21bee814..53024221 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -140,6 +140,14 @@
     "                                                                            sensed_algo_prefix)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "325e5eda",
+   "metadata": {},
+   "source": [
+    "## Collect Data from Database for Inferred Metrics"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -189,11 +197,12 @@
     "# labeled_match\n",
     "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n",
     "\n",
+    "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
+    "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
+    "\n",
     "inferred_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total  trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text_inferred)\n",
     "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n",
     "\n",
-    "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
-    "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
     "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed, stacked_bar_quality_text_inferred"
    ]
   },
@@ -276,13 +285,17 @@
     "\n",
     "    expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n",
     "    commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
+    "    expanded_ct_inferred_commute = expanded_ct_inferred.query(trip_purpose_query)\n",
+    "    commute_quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, expanded_ct_inferred_commute, \"commute\", include_test_users) if not expanded_ct_inferred.empty else \"\"\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n",
     "    \n",
     "    # Plot entries\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)    \n",
-    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)    \n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n",
+    "                                    \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n (Confirmed trips)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -312,10 +325,12 @@
     "plot_title_no_quality=\"Number of trips for each purpose\"\n",
     "file_name= f\"ntrips_purpose{file_suffix}\"\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
-    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n",
+    "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -359,17 +374,21 @@
     "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
     "    ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
     "    expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n",
+    "    expanded_ct_inferred_u80 = expanded_ct_inferred.loc[(expanded_ct_inferred['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n",
     "    expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n",
     "    sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n",
     "    labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n",
+    "    inferred_u80_quality_text = f\"{len(expanded_ct_inferred_u80)} trips ({round(len(expanded_ct_inferred_u80)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(expanded_ct_inferred_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_inferred_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n",
     "    \n",
     "    # Plot entries\n",
-    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
-    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_inferred_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n",
@@ -403,13 +422,15 @@
     "file_name =f\"total_trip_length{file_suffix}\"\n",
     "\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
     "    \n",
-    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -443,16 +464,20 @@
     "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
     "    ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
     "    labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n",
+    "    inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n",
     "    sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
     "    \n",
     "    sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
     "    labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n",
-    "\n",
-    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    inferred_land_quality_text = f\"{len(inferred_land_trips_df)} trips ({round(len(inferred_land_trips_df)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(inferred_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(inferred_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n",
+    "    \n",
+    "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
     "    plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_land_quality_text,  ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "    plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",

From 5d0546b569c44cdca1fdb079ae75a1b2d5ff5771 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 11:05:54 -0700
Subject: [PATCH 05/22] 1. Add load_viz_notebook_inferred_data() to collect
 data from db 2. Add query for mode_of_interest for inferred labels 3. Update
 fig, ax, text_results, plot_and_text_stacked_bar_chart() for all Stacked Bar
 Charts.

---
 viz_scripts/mode_specific_metrics.ipynb | 81 +++++++++++++++++++++----
 1 file changed, 70 insertions(+), 11 deletions(-)

diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index 0e2e63d6..e2fbb76f 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -131,6 +131,31 @@
     "                                                                            include_test_users=include_test_users)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "dbc2bb22",
+   "metadata": {},
+   "source": [
+    "## Collect Data From Database for Inferred Metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8bd1755",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n",
+    "                                                                            month,\n",
+    "                                                                            program,\n",
+    "                                                                            study_type,\n",
+    "                                                                            dynamic_labels,\n",
+    "                                                                            dic_re,\n",
+    "                                                                            dic_pur=dic_pur,\n",
+    "                                                                            include_test_users=include_test_users)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -172,6 +197,34 @@
     "quality_text = scaffolding.get_quality_text(expanded_ct, data_eb, mode_of_interest, include_test_users)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "832337a6",
+   "metadata": {},
+   "source": [
+    "## Metrics for Specific Inferred Mode\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bed648bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_eb_inferred = expanded_ct_inferred.query(f\"mode_confirm == '{mode_of_interest}'\") if \"mode_confirm\" in expanded_ct_inferred.columns else expanded_ct_inferred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "54fcaff2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, data_eb_inferred, mode_of_interest, include_test_users)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "loaded-expert",
@@ -191,10 +244,12 @@
     "file_name= f\"ntrips_{mode_of_interest}_purpose{file_suffix}\"\n",
     "\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
-    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n",
+    "                                    f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_purpose, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
+    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
@@ -226,12 +281,14 @@
     "file_name = f\"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}\"\n",
     "\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
-    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_mode, debug_df)\n",
-    "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
-    "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
+    "                                    \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Inferred by OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "#     plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+    "#     set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
     "    generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",
@@ -261,10 +318,12 @@
     "file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'\n",
     "\n",
     "try:\n",
-    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
-    "    text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
+    "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
+    "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n",
+    "                                    f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",

From 9998bee96683ac63bd7acc16ce2430bfcf25a136 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 11:07:09 -0700
Subject: [PATCH 06/22] Uncomment plot_title and set_title_and_save() for total
 trip length in mode_specific_metrics notebook

---
 viz_scripts/mode_specific_metrics.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index e2fbb76f..725d211a 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -287,8 +287,8 @@
     "                                    \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Inferred by OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
-    "#     plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
-    "#     set_title_and_save(fig, text_results, plot_title, file_name)\n",
+    "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+    "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
     "    generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n",

From 043771078c5c5e2d14b6daf043c9fe91d0f36907 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 15:45:19 -0700
Subject: [PATCH 07/22] 1. Add commute_labeled/inferred_match regex and
 stacked_bar_quality_text_commute_labeled/inferred 2. Update
 plot_and_text_stacked_bar_chart() for Distribution of modes in commute trips

---
 viz_scripts/generic_metrics.ipynb | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 53024221..f5960157 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -287,15 +287,21 @@
     "    commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
     "    expanded_ct_inferred_commute = expanded_ct_inferred.query(trip_purpose_query)\n",
     "    commute_quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, expanded_ct_inferred_commute, \"commute\", include_test_users) if not expanded_ct_inferred.empty else \"\"\n",
-    "    plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n",
-    "    \n",
+    "    plot_title = plot_title_no_quality\n",
+    "\n",
+    "    commute_labeled_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text)\n",
+    "    stacked_bar_quality_text_commute_labeled = f\"{commute_labeled_match.group(1)} trips {commute_labeled_match.group(7)}\\n from {commute_labeled_match.group(2)} {commute_labeled_match.group(3)}\"\n",
+    "\n",
+    "    commute_inferred_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text_inferred)\n",
+    "    stacked_bar_quality_text_commute_inferred = f\"{commute_inferred_match.group(1)} trips {commute_inferred_match.group(7)}\\n from {commute_inferred_match.group(2)} {commute_inferred_match.group(3)}\"\n",
+    "\n",
     "    # Plot entries\n",
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)    \n",
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n (Confirmed trips)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",

From d098c18bfd726b31b6ea68c7394b55f265d1900e Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 16:21:15 -0700
Subject: [PATCH 08/22] 1. Introduce regex to extract labeled_match and
 inferred_match 2. Use stacked_bar_quality_text and
 stacked_bar_quality_text_inferred with plot_and_text_stacked_bar_chart() 3.
 Adjust plot_title to plot_title_no_quality

---
 viz_scripts/mode_specific_metrics.ipynb | 44 ++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index 725d211a..ce535a13 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -225,6 +225,32 @@
     "quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, data_eb_inferred, mode_of_interest, include_test_users)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "044773bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "quality_text, quality_text_inferred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "139b4060",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "labeled_match = re.match(r'Based on ([0-9]+) confirmed {} trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))'.format(mode_of_interest), quality_text)\n",
+    "stacked_bar_quality_text = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n",
+    "inferred_match =re.match(r'Based on ([0-9]+) confirmed {} trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))'.format(mode_of_interest), quality_text_inferred)\n",
+    "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n",
+    "\n",
+    "stacked_bar_quality_text, stacked_bar_quality_text_inferred"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "loaded-expert",
@@ -247,10 +273,10 @@
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_purpose, debug_df)\n",
+    "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
-    "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
+    "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -284,10 +310,10 @@
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "                                    \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
-    "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+    "                                    \"Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -321,10 +347,10 @@
     "    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_mode, debug_df)\n",
+    "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
-    "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
+    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",

From 704450abb4b830b243632960f7515d9e53d74e29 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 19 Aug 2024 16:49:38 -0700
Subject: [PATCH 09/22] Update expanded_ct_inferred_u80 to use
 expanded_ct_inferred instead of expanded_ct

---
 viz_scripts/generic_metrics.ipynb | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index f5960157..42f13978 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -380,12 +380,13 @@
     "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
     "    ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
     "    expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n",
-    "    expanded_ct_inferred_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n",
+    "    expanded_ct_inferred_u80 = expanded_ct_inferred.loc[(expanded_ct_inferred['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n",
     "    expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n",
+    "\n",
     "    sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n",
     "    labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n",
     "    inferred_u80_quality_text = f\"{len(expanded_ct_inferred_u80)} trips ({round(len(expanded_ct_inferred_u80)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(expanded_ct_inferred_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_inferred_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n",
-    "    \n",
+    "\n",
     "    # Plot entries\n",
     "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",

From 49b2da455f47625505c57d38ef27687e0029d11e Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Wed, 21 Aug 2024 09:54:38 -0700
Subject: [PATCH 10/22] Rename map_trip_data() parameter from df to
 expanded_trip_df

---
 viz_scripts/scaffolding.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index ef2200db..aa2735c8 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -172,28 +172,28 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic
 
     return expanded_ct, file_suffix, quality_text, debug_df
 
-def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur):
+def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur):
     # Change meters to miles
     # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if "distance" in df.columns:
-        unit_conversions(df)
+    if "distance" in expanded_trip_df.columns:
+        unit_conversions(expanded_trip_df)
 
     # Map new mode labels with translations dictionary from dynamic_labels
     # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if "mode_confirm" in df.columns:
+    if "mode_confirm" in expanded_trip_df.columns:
         if (len(dynamic_labels)):
             dic_mode_mapping = mapping_labels(dynamic_labels, "MODE")
-            df['Mode_confirm'] = df['mode_confirm'].map(dic_mode_mapping)
+            expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_mode_mapping)
         else:
-            df['Mode_confirm'] = df['mode_confirm'].map(dic_re)
+            expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_re)
     if study_type == 'program':
         # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-        if 'replaced_mode' in df.columns:
+        if 'replaced_mode' in expanded_trip_df.columns:
             if (len(dynamic_labels)):
                 dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE")
-                df['Replaced_mode'] = df['replaced_mode'].map(dic_replaced_mapping)
+                expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_replaced_mapping)
             else:
-                df['Replaced_mode'] = df['replaced_mode'].map(dic_re)
+                expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_re)
         else:
             print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping")
     else:
@@ -201,13 +201,13 @@ def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur):
 
     # Trip purpose mapping
     # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
-    if dic_pur is not None and "purpose_confirm" in df.columns:
+    if dic_pur is not None and "purpose_confirm" in expanded_trip_df.columns:
         if (len(dynamic_labels)):
              dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE")
-             df['Trip_purpose'] = df['purpose_confirm'].map(dic_purpose_mapping)
+             expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_purpose_mapping)
         else:
-            df['Trip_purpose'] = df['purpose_confirm'].map(dic_pur)
-    return df
+            expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_pur)
+    return expanded_trip_df
 
 def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False):
     """ Inputs:

From 62c3af07c3e1fa4c4d9f5b21f781b5f3998775f2 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 13 Sep 2024 10:24:29 -0700
Subject: [PATCH 11/22] Re-order Inferred Trip Stacked Charts above Sensed.

---
 viz_scripts/generic_metrics.ipynb | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 42f13978..e33db0dc 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -239,11 +239,10 @@
     "    \n",
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
-    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
-    "    \n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
+    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -392,10 +391,10 @@
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
-    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n",
@@ -434,10 +433,10 @@
     "    text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
-    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -481,10 +480,10 @@
     "    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
     "    plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_land_quality_text,  ax[0], text_results[0], colors_mode, debug_df)\n",
-    "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
+    "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",

From 007f9bf1e2c87542ea728fcf5c881ee1ea23d73b Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 13 Sep 2024 10:52:54 -0700
Subject: [PATCH 12/22] Update the index of axis and text_results for inferred
 and sensed trip stacked bars.

---
 viz_scripts/generic_metrics.ipynb | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index e33db0dc..198321e5 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -240,9 +240,9 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -392,9 +392,9 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "                                    \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n",
@@ -434,9 +434,9 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -481,9 +481,9 @@
     "    plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_land_quality_text,  ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n",
+    "                                    \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
+    "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",

From 260e8ad11909b44bf52ab5fd5369a1452ee37e91 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 16 Sep 2024 22:22:22 -0700
Subject: [PATCH 13/22] Update expand_inferredlabels(). Iterate over the
 inferred_ct to see if there is user_input or not. If there is user_input,
 choose it over inferred_labels.

---
 viz_scripts/scaffolding.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index aa2735c8..11689ad7 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -82,6 +82,7 @@ def filter_inferred_trips(mixed_trip_df):
     # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
     if len(mixed_trip_df) == 0:
         return mixed_trip_df
+    # Identify which trips contains inferred_labels
     inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))]
     print("After filtering, found %s inferred trips" % len(inferred_ct))
     disp.display(inferred_ct.head())
@@ -119,18 +120,18 @@ def expand_inferredlabels(inferred_ct):
         return inferred_ct
 
     max_labels_list = []
-    max_p_list = []
-
-    for item in inferred_ct.inferred_labels:
-        max_entry = max(item, key=lambda x: x['p'])
-        max_labels_list.append(max_entry['labels'])
-        max_p_list.append(max_entry['p'])
+    for _, row in inferred_ct.iterrows():
+        # In the trip, prioritize availabilty of user_input over inferred_labels for label selection
+        if row.user_input == {}:
+            # Extract the label which has highest "p" value
+            max_entry = max(row.inferred_labels, key=lambda x: x['p'])
+            max_labels_list.append(max_entry['labels'])
+        else:
+            max_labels_list.append(row.user_input)
 
     inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index)
-    disp.display(inferred_only_labels)
-    inferred_only_p = pd.DataFrame(max_p_list, index=inferred_ct.index, columns=['p'])
-    disp.display(inferred_only_p)
-    expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels, inferred_only_p], axis=1)
+    disp.display(inferred_only_labels.head())
+    expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1)
     expanded_inferred_ct.reset_index(drop=True, inplace=True)
     disp.display(expanded_inferred_ct.head())
     return expanded_inferred_ct

From 3c33e7585321809bf7de8640cf7be9cae6dbbbec Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Mon, 16 Sep 2024 23:00:06 -0700
Subject: [PATCH 14/22] Filter for inferred trip bar - it should have either
 user_input or inferred_labels

---
 viz_scripts/scaffolding.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 11689ad7..51d29bce 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -82,8 +82,8 @@ def filter_inferred_trips(mixed_trip_df):
     # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
     if len(mixed_trip_df) == 0:
         return mixed_trip_df
-    # Identify which trips contains inferred_labels
-    inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))]
+    # Identify trips which has either inferred_labels or has user_input
+    inferred_ct = mixed_trip_df[(mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))) | (mixed_trip_df.user_input != {})]
     print("After filtering, found %s inferred trips" % len(inferred_ct))
     disp.display(inferred_ct.head())
     return inferred_ct

From 72fcb205c6e4946e227a9cd264952774013096e4 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Wed, 18 Sep 2024 10:12:10 -0700
Subject: [PATCH 15/22] Use confidence_threshold to filter labels from
 inferred_labels.

---
 viz_scripts/scaffolding.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 51d29bce..ec1e423c 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -125,7 +125,10 @@ def expand_inferredlabels(inferred_ct):
         if row.user_input == {}:
             # Extract the label which has highest "p" value
             max_entry = max(row.inferred_labels, key=lambda x: x['p'])
-            max_labels_list.append(max_entry['labels'])
+            if (max_entry['p'] > row.confidence_threshold):
+                max_labels_list.append(max_entry['labels'])
+            else:
+                max_labels_list.append({})
         else:
             max_labels_list.append(row.user_input)
 

From 3d718b5cadd15ac2c37d71db71161a415e856844 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Wed, 18 Sep 2024 10:36:49 -0700
Subject: [PATCH 16/22] Update bar_label for inferred bars from Inferred by
 OpenPATH ... to Labeled and Inferred by OpenPATH ...

---
 viz_scripts/generic_metrics.ipynb       | 12 ++++++------
 viz_scripts/mode_specific_metrics.ipynb |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 198321e5..3e3dbcdb 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -240,7 +240,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
@@ -300,7 +300,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -335,7 +335,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -392,7 +392,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
@@ -434,7 +434,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
@@ -481,7 +481,7 @@
     "    plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_land_quality_text,  ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index ce535a13..101e8c9b 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -275,7 +275,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
     "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
+    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
     "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
@@ -312,7 +312,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
@@ -349,7 +349,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
+    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n",
     "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",

From 83e259bd2bcb8c5dbe19e58d5e657c453866c9ce Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 20 Sep 2024 23:00:03 -0700
Subject: [PATCH 17/22] When there is no user_input and the
 confidence_threshold is not met, append a dict with 'uncertain' for all
 labels to the labels list. Later filter those rows out of the dataframe.

---
 viz_scripts/scaffolding.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 03c3f8e0..a6c73334 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -128,7 +128,7 @@ def expand_inferredlabels(inferred_ct):
             if (max_entry['p'] > row.confidence_threshold):
                 max_labels_list.append(max_entry['labels'])
             else:
-                max_labels_list.append({})
+                max_labels_list.append({'mode_confirm':'uncertain', 'purpose_confirm':'uncertain', 'replaced_mode':'uncertain'})
         else:
             max_labels_list.append(row.user_input)
 
@@ -136,6 +136,8 @@ def expand_inferredlabels(inferred_ct):
     disp.display(inferred_only_labels.head())
     expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1)
     expanded_inferred_ct.reset_index(drop=True, inplace=True)
+    # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain
+    expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')]
     disp.display(expanded_inferred_ct.head())
     return expanded_inferred_ct
 

From 9905ca7e9c7aaac92d19c9ed368630a2b0beaa65 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 20 Sep 2024 23:42:35 -0700
Subject: [PATCH 18/22] Replace use of iterrows over pandas dataframe with
 df.apply() method. Remove reset_index on expanded_inferred_ct dataframe.

---
 viz_scripts/scaffolding.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index a6c73334..8917e7d1 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -119,23 +119,19 @@ def expand_inferredlabels(inferred_ct):
     if len(inferred_ct) == 0:
         return inferred_ct
 
-    max_labels_list = []
-    for _, row in inferred_ct.iterrows():
-        # In the trip, prioritize availabilty of user_input over inferred_labels for label selection
-        if row.user_input == {}:
-            # Extract the label which has highest "p" value
-            max_entry = max(row.inferred_labels, key=lambda x: x['p'])
-            if (max_entry['p'] > row.confidence_threshold):
-                max_labels_list.append(max_entry['labels'])
-            else:
-                max_labels_list.append({'mode_confirm':'uncertain', 'purpose_confirm':'uncertain', 'replaced_mode':'uncertain'})
-        else:
-            max_labels_list.append(row.user_input)
-
-    inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index)
+    def _select_max_label(row):
+        if row['user_input']:
+            return row['user_input']
+        max_entry = max(row['inferred_labels'], key=lambda x: x['p'])
+        return max_entry['labels'] if max_entry['p'] > row['confidence_threshold'] else {
+            'mode_confirm': 'uncertain',
+            'purpose_confirm': 'uncertain',
+            'replaced_mode': 'uncertain'
+        }
+
+    inferred_only_labels = inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series)
     disp.display(inferred_only_labels.head())
     expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1)
-    expanded_inferred_ct.reset_index(drop=True, inplace=True)
     # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain
     expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')]
     disp.display(expanded_inferred_ct.head())

From a983a5b19b283728b8f4e32ef083cd5429f0f0b0 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Fri, 20 Sep 2024 23:50:59 -0700
Subject: [PATCH 19/22] Update expand_inferredlabels() to take
 labeled_inferred_ct, and update the variable names to add prefix of labeled.
 We display both labeled and inferred labels altogether for inferred bars in
 stacked bar charts.

---
 viz_scripts/scaffolding.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 8917e7d1..96cf19eb 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -115,9 +115,9 @@ def expand_userinputs(labeled_ct):
     disp.display(expanded_ct.head())
     return expanded_ct
 
-def expand_inferredlabels(inferred_ct):
-    if len(inferred_ct) == 0:
-        return inferred_ct
+def expand_inferredlabels(labeled_inferred_ct):
+    if len(labeled_inferred_ct) == 0:
+        return labeled_inferred_ct
 
     def _select_max_label(row):
         if row['user_input']:
@@ -129,13 +129,13 @@ def _select_max_label(row):
             'replaced_mode': 'uncertain'
         }
 
-    inferred_only_labels = inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series)
-    disp.display(inferred_only_labels.head())
-    expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1)
+    labeled_inferred_labels = labeled_inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series)
+    disp.display(labeled_inferred_labels.head())
+    expanded_labeled_inferred_ct = pd.concat([labeled_inferred_ct, labeled_inferred_labels], axis=1)
     # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain
-    expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')]
-    disp.display(expanded_inferred_ct.head())
-    return expanded_inferred_ct
+    expanded_labeled_inferred_ct = expanded_labeled_inferred_ct[(expanded_labeled_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_labeled_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_labeled_inferred_ct['replaced_mode'] != 'uncertain')]
+    disp.display(expanded_labeled_inferred_ct.head())
+    return expanded_labeled_inferred_ct
 
 # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
 unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0

From acf5da6786425b1d6c9b6919ba846909ab8a3443 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Sat, 21 Sep 2024 08:29:53 -0700
Subject: [PATCH 20/22] Fix merge with main - Introduce read_json_resource
 function. Introduce *_w_other cols for the dataframe.

---
 viz_scripts/scaffolding.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 650b41f8..1e2bae87 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -183,6 +183,13 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur)
     if "distance" in expanded_trip_df.columns:
         unit_conversions(expanded_trip_df)
 
+    # Select the labels from dynamic_labels is available,
+    # else get it from emcommon/resources/label-options.default.json
+    if (len(dynamic_labels)):
+        labels = dynamic_labels
+    else:
+        labels = await emcu.read_json_resource("label-options.default.json")
+
     # Map new mode labels with translations dictionary from dynamic_labels
     # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
     if "mode_confirm" in expanded_trip_df.columns:
@@ -191,6 +198,9 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur)
             expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_mode_mapping)
         else:
             expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_re)
+        # If the 'mode_confirm' is not available as the list of keys in the dynamic_labels or label_options.default.json, then, we should transform it as 'other'
+        mode_values = [item['value'] for item in labels['MODE']]
+        expanded_trip_df['mode_confirm_w_other'] = expanded_trip_df['mode_confirm'].apply(lambda mode: 'other' if mode not in mode_values else mode)
     if study_type == 'program':
         # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
         if 'replaced_mode' in expanded_trip_df.columns:
@@ -199,6 +209,8 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur)
                 expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_replaced_mapping)
             else:
                 expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_re)
+            replaced_modes = [item['value'] for item in labels['REPLACED_MODE']]
+            expanded_trip_df['replaced_mode_w_other'] = expanded_trip_df['replaced_mode'].apply(lambda mode: 'other' if mode not in replaced_modes else mode)
         else:
             print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping")
     else:
@@ -212,6 +224,9 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur)
              expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_purpose_mapping)
         else:
             expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_pur)
+        purpose_values = [item['value'] for item in labels['PURPOSE']]
+        expanded_trip_df['purpose_confirm_w_other'] = expanded_trip_df['purpose_confirm'].apply(lambda value: 'other' if value not in purpose_values else value)
+
     return expanded_trip_df
 
 def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False):

From 4d8c403d1c121cba4c0751a05bccbb59f7921151 Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Sat, 21 Sep 2024 08:37:38 -0700
Subject: [PATCH 21/22] Update load_viz_notebook_inferred_data() to be async,
 and call it as await from notebook. Update the map_trip_data() to be async,
 and call to it as await in scaffolding.py

---
 viz_scripts/generic_metrics.ipynb       | 2 +-
 viz_scripts/mode_specific_metrics.ipynb | 2 +-
 viz_scripts/scaffolding.py              | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
index 32e81bfb..656601cc 100644
--- a/viz_scripts/generic_metrics.ipynb
+++ b/viz_scripts/generic_metrics.ipynb
@@ -156,7 +156,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n",
+    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,\n",
     "                                                                            month,\n",
     "                                                                            program,\n",
     "                                                                            study_type,\n",
diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index be9e8e7f..09136505 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -147,7 +147,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n",
+    "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,\n",
     "                                                                            month,\n",
     "                                                                            program,\n",
     "                                                                            study_type,\n",
diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
index 1e2bae87..880c8190 100644
--- a/viz_scripts/scaffolding.py
+++ b/viz_scripts/scaffolding.py
@@ -157,7 +157,7 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label
     labeled_ct = filter_labeled_trips(participant_ct_df)
     expanded_ct = expand_userinputs(labeled_ct)
     expanded_ct = data_quality_check(expanded_ct)
-    expanded_ct = map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur)
+    expanded_ct = await map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur)
 
     # Document data quality
     file_suffix = get_file_suffix(year, month, program)
@@ -177,7 +177,7 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label
 
     return expanded_ct, file_suffix, quality_text, debug_df
 
-def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur):
+async def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur):
     # Change meters to miles
     # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867
     if "distance" in expanded_trip_df.columns:
@@ -229,7 +229,7 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur)
 
     return expanded_trip_df
 
-def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False):
+async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False):
     """ Inputs:
     year/month/program/study_type = parameters from the visualization notebook
     dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose
@@ -241,7 +241,7 @@ def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_la
     participant_ct_df = load_all_participant_trips(program, tq, include_test_users)
     inferred_ct = filter_inferred_trips(participant_ct_df)
     expanded_it = expand_inferredlabels(inferred_ct)
-    expanded_it = map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur)
+    expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur)
 
     # Document data quality
     file_suffix = get_file_suffix(year, month, program)

From 1a5abfbb0a5448e324efc55254340074ce419a5b Mon Sep 17 00:00:00 2001
From: iantei <anshrest@asu.edu>
Date: Sat, 21 Sep 2024 08:38:44 -0700
Subject: [PATCH 22/22] Fix typo: debug_df_inferred instead of debug_df_inferre

---
 viz_scripts/mode_specific_metrics.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
index 09136505..a1d362eb 100644
--- a/viz_scripts/mode_specific_metrics.ipynb
+++ b/viz_scripts/mode_specific_metrics.ipynb
@@ -276,7 +276,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
     "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferre,value_to_translations_purpose)\n",
+    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",