"""Report generation helper module."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING
import pandas as pd
import panel as pn
from plotly.graph_objects import Violin
from shapash.explainer.smart_explainer import SmartExplainer
if TYPE_CHECKING:
from eurybia import SmartDrift
from eurybia.report.project_report import DriftReport
from eurybia.report.properties import report_css, report_jscallback, report_text, select_callback
# Load Panel's Plotly extension at import time: the panels built in this module
# embed their figures through ``pn.pane.Plotly``.
pn.extension("plotly")
def _get_index_panel(
    dr: DriftReport, project_info_file: str | None = None, config_report: dict | None = None
) -> pn.Column:
    """Build the "Index" tab of the report.

    The tab stacks, in order: a header row (logo and ``dr.title_story``), an
    optional description title, the report contents and a row of indicator gauges.

    Parameters
    ----------
    dr : DriftReport
        Report object exposing ``title_story`` and ``smartdrift``.
    project_info_file : str, optional
        When not None, ``report_text["Index"]["01"]`` is added to the contents.
    config_report : dict, optional
        Report configuration. A ``"title_description"`` entry different from the
        empty string is rendered as a second-level title.

    Returns
    -------
    pn.Column
        Column named "Index" carrying the ``index`` CSS class.

    Raises
    ------
    RuntimeError
        If ``dr.smartdrift.auc`` is None, or if a deployed model is set while
        ``dr.smartdrift.js_divergence`` is None.
    """
    # Header: logo next to the report title.
    logo = pn.pane.PNG(
        "https://eurybia.readthedocs.io/en/latest/_images/eurybia-fond-clair.png?raw=true",
        styles={"max-width": "150px", "height": "auto"},
    )
    title = pn.pane.Markdown(f"# {dr.title_story}")
    parts = [pn.Row(logo, title)]

    # Optional description title taken from the report configuration.
    description = "" if config_report is None else config_report.get("title_description", "")
    if description != "":
        parts.append(pn.pane.Markdown(f"## {description}"))

    # Report contents: entries "02" and "03" are always listed, "01" and "04" are conditional.
    drift = dr.smartdrift
    toc_keys = []
    if project_info_file is not None:
        toc_keys.append("01")
    toc_keys += ["02", "03"]
    if drift.data_modeldrift is not None:
        toc_keys.append("04")
    toc_lines = ["## Eurybia Report contents"] + [report_text["Index"][toc_key] for toc_key in toc_keys]
    parts.append(pn.pane.Markdown("\n".join(toc_lines)))

    # AUC gauge (always displayed).
    if drift.auc is None:
        raise RuntimeError("AUC should have been set.")
    auc_image = drift.plot.generate_indicator(
        fig_value=drift.auc, height=280, width=500, title="Datadrift classifier AUC", image=True
    )
    gauges = [pn.pane.PNG(auc_image)]

    # Jensen-Shannon gauge, only when a deployed model is available.
    if drift.deployed_model is not None:
        if drift.js_divergence is None:
            raise RuntimeError("Jensen-Shannon divergence should have been set.")
        js_image = drift.plot.generate_indicator(
            fig_value=drift.js_divergence,
            height=280,
            width=500,
            title="Jensen Shannon Datadrift",
            min_gauge=0,
            max_gauge=0.2,
            image=True,
        )
        gauges.append(pn.pane.PNG(js_image))
    parts.append(pn.Row(*gauges))

    return pn.Column(*parts, name="Index", css_classes=["index"])
def dict_to_text_blocks(text_dict: dict, level: int = 1) -> pn.Column:
    """Recursively convert a nested dict into a Panel Column of Markdown blocks.

    Consecutive non-dict values are gathered into a single Markdown pane, one
    ``**key** : value`` line per entry. A dict value becomes a nested Column made
    of a heading (``#`` repeated ``level`` times, capped at 6) followed by the
    blocks produced for that sub-dict.

    Parameters
    ----------
    text_dict : dict
        Mapping whose values are either dicts (rendered as sub-sections) or any
        other object (rendered with ``str()``). Keys are rendered with ``str()``
        as well, so non-string keys are accepted.
    level : int = 1
        Recursion level, starting at 1; it drives the heading depth of sub-sections.

    Returns
    -------
    pn.Column
        Column of blocks, in the iteration order of ``text_dict``.
    """
    blocks = []
    pending_lines: list[str] = []  # "**key** : value" lines not yet turned into a pane

    def flush_pending() -> None:
        # Emit the accumulated lines as one Markdown pane, if there are any.
        if pending_lines:
            blocks.append(pn.pane.Markdown("".join(pending_lines)))
            pending_lines.clear()

    for key, value in text_dict.items():
        if isinstance(value, dict):
            # A sub-section starts: close the current run of plain lines first
            # so that the display order follows the dict order.
            flush_pending()
            heading = pn.pane.Markdown("#" * min(level, 6) + " " + str(key))
            blocks.append(pn.Column(heading, dict_to_text_blocks(value, level + 1)))
            continue

        # Every non-dict value is displayed (lists, dates, ...) instead of being
        # silently dropped; str(key) keeps non-string keys from raising.
        if str(key).lower() == "date" and isinstance(value, str) and value.lower() == "auto":
            # "date: auto" is replaced by the generation timestamp.
            shown = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        else:
            shown = str(value)
        # Two trailing spaces before the newline are Markdown's hard line break,
        # which keeps each entry on its own line inside the pane.
        pending_lines.append(f"**{key}** : {shown}  \n")

    flush_pending()
    return pn.Column(*blocks)
def _get_project_information_panel(dr: DriftReport) -> pn.Column | None:
    """Build the "Project information" tab from the report metadata.

    Parameters
    ----------
    dr : DriftReport
        Report object; its ``metadata`` dict is rendered with ``dict_to_text_blocks``.

    Returns
    -------
    pn.Column or None
        Column named "Project information", created with ``display: none``,
        or None when ``dr.metadata`` is None.
    """
    if dr.metadata is not None:
        return pn.Column(
            *dict_to_text_blocks(dr.metadata),
            name="Project information",
            styles={"display": "none"},
        )
    return None
def _get_consistency_analysis_panel(dr: DriftReport, modalities_analysis: bool) -> pn.Column:
    """Build the "Consistency Analysis" tab.

    The tab lists the manually ignored columns, then the column mismatches
    between the two datasets (new columns, removed columns, type errors) and,
    on request, the categorical value differences.

    Parameters
    ----------
    dr : DriftReport
        Report object; the figures are read from ``dr.smartdrift.da``.
    modalities_analysis : bool
        When True, the "Unique values identified" section is appended.

    Returns
    -------
    pn.Column
        Column named "Consistency Analysis", created with ``display: none`` and
        carrying the ``information`` CSS class.
    """
    sections = [pn.pane.Markdown("# Consistency Analysis")]
    analysis = dr.smartdrift.da

    # Manually ignored columns
    ignored = pd.DataFrame({"Ignored columns": analysis.ignored_cols})
    sections.append(pn.pane.Markdown("## Ignored columns in the report (manually excluded)"))
    if len(ignored) == 0:
        sections.append(pn.pane.Markdown("- Ignored columns : None."))
    else:
        sections.append(pn.pane.DataFrame(ignored))

    # Column mismatches
    sections.extend(
        [
            pn.pane.Markdown("## Consistency checks: column match between the 2 datasets."),
            pn.pane.Markdown(report_text["Consistency analysis"]["01"]),
        ]
    )
    mismatches = {
        "New columns": analysis.new_columns,
        "Removed columns": analysis.removed_columns,
        "Type errors": list(analysis.dtype_mismatches.keys()),
    }
    for category, columns in mismatches.items():
        if len(columns) > 0:
            pane = pn.pane.DataFrame(pd.DataFrame(columns).transpose())
        else:
            pane = pn.pane.Markdown(f"- No {category.lower()} have been detected.")
        sections.append(pane)

    # Categorical values present in only one of the two datasets
    if modalities_analysis:
        sections.extend(
            [
                pn.pane.Markdown("### Unique values identified"),
                pn.pane.Markdown(report_text["Consistency analysis"]["02"]),
            ]
        )
        if len(analysis.categorical_value_differences) > 0:
            differences = pd.DataFrame(analysis.categorical_value_differences).transpose()
            sections.append(pn.pane.DataFrame(differences))
        else:
            sections.append(
                pn.pane.Markdown(
                    "- No modalities have been detected as present in one dataset and absent in the other."
                )
            )

    return pn.Column(
        *sections,
        name="Consistency Analysis",
        styles={"display": "none"},
        css_classes=["information"],
    )
def _get_select_plots(labels: list, key: str, tab: str, figures: list) -> list:
    """Build a Select widget followed by one Plotly pane per label.

    Each pane receives two CSS classes: the label with spaces replaced by dashes
    and lower-cased, and ``key``. Panes whose label differs from the first label
    additionally get the ``hidden`` class. The figures are modified in place:
    Violin traces are updated with ``side="both"`` and the layout width is set
    to 1240.

    Parameters
    ----------
    labels : list
        Labels offered by the Select widget; the first one is the initial value.
    key : str
        CSS class shared by the panes; passed to the JS callback as ``.{key}``.
    tab : str
        Passed unchanged to the JS callback as ``tab``.
    figures : list
        Plotly figures, indexed like ``labels``.

    Returns
    -------
    list
        The Select widget followed by the Plotly panes.
    """
    initial_label = labels[0]
    selector = pn.widgets.Select(value=initial_label, options=labels)
    selector.jscallback(args={"key": f".{key}", "tab": tab}, value=select_callback)

    nodes = [selector]
    for position, label in enumerate(labels):
        classes = [label.replace(" ", "-").lower(), key]
        if label != initial_label:
            classes.append("hidden")

        figure = figures[position]
        for trace in figure.data:
            if isinstance(trace, Violin):
                trace.update(side="both")
        figure.update_layout(width=1240)

        nodes.append(pn.pane.Plotly(figure, name=label, css_classes=classes))
    return nodes
def _get_select_tables(labels: list, key: str, tab: str, tables: list) -> list:
    """Build a Select widget followed by one DataFrame pane per label.

    Each pane receives two CSS classes: the label with spaces replaced by dashes
    and lower-cased, and ``key``. Every pane but the first one additionally gets
    the ``hidden`` class.

    Parameters
    ----------
    labels : list
        Labels offered by the Select widget; the first one is the initial value.
    key : str
        CSS class shared by the panes; passed to the JS callback as ``.{key}``.
    tab : str
        Passed unchanged to the JS callback as ``tab``.
    tables : list
        Tables to display, indexed like ``labels``.

    Returns
    -------
    list
        The Select widget followed by the DataFrame panes.
    """
    selector = pn.widgets.Select(value=labels[0], options=labels)
    selector.jscallback(args={"key": f".{key}", "tab": tab}, value=select_callback)

    nodes = [selector]
    for position, label in enumerate(labels):
        classes = [label.replace(" ", "-").lower(), key]
        if position != 0:
            classes.append("hidden")
        nodes.append(pn.pane.DataFrame(tables[position], css_classes=classes))
    return nodes
def _get_data_drift_panel(dr: DriftReport) -> pn.Column:
    """Build the "Data drift" tab.

    Sections are appended in this order: classifier AUC gauge, global feature
    importance, feature importance overview (deployed model only), dataset
    analysis (global table, then selectable univariate plots and tables),
    distribution of predicted values with the Jensen-Shannon gauge (deployed
    model only), feature contributions, feature interactions and, when
    ``historical_auc`` is set, the historical data drift plot.

    Parameters
    ----------
    dr : DriftReport
        Report object giving access to ``smartdrift``, ``explainer`` and the
        display helpers.

    Returns
    -------
    pn.Column
        Column named "Data drift", created with ``display: none`` and carrying
        the ``data-drift`` CSS class.

    Raises
    ------
    RuntimeError
        If ``dr.smartdrift.auc`` is None, or if a deployed model is set while
        ``dr.smartdrift.js_divergence`` is None.
    """
    plot_width = 1240  # width applied to every Plotly figure of the tab
    tab_selector = ".data-drift"

    sections = [
        pn.pane.Markdown("# Data drift"),
        pn.pane.Markdown(report_text["Data drift"]["01"]),
        pn.pane.Markdown("## Detecting data drift"),
        pn.pane.Markdown("### Datadrift classifier model perfomances"),
        pn.pane.Markdown(report_text["Data drift"]["02"]),
    ]
    drift = dr.smartdrift

    # Classifier AUC gauge
    if drift.auc is None:
        raise RuntimeError("AUC should have been set.")
    auc_image = drift.plot.generate_indicator(
        fig_value=drift.auc, height=300, width=500, title="Datadrift classifier AUC", image=True
    )
    sections.append(pn.pane.PNG(auc_image))

    # Feature importance
    sections.extend(
        [
            pn.pane.Markdown("## Importance of features in data drift"),
            pn.pane.Markdown("### Global feature importance plot"),
            pn.pane.Markdown(report_text["Data drift"]["03"]),
        ]
    )
    importance_fig = dr.explainer.plot.features_importance()
    importance_fig.update_layout(width=plot_width)
    sections.append(pn.pane.Plotly(importance_fig))

    if drift.deployed_model is not None:
        scatter_fig = drift.plot.scatter_feature_importance()
        scatter_fig.update_layout(width=plot_width)
        sections.extend(
            [
                pn.pane.Markdown("### Feature importance overview"),
                pn.pane.Markdown(report_text["Data drift"]["04"]),
                pn.pane.Plotly(scatter_fig),
            ]
        )

    # Dataset analysis: global table, then per-feature plots and tables
    sections.extend(
        [
            pn.pane.Markdown("## Dataset analysis"),
            pn.pane.Markdown(report_text["Data drift"]["05"]),
            pn.pane.Markdown("### Global analysis"),
            pn.pane.DataFrame(dr._display_dataset_analysis_global()),
            pn.pane.Markdown("### Univariate analysis"),
            pn.pane.Markdown(report_text["Data drift"]["07"]),
        ]
    )
    univariate = dr.display_dataset_analysis(global_analysis=False)["univariate"]
    distribution_figures, labels, distribution_tables = univariate
    sections.extend(
        _get_select_plots(labels=labels, key="distribution-plot", tab=tab_selector, figures=distribution_figures)
    )
    sections.extend(
        _get_select_tables(labels=labels, key="distribution-table", tab=tab_selector, tables=distribution_tables)
    )

    # Predicted values and Jensen-Shannon gauge, only with a deployed model
    if drift.deployed_model is not None:
        score_fig = drift.plot.generate_fig_univariate(df_all=drift.df_predict, col="Score", hue="dataset")
        score_fig.update_layout(width=plot_width)
        sections.extend(
            [
                pn.pane.Markdown("### Distribution of predicted values"),
                pn.pane.Markdown(report_text["Data drift"]["06"]),
                pn.pane.Plotly(score_fig),
                pn.pane.Markdown(report_text["Data drift"]["08"]),
            ]
        )
        if drift.js_divergence is None:
            raise RuntimeError("Jensen-Shannon divergence should have been set.")
        js_image = drift.plot.generate_indicator(
            fig_value=drift.js_divergence,
            height=280,
            width=500,
            title="Jensen Shannon Datadrift",
            min_gauge=0,
            max_gauge=0.2,
            image=True,
        )
        sections.append(pn.pane.PNG(js_image))

    # Feature contributions
    contribution_figures, contribution_labels = dr.display_model_contribution()
    sections.extend(
        [
            pn.pane.Markdown("## Feature contribution on data drift's detection"),
            pn.pane.Markdown(report_text["Data drift"]["09"]),
        ]
    )
    sections.extend(
        _get_select_plots(
            labels=contribution_labels,
            key="contribution-plot",
            tab=tab_selector,
            figures=contribution_figures,
        )
    )

    # Feature interactions
    interactions_fig = dr.explainer.plot.top_interactions_plot(nb_top_interactions=10)
    interactions_fig.update_layout(width=plot_width)
    sections.extend(
        [
            pn.pane.Markdown("## Feature interaction on data drift's detection"),
            pn.pane.Markdown(report_text["Data drift"]["10"]),
            pn.pane.Plotly(interactions_fig),
        ]
    )

    # Historical data drift, only when a history is available
    if drift.historical_auc is not None:
        history_fig = drift.plot.generate_historical_datadrift_metric()
        history_fig.update_layout(width=plot_width)
        sections.extend(
            [
                pn.pane.Markdown("## Historical Data drift"),
                pn.pane.Markdown(report_text["Data drift"]["11"]),
                pn.pane.Plotly(history_fig),
            ]
        )

    return pn.Column(
        *sections,
        name="Data drift",
        styles={"display": "none"},
        css_classes=["data-drift"],
    )
def _get_model_drift_panel(dr: DriftReport) -> pn.Column:
    """Build the "Model drift" tab.

    After the introduction, the tab shows either a placeholder title (when
    ``dr.smartdrift.data_modeldrift`` is None), a single plot (when
    ``dr.display_data_modeldrift()`` returns an empty label list) or a Select
    widget with one plot per label.

    Parameters
    ----------
    dr : DriftReport
        DriftReport object

    Returns
    -------
    pn.Column
        Column named "Model drift", created with ``display: none`` and carrying
        the ``model-drift`` CSS class.
    """
    sections = [
        pn.pane.Markdown("# Model drift"),
        pn.pane.Markdown(report_text["Model drift"]["01"]),
        pn.pane.Markdown("## Performance evolution of the deployed model"),
        pn.pane.Markdown(report_text["Model drift"]["02"]),
    ]

    if dr.smartdrift.data_modeldrift is not None:
        figures, labels = dr.display_data_modeldrift()
        if labels != []:
            # Several plots: let the reader pick one through a Select widget.
            sections.extend(
                _get_select_plots(labels=labels, key="modeldrift-plot", tab=".model-drift", figures=figures)
            )
        else:
            # No label: only the first figure is displayed.
            only_figure = figures[0]
            only_figure.update_layout(width=1240)
            sections.append(pn.pane.Plotly(only_figure))
    else:
        sections.append(pn.pane.Markdown("## Smartdrift.data_modeldrift is None"))

    return pn.Column(
        *sections,
        name="Model drift",
        styles={"display": "none"},
        css_classes=["model-drift"],
    )
def execute_report(
    smartdrift: SmartDrift,
    explainer: SmartExplainer,
    output_file: str,
    project_info_file: str | None = None,
    config_report: dict | None = None,
    modalities_analysis: bool = False,
) -> None:
    """Create the drift report and save it as an HTML file.

    The report is a set of tabs: index, project information (optional),
    consistency analysis, data drift and model drift (optional).

    Parameters
    ----------
    smartdrift : eurybia.core.smartdrift.SmartDrift object
        Compiled SmartDrift class
    explainer : shapash.explainer.smart_explainer.SmartExplainer object
        Compiled shapash explainer.
    output_file : str
        Path to the HTML file to write
    project_info_file : str, optional
        Path to the file used to display some information about the project in the report.
        When None, the project information tab is omitted.
    config_report : dict, optional
        Report configuration options. None is treated as an empty dict.
    modalities_analysis : bool, optional
        When True, the consistency analysis tab also reports the categorical
        value differences between the two datasets. Defaults to False.
    """
    if config_report is None:
        config_report = {}

    dr = DriftReport(
        smartdrift=smartdrift,
        explainer=explainer,
        project_info_file=project_info_file,
        config_report=config_report,
    )

    tab_list = [_get_index_panel(dr, project_info_file, config_report)]
    if project_info_file is not None:
        project_panel = _get_project_information_panel(dr)
        # The helper returns None when the report holds no metadata; passing
        # None to pn.Tabs would add a meaningless tab, so it is skipped.
        if project_panel is not None:
            tab_list.append(project_panel)
    tab_list.append(_get_consistency_analysis_panel(dr, modalities_analysis))
    tab_list.append(_get_data_drift_panel(dr))
    if dr.smartdrift.data_modeldrift is not None:
        tab_list.append(_get_model_drift_panel(dr))

    # pn.config.raw_css is global state: register the stylesheet only once so
    # that repeated calls do not stack duplicate copies into later reports.
    if report_css not in pn.config.raw_css:
        pn.config.raw_css.append(report_css)

    report = pn.Tabs(*tab_list, css_classes=["main-report"])
    report.jscallback(args={"active": report}, active=report_jscallback)
    report.save(output_file, embed=True)