Metrics and checks#
Note
There are two main parts related to handling of metrics and checks in Dataiku’s Python APIs:
dataiku.core.metrics.ComputedMetrics
in the dataiku
package. It was initially designed for usage within DSS.
dataikuapi.dss.metrics.ComputedMetrics
in the dataikuapi
package. It was initially designed for usage outside of DSS.
Both classes have fairly similar capabilities.
For more details on the two packages, please see Concepts and examples
Add metric on a column#
def add_metrics_probes_col_stats(probes, aggregation, column):
    """
    Add a column-statistics aggregate to a list of probes.

    If a probe of type ``col_stats`` already exists, the aggregate is appended to
    its ``configuration.aggregates`` list unless an identical entry is already
    present. Otherwise a new ``col_stats`` probe holding this single aggregate is
    appended to ``probes``. The list is modified in place; nothing is returned.

    :param probes: the list of existing probes (modified in place)
    :param aggregation: which aggregation is used (for example ``'MIN'``)
    :param column: the dataset column the aggregation applies to

    Usage example:

    .. code-block:: python

        settings: DSSDatasetSettings = dataset.get_settings()
        metrics: ComputedMetrics = settings.get_raw()['metrics']
        add_metrics_probes_col_stats(metrics['probes'], 'MIN', 'purchase_amount')
    """
    aggregate = {'aggregated': aggregation, 'column': column}
    col_stats_probe = next((probe for probe in probes if probe["type"] == 'col_stats'), None)

    # Test against None explicitly: a truthiness test on the probe's *index*
    # would treat a col_stats probe sitting at position 0 as "not found" and
    # append a duplicate probe instead of extending the existing one.
    if col_stats_probe is not None:
        existing_aggregates = col_stats_probe['configuration']['aggregates']
        if aggregate not in existing_aggregates:
            existing_aggregates.append(aggregate)
        return

    probes.append({
        'computeOnBuildMode': 'NO',
        'configuration': {'aggregates': [aggregate]},
        'enabled': True,
        'meta': {'level': 2, 'name': 'Columns statistics'},
        'type': 'col_stats',
    })
# Example: register a 'MIN' aggregate on the 'purchase_amount' column.
# `dataset` is not defined in this snippet; it must already be in scope.
settings = dataset.get_settings()
metrics = settings.get_raw()['metrics']
# NOTE(review): the probes list is modified in place on the raw settings;
# presumably settings.save() is needed afterwards to persist it - confirm.
add_metrics_probes_col_stats(metrics['probes'], 'MIN', 'purchase_amount')
Make a defined metric visible#
def add_displayed_state_to_metrics(displayed_state, type_to_add, function_to_add, column=""):
    """
    Register a metric in the displayed state, unless it is already listed.

    The metric identifier is built as ``type:function`` and, when a column is
    given, ``type:function:column``. It is appended in place to
    ``displayed_state['metrics']``; nothing is returned.

    :param displayed_state: the current displayed state (modified in place)
    :param type_to_add: which kind of metric (for example ``'col_stats'``)
    :param function_to_add: the function used by the metric (for example ``'MIN'``)
    :param column: the column the metric applies to, if any

    Usage example:

    .. code-block:: python

        settings: DSSDatasetSettings = dataset.get_settings()
        metrics: ComputedMetrics = settings.get_raw()['metrics']
        add_displayed_state_to_metrics(metrics['displayedState'], 'col_stats', 'MIN', 'purchase_amount')
    """
    id_parts = [type_to_add, function_to_add]
    if column:
        id_parts.append(column)
    metric_id = ':'.join(id_parts)

    shown_metrics = displayed_state['metrics']
    if metric_id in shown_metrics:
        return
    shown_metrics.append(metric_id)
# Example: make the 'col_stats:MIN:purchase_amount' metric part of the displayed state.
# `dataset` is not defined in this snippet; it must already be in scope.
settings = dataset.get_settings()
metrics = settings.get_raw()['metrics']
# NOTE(review): the displayed state is modified in place on the raw settings;
# presumably settings.save() is needed afterwards to persist it - confirm.
add_displayed_state_to_metrics(metrics['displayedState'], 'col_stats', 'MIN', 'purchase_amount')
Define a new numerical check#
def add_metrics_checks_numeric_range(checks, label, which, parameters):
    """
    Add a ``numericRange`` check on a metric, only if none exists for it yet.

    A check counts as already present when ``checks['checks']`` contains an entry
    of type ``numericRange`` whose ``metricId`` equals ``which``. In that case
    nothing is changed. Otherwise a new check is appended in place, with every
    bound disabled except those listed in ``parameters``.

    :param checks: existing checks (dict holding a ``'checks'`` list, modified in place)
    :param label: label for the check
    :param which: identifier of the metric the check applies to
    :param parameters: iterable of ``(bound_name, value)`` pairs; each named bound
        is set to ``value`` and its ``<bound_name>Enabled`` flag is set to True

    Usage example:

    .. code-block:: python

        settings: DSSDatasetSettings = dataset.get_settings()
        checks = settings.get_raw()['metricsChecks']
        CHECK_RECORDS_NAME = 'Number of records should be greater than 100'
        add_metrics_checks_numeric_range(checks, CHECK_RECORDS_NAME, 'records:COUNT_RECORDS',
                                         [('minimum', 100)])
    """
    for existing_check in checks['checks']:
        if existing_check['type'] == 'numericRange' and existing_check['metricId'] == which:
            return

    numeric_check = {
        'computeOnBuildMode': 'PARTITION',
        'meta': {
            'label': label,
            'name': 'Value in range'
        },
        'metricId': which,
    }
    # Every bound starts at 0.0 and disabled; key order matches the original layout.
    for bound_name in ('maximum', 'minimum', 'softMaximum', 'softMinimum'):
        numeric_check[bound_name] = 0.0
        numeric_check[bound_name + 'Enabled'] = False
    numeric_check['type'] = 'numericRange'

    # Apply and enable the bounds requested by the caller.
    for parameter in parameters:
        bound_name, bound_value = parameter[0], parameter[1]
        numeric_check[bound_name] = bound_value
        numeric_check[bound_name + 'Enabled'] = True

    checks['checks'].append(numeric_check)
# Example: require the record count of the dataset to be at least 100.
# `dataset` is not defined in this snippet; it must already be in scope.
# The variable must be named `settings`: the next line reads it under that name.
settings = dataset.get_settings()
checks = settings.get_raw()['metricsChecks']
CHECK_RECORDS_NAME = 'Number of records should be greater than 100'
add_metrics_checks_numeric_range(checks, CHECK_RECORDS_NAME, 'records:COUNT_RECORDS',
                                 [('minimum', 100)])
Make a defined check visible#
def set_check_visible(checks, label):
    """
    Add a defined check to the displayed state (so the user can see it in the GUI).

    The label is appended in place to ``checks['displayedState']['checks']`` unless
    it is already listed there.

    :param checks: the metricsChecks part of the dataset settings (modified in place)
    :param label: label of the check to display
    :return: None

    Usage example:

    .. code-block:: python

        settings: DSSDatasetSettings = dataset.get_settings()
        CHECK_RECORDS_NAME = 'Number of records should be greater than 100'
        checks = settings.get_raw()['metricsChecks']
        set_check_visible(checks, CHECK_RECORDS_NAME)
    """
    visible_labels = checks['displayedState']['checks']
    if label in visible_labels:
        return
    visible_labels.append(label)
# Example: add the record-count check label to the displayed checks.
# `dataset` is not defined in this snippet; it must already be in scope.
settings = dataset.get_settings()
CHECK_RECORDS_NAME = 'Number of records should be greater than 100'
checks = settings.get_raw()['metricsChecks']
# NOTE(review): the displayed state is modified in place on the raw settings;
# presumably settings.save() is needed afterwards to persist it - confirm.
set_check_visible(checks, CHECK_RECORDS_NAME)
Retrieve metric results#
def get_metrics(dataset):
    """
    Compute the metrics of a dataset and return the ids of the displayed ones.

    Calls ``dataset.compute_metrics()``, then reads the raw last metric values and
    keeps the id of every entry whose ``displayedAsMetric`` flag is truthy.

    :param dataset: the dataset
    :return: list of metric ids, in the order they appear in the raw values

    Usage example:

    .. code-block:: python

        result = {}
        last_metrics = dataset.get_last_metric_values()
        metrics = get_metrics(dataset)
        for metric in metrics:
            metric_value = last_metrics.get_metric_by_id(metric)
            if metric_value and metric_value['lastValues']:
                result[metric] = {
                    'initialValue': metric_value['lastValues'][0]['value']
                }
    """
    dataset.compute_metrics()
    raw_values = dataset.get_last_metric_values().get_raw()
    return [
        entry['metric']['id']
        for entry in raw_values['metrics']
        if entry['displayedAsMetric']
    ]
# Example: collect the first recorded value of every displayed metric.
# `dataset` is not defined in this snippet; it must already be in scope.
result = {}
# The handle on the last values is taken before get_metrics() runs.
last_metrics = dataset.get_last_metric_values()
metrics = get_metrics(dataset)
for metric in metrics:
    metric_value = last_metrics.get_metric_by_id(metric)
    # Skip metrics that have no entry or no recorded value.
    if not (metric_value and metric_value['lastValues']):
        continue
    first_value = metric_value['lastValues'][0]['value']
    result[metric] = {'initialValue': first_value}
print(result)
Retrieve check results#
def get_checks_used(settings):
    """
    List the ids of all checks displayed for a dataset.

    Each label found in ``settings['metricsChecks']['displayedState']['checks']``
    is prefixed with ``check:CHECK:``.

    :param settings: the settings of the dataset (raw dict)
    :return: the list of all checks used for this dataset
    """
    displayed_labels = settings['metricsChecks']['displayedState']['checks']
    return ['check:CHECK:' + check_label for check_label in displayed_labels]
def get_checks(dataset):
    """
    Compute metrics, run checks, and return the ids of the checks used by a dataset.

    Calls ``dataset.compute_metrics()`` and ``dataset.run_checks()``, then builds the
    list of check ids from the dataset's raw settings via ``get_checks_used``.

    :param dataset: the dataset
    :return: a new list with the ids of all displayed checks

    Usage example:

    .. code-block:: python

        result = {}
        last_metrics = dataset.get_last_metric_values()
        checks = get_checks(dataset)
        for check in checks:
            check_value = last_metrics.get_metric_by_id(check)
            if check_value and check_value['lastValues']:
                result[check] = {
                    'initialValue': check_value['lastValues'][0]['value']
                }
    """
    dataset.compute_metrics()
    dataset.run_checks()
    raw_settings = dataset.get_settings().get_raw()
    # Copy into a fresh list, as the original did by extending an empty one.
    return list(get_checks_used(raw_settings))
Reference documentation#
dataiku package API#
Handle to the metrics of a DSS object and their last computed value |
|
A value of a metric, on a partition |
|
Handle to the checks of a DSS object and their last computed value |
dataikuapi package API#
Handle to the metrics of a DSS object and their last computed value |