Dataiku and Govern item helper functions#

Helper function definitions#

def get_dataiku_project_as_artifact(govern_client, node_id, project_key):
    """
    Look up the artifact representing a Dataiku project, identified by its node ID and its project key

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param str node_id: the ID of the node hosting the project
    :param str project_key: the key of the project on that node
    :return: the first hit of the search converted to an artifact, or None when the search returns no hit
    :rtype: GovernArtifact or None
    """
    from dataikuapi.govern.artifact_search import (
        GovernArtifactFilterArchivedStatus,
        GovernArtifactFilterBlueprints,
        GovernArtifactFilterFieldValue,
        GovernArtifactSearchQuery,
    )

    # search filters: archived status, Dataiku project blueprint, node ID and project key
    filters = [
        GovernArtifactFilterArchivedStatus(is_archived=False),
        GovernArtifactFilterBlueprints(blueprint_ids=['bp.system.dataiku_project']),
        GovernArtifactFilterFieldValue(condition_type='EQUALS', condition=node_id, field_id='node_id'),
        GovernArtifactFilterFieldValue(condition_type='EQUALS', condition=project_key, field_id='project_key'),
    ]
    query = GovernArtifactSearchQuery(artifact_filters=filters)
    search_request = govern_client.new_artifact_search_request(query)

    # only the first batch is fetched, and only its first hit is used
    hits = search_request.fetch_next_batch().get_response_hits()
    return hits[0].to_artifact() if hits else None

def get_govern_item_from_dataiku_item(govern_client, dataiku_item):
    """
    Fetch the Govern item (project, bundle, model or model version) that governs the given Dataiku item

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact dataiku_item: the Dataiku item whose 'governed_by' field is read
    :return: the artifact referenced by the 'governed_by' field, or None when that field has no value
    :rtype: GovernArtifact or None
    """
    raw_definition = dataiku_item.get_definition().get_raw()
    fields = raw_definition.get('fields', {})
    governed_by_id = fields.get('governed_by')
    if governed_by_id is not None:
        return govern_client.get_artifact(governed_by_id)
    # no 'governed_by' reference: the Dataiku item is not governed
    return None

def get_dataiku_items_from_govern_item(govern_client, govern_item):
    """
    Retrieve the Dataiku items (could be project, bundle, model, model version) corresponding to the Govern item

    The references are read from the 'dataiku_item' field of the Govern item, through
    get_reference_list_as_artifacts like the other reference-list helpers.

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact govern_item: the Govern item as input
    :return: the list of corresponding Dataiku items (several Dataiku projects can be governed by the same Govern project; for other item types, there should be at most a single value in the list)
    :rtype: list of GovernArtifact
    """
    return get_reference_list_as_artifacts(govern_client, govern_item, 'dataiku_item')

def get_reference_list_as_artifacts(govern_client, artifact, field_id):
    """
    Retrieve the items referenced by a field (list of references) of an artifact

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact artifact: the item as input
    :param str field_id: the field ID of the reference list
    :return: the list of corresponding items, empty when the field is missing, null or empty
    :rtype: list of GovernArtifact
    """
    raw_definition = artifact.get_definition().get_raw()
    # `or` (rather than a .get() default) also covers keys that are present but null
    fields = raw_definition.get('fields') or {}
    artifact_ids = fields.get(field_id) or []
    return [govern_client.get_artifact(artifact_id) for artifact_id in artifact_ids]

def get_govern_bundles(govern_client, govern_project):
    """
    List the govern bundles referenced by a govern project

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact govern_project: the govern project whose 'govern_bundles' field is read
    :return: the govern bundles referenced by this project
    :rtype: list of GovernArtifact
    """
    bundles_field_id = 'govern_bundles'
    return get_reference_list_as_artifacts(govern_client, govern_project, bundles_field_id)

def get_govern_models(govern_client, govern_project):
    """
    List the govern models referenced by a govern project

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact govern_project: the govern project whose 'govern_models' field is read
    :return: the govern models referenced by this project
    :rtype: list of GovernArtifact
    """
    models_field_id = 'govern_models'
    return get_reference_list_as_artifacts(govern_client, govern_project, models_field_id)

def get_govern_model_versions(govern_client, govern_model):
    """
    List the govern model versions referenced by a govern model

    :param GovernClient govern_client: a govern client connected via the public API to the govern instance
    :param GovernArtifact govern_model: the govern model whose 'govern_model_versions' field is read
    :return: the govern model versions referenced by this model
    :rtype: list of GovernArtifact
    """
    model_versions_field_id = 'govern_model_versions'
    return get_reference_list_as_artifacts(govern_client, govern_model, model_versions_field_id)

A few usage examples#

import dataikuapi

host = "http(s)://GOVERN_HOST:GOVERN_PORT"
apiKey = "Your API key secret"
client = dataikuapi.GovernClient(host, apiKey)

# search for a specific Dataiku project (None is returned if not found)
dku_project = get_dataiku_project_as_artifact(client, 'design_node_id', 'MY_PROJECT_KEY')
if dku_project is None:
    # every following step needs the Dataiku project artifact, so stop here
    raise SystemExit("Dataiku project not found on the Govern instance")
print(dku_project.get_definition().get_raw())

# get the associated Govern project tied to it (None is returned if the Dataiku project is not governed)
govern_project = get_govern_item_from_dataiku_item(client, dku_project)
if govern_project is None:
    # the remaining steps all start from the Govern project
    raise SystemExit("The Dataiku project is not governed")
print(govern_project.get_definition().get_raw())

# get back the Dataiku projects tied to this Govern project
# the returned value is a list since several Dataiku projects can be governed by the same Govern project
dataiku_projects = get_dataiku_items_from_govern_item(client, govern_project)
for dkup in dataiku_projects:
    print(dkup.get_definition().get_raw())

# list the govern bundles from the govern project
bundles = get_govern_bundles(client, govern_project)
for bundle in bundles:
    print(bundle.get_definition().get_raw())