Code envs#
The API offers methods to:
Create code envs
Read and write settings and packages of code envs
Update code envs
Reinstall
Set code environment resources environment variables
Creating a code env#
Python 3.6 code env, with Jupyter support#
# The snippet relies on the `dataiku` package, so import it explicitly
import dataiku

client = dataiku.api_client()

# Create the code env, requesting the Python 3.6 interpreter
code_env = client.create_code_env(
    "PYTHON",
    "my_code_env_name",
    "DESIGN_MANAGED",
    {"pythonInterpreter": "PYTHON36"},
)

# Setup packages to install
definition = code_env.get_definition()
definition["desc"]["installCorePackages"] = True
definition["desc"]["installJupyterSupport"] = True

# We want to install 2 packages (tabulate and nameparser),
# listed one per line in the package specification
definition["specPackageList"] = "tabulate\nnameparser"

# Save the new settings
code_env.set_definition(definition)

# Actually perform the installation, then enable Jupyter support
code_env.update_packages()
code_env.set_jupyter_support(True)
Python 2.7 code env, without Jupyter support#
# The snippet relies on the `dataiku` package, so import it explicitly
import dataiku

client = dataiku.api_client()

# Create the code env, explicitly requesting the Python 2.7 interpreter
# (without this parameter the interpreter choice is left to DSS)
code_env = client.create_code_env(
    "PYTHON",
    "my_code_env_name",
    "DESIGN_MANAGED",
    {"pythonInterpreter": "PYTHON27"},
)

# Setup packages to install
definition = code_env.get_definition()
definition["desc"]["installCorePackages"] = True

# We want to install 2 packages (tabulate and nameparser),
# listed one per line in the package specification
definition["specPackageList"] = "tabulate\nnameparser"

# Save the new settings
code_env.set_definition(definition)

# Actually perform the installation
code_env.update_packages()
Managing the code environment resources directory environment variables#
These methods may only be called from a resource initialization script. See Managed code environment resources directory.
from dataiku.code_env_resources import clear_all_env_vars
from dataiku.code_env_resources import delete_env_var
from dataiku.code_env_resources import get_env_var
from dataiku.code_env_resources import set_env_var
from dataiku.code_env_resources import set_env_path
# NOTE: these helpers may only be called from a code env resource
# initialization script. The calls below run in order and each one changes
# the set of environment variables attached to the code environment.

# Delete all environment variables from the code environment runtime
# (start from a clean slate before defining new ones)
clear_all_env_vars()
# Set a raw environment variable for the code environment runtime
set_env_var("ENV_VAR", "42")
# Set a relative path environment variable to be loaded at runtime
# (relative path with respect to the code env resources directory)
set_env_path("TFHUB_CACHE_DIR", "tensorflow")
# Get an environment variable from the code environment runtime
# and print it next to its name
print("TFHUB_CACHE_DIR:", get_env_var("TFHUB_CACHE_DIR"))
# Delete an environment variable from the code environment runtime
# (removes the "ENV_VAR" variable set above)
delete_env_var("ENV_VAR")
# Then download pre-trained models in the resources directory, e.g.
# for TensorFlow (left commented out as an illustration only)
# import tensorflow_hub
# tensorflow_hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4")
(Advanced) The method dataiku.code_env_resources.fetch_from_backend lets you fetch specific resource files or folders from the backend when running in containerized execution. It is meant to be called in a Python recipe or notebook when the resources were not already copied or initialized for containerized execution at build time (see Code environment resources directory).
from dataiku.code_env_resources import fetch_from_backend
# Resource files and folders to fetch from the backend
resources_to_fetch = [
    "pytorch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
    "huggingface/",
]
fetch_from_backend(resources_to_fetch)

# Load pre-trained models as usual
Detailed examples#
Get Recipes using specific Code Environments#
When editing a code environment, you may want to know which code recipes use that environment and could therefore be affected by the change. The following code snippet builds such a mapping for the Python and R recipes of a project:
import dataiku
def get_instance_default_code_env(client):
    """Return the global default code envs (instance-level).

    Args:
        client: DSS API client. Only ``get_general_settings()`` is called on
            it, and the ``settings`` dict of the returned object is read.

    Returns:
        dict: ``{"python": <env name>, "r": <env name>}``. A language whose
        default is missing or empty in the general settings maps to
        ``"dss_builtin"``.
    """
    # Tolerate a missing "codeEnvs" section: no instance default means builtin.
    code_env_settings = client.get_general_settings().settings.get("codeEnvs") or {}
    settings_keys = (("python", "defaultPythonEnv"), ("r", "defaultREnv"))
    return {
        rcp_type: code_env_settings.get(settings_key) or "dss_builtin"
        for rcp_type, settings_key in settings_keys
    }
def get_code_env_mapping(client, project):
    """Return a dict mapping code-based items with their code envs.

    Args:
        client: DSS API client, only used to look up the instance-level
            defaults when the project inherits them.
        project: DSS project handle. ``get_settings().get_raw()`` and
            ``list_recipes()`` are called on it.

    Returns:
        dict: ``{"python": [...], "r": [...]}`` where each list holds one
        ``{"name": <recipe name>, "code_env": <env name>}`` entry per recipe
        of that type.
    """
    builtin_env = "dss_builtin"
    rcp_types = ["python", "r"]
    mapping = {"python": [], "r": []}

    env_default = {}
    project_default_modes = project.get_settings().get_raw()["settings"]["codeEnvs"]
    all_recipes = project.list_recipes()
    instance_defaults = None  # fetched lazily, at most once

    for rcp_type in rcp_types:
        # Resolve the project-level default for this language. Any mode other
        # than the three below leaves the entry unset, so a recipe inheriting
        # it fails with KeyError instead of being reported incorrectly.
        project_selection = project_default_modes[rcp_type]
        project_mode = project_selection["mode"]
        if project_mode == "USE_BUILTIN_MODE":
            env_default[rcp_type] = builtin_env
        elif project_mode == "INHERIT":
            if instance_defaults is None:
                instance_defaults = get_instance_default_code_env(client)
            env_default[rcp_type] = instance_defaults.get(rcp_type)
        elif project_mode == "EXPLICIT_ENV":
            env_default[rcp_type] = project_selection["envName"]

        for recipe in all_recipes:
            if recipe["type"] != rcp_type:
                continue
            env_select = recipe["params"]["envSelection"]
            env_mode = env_select["envMode"]
            if env_mode == "EXPLICIT_ENV":
                code_env = env_select["envName"]
            elif env_mode == "USE_BUILTIN_MODE":
                # The recipe forces the builtin env, whatever the project default is.
                code_env = builtin_env
            else:
                code_env = env_default[rcp_type]
            mapping[rcp_type].append({"name": recipe["name"], "code_env": code_env})
    return mapping
# Build the recipe -> code env mapping for the default project and print it
client = dataiku.api_client()
project = client.get_default_project()
mapping = get_code_env_mapping(client, project)
print(mapping)
Reference documentation#
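| Class | Description |
| dataikuapi.dss.admin.DSSCodeEnv | A code env on the DSS instance. |
| dataikuapi.dss.admin.DSSDesignCodeEnvSettings | Base settings class for a DSS code env on a design node. |
| dataikuapi.dss.admin.DSSAutomationCodeEnvSettings | Base settings class for a DSS code env on an automation node. |
| dataikuapi.dss.admin.DSSAutomationCodeEnvVersionSettings | Base settings class for a DSS code env version on an automation node. |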