<syntaxhighlight lang="yaml">
apiVersion: batch/v1
kind: Job
metadata:
  # name of the job
  name: tf-mnist
spec:
  template:
    spec:
      # List of containers belonging to the job starts here
      containers:
        # container name used for pod creation
        - name: tf-mnist-container
          # container image from the registry
          image: ccu.uni-konstanz.de:5000/bastian.goldluecke/tf_mnist:0.1
          # container resources requested from the node
          resources:
            # limits are hard *maximums* (not minimums): the container is
            # never allowed to exceed them
            limits:
              # this gives us 2 GiB of main memory. Note that this is a hard limit,
              # exceeding it will mean the container exits immediately with an error.
              memory: "2Gi"
              # this requests a number of GPUs. GPUs will be allocated to the container
              # exclusively. No fractional GPUs can be requested.
              # When executing nvidia-smi in the container, it should show exactly this
              # number of GPUs.
              #
              # PLEASE DO NOT SET THE NUMBER TO ZERO, EVER, AND ALWAYS INCLUDE THIS LINE.
              #
              # It is a known limitation of nVidias runtime that if zero GPUs are requested,
              # then actually *all* GPUs are exposed in the container.
              # We are looking for a fix to this.
              #
              nvidia.com/gpu: "1"
            # requests are the guaranteed minimum used by the scheduler to
            # place the pod on a node with enough free resources
            requests:
              memory: "2Gi"
          command: ["/application/run.sh"]
      # login credentials to the docker registry.
      # for convenience, a readonly credential is provided as a secret in each namespace.
      imagePullSecrets:
        - name: registry-ro-login
      # containers will never restart
      restartPolicy: Never
  # number of retries after failure.
  # since we typically have to fix something in this case, set to zero by default.
  backoffLimit: 0
</syntaxhighlight>