> For the complete documentation index, see [llms.txt](https://run-ai-docs.nvidia.com/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://run-ai-docs.nvidia.com/api/workloads/inferences.md).

# Inferences

Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes.

## Create an inference.

> Create an inference using container related fields.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"schemas":{"InferenceCreationRequest":{"allOf":[{"$ref":"#/components/schemas/WorkloadCreationMeta"},{"$ref":"#/components/schemas/SubmitWithTemplateId"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadCreationMeta":{"required":["name","projectId","clusterId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"useGivenNameAsPrefix":{"description":"When true, the requested name will be treated as a prefix. The final name of the workload will be composed of the name followed by a random set of characters.","type":"boolean","default":false},"projectId":{"$ref":"#/components/schemas/ProjectId"},"clusterId":{"$ref":"#/components/schemas/ClusterId"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"SubmitWithTemplateId":{"properties":{"templateId":{"description":"The unique identifier of the template to use for submitting this workload. The combined values provided in the spec, template and assets will be used to create the workload.","type":"string","format":"uuid","nullable":true}}},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"args":{"$ref":"#/components/schemas/Args"},"category":{"$ref":"#/components/schemas/Category"},"command":{"$ref":"#/components/schemas/Command"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"largeShmRequest":{"$ref":"#/components/schemas/LargeShmRequest"}},"type":"object"},"createHomeDir":{"$ref":"#/components/schemas/CreateHomeDir"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"nodeType":{"$ref":"#/components/schemas/NodeType3"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"},"ports":{"$ref":"#/components/schemas/Ports"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"},"security":{"nullable":true,"properties":{"capabilities":{"$ref":"#/components/schemas/Capabilities"},"hostIpc":{"$ref":"#/components/schemas/HostIpc"},"hostNetwork":{"$ref":"#/components/schemas/HostNetwork"},"readOnlyRootFilesystem":{"$ref":"#/components/schemas/ReadOnlyRootFileSystem"},"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsNonRoot":{"$ref":"#/components/schemas/RunAsNonRoot"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"supplementalGroups":{"$ref":"#/components/schemas/SupplementalGroups"},"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/ServingPort"},"storage":{"nullable":true,"properties":{"configMapVolume":{"$ref":"#/components/schemas/ConfigMapItems"},"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"},"git":{"$ref":"#/components/schemas/GitItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"secretVolume":{"$ref":"#/components/schemas/SecretItems"}},"type":"object"},"tolerations":{"$ref":"#/components/schemas/Tolerations"},"workingDir":{"$ref":"#/components/schemas/WorkingDir"}},"type":"object"},{"$ref":"#/components/schemas/InferenceFields"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"Command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"maxItems":1000,"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"LargeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true},"CreateHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"maxItems":1000,"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"$ref":"#/components/schemas/Url"},"authorizationType":{"$ref":"#/components/schemas/AuthorizationType"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true,"pattern":".*"},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"Url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system.","type":"string","maxLength":2048,"pattern":".*","nullable":true},"AuthorizationType":{"type":"string","enum":["authenticatedUsers","authorizedUsers","authorizedGroups"],"description":"Specifies who can access the connection URL:\n- `authenticatedUsers`: Any authenticated user or service account can access the URL; the authorizedUsers and authorizedGroups fields are ignored.\n- `authorizedUsers`: Only users listed in the authorizedUsers field are allowed to access the URL; the authorizedGroups field is ignored.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field are allowed to access the URL; the authorizedUsers field is ignored.\nIf not specified, authorization is determined by whether authorizedUsers or authorizedGroups is present. If both fields are set, this results in an error. If neither is set, any authenticatedUser can access.\n","nullable":true},"AuthorizedUsers":{"description":"List of users or service accounts that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"AuthorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"},"maxItems":1000}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"},"maxItems":1000}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string","pattern":".*","maxLength":63},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string","maxLength":10000,"pattern":".*"},"maxItems":1000}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"NodeType3":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://docs.run.ai/latest/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true,"pattern":".*"},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"maxItems":1000,"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":63},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"maxItems":1000,"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":2048},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true,"pattern":".*","maxLength":63},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"HostIpc":{"description":"Whether to enable host IPC. Defaults to false.","type":"boolean","nullable":true},"HostNetwork":{"description":"Whether to enable host networking. Default to false.","type":"boolean","nullable":true},"ReadOnlyRootFileSystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"SupplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true,"pattern":".*"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group IDs are automatically taken from the identity provider (IdP) token (available only in SSO-enabled installations). For more information, see [User Identity](https://run-ai-docs.nvidia.com/saas/infrastructure-setup/advanced-setup/container-access/user-identity-in-containers).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsers","authorizedGroups","authorizedUsersOrGroups"],"description":"Specifies who can send inference requests to the serving endpoint:\n\nPossible values:\n- `public`: No authorization is required. (Default)\n- `authenticatedUsers`: Any NVIDIA Run:ai authenticated user and service account can send requests.\n- `authorizedUsers`: Only users listed in the authorizedUsers field can send requests.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field can send requests.\n- `authorizedUsersOrGroups`: Requires either authorizedUsers or authorizedGroups to be provided; if neither is set, or if both are set, a mutual exclusion error is reported. Supported from cluster version 2.19.\n","nullable":true},"ConfigMapItems":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"maxItems":1000,"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"DefaultMode":{"type":"string","description":"File permission mode in octal string format. This value must be a 4-digit octal number, representing the default file mode when mounting a Secret or ConfigMap as a volume.\n","minLength":4,"maxLength":4,"pattern":"0[0-7]{3}","nullable":true},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"maxItems":1000,"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":4096}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"maxItems":1000,"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","maxLength":63,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"maxItems":1000,"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"},{"type":"object","properties":{"secretRef":{"$ref":"#/components/schemas/GitSecretRef"}},"nullable":true}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"maxLength":2048,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"maxLength":4096,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication). (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`. (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`. (deprecated)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitSecretRef":{"properties":{"name":{"description":"Name of the Secret containing the credentials of the repository.","type":"string","minLength":1},"authenticationMethod":{"$ref":"#/components/schemas/GitAuthenticationMethod"},"secretKeyOfUser":{"description":"The key in the Secret that contains the Git username (used for `password` authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key in the Secret that contains the Git password (used for `password` authentication).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitAuthenticationMethod":{"description":"Specifies the authentication method to use when accessing the Git repository. This is required for private repositories - `password` - Authenticate using a username and password. - `ssh-key` - Authenticate using an SSH private key.","type":"string","minLength":1,"enum":["password","ssh-key"]},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"maxItems":1000,"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"maxItems":1000,"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"maxItems":1000,"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"SecretItems":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance"},"nullable":true},"SecretInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"WorkingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"InferenceFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingField"},{"$ref":"#/components/schemas/ServingConfigurationField"}],"nullable":true,"type":"object"},"AutoScalingField":{"properties":{"autoscaling":{"$ref":"#/components/schemas/AutoScaling"}}},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"allOf":[{"$ref":"#/components/schemas/MetricThresholdPercentageField"},{"$ref":"#/components/schemas/InferencesMinReplicasField"},{"$ref":"#/components/schemas/InferencesMaxReplicasField"},{"$ref":"#/components/schemas/InitialReplicasField"},{"$ref":"#/components/schemas/ActivationReplicasField"},{"$ref":"#/components/schemas/ConcurrencyHardLimitField"},{"$ref":"#/components/schemas/ScaleToZeroRetentionField"},{"$ref":"#/components/schemas/ScaleDownDelayField"},{"$ref":"#/components/schemas/InitializationTimeoutField"}],"nullable":true,"type":"object"},"MetricThresholdPercentageField":{"properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true}}},"InferencesMinReplicasField":{"properties":{"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true}}},"InferencesMaxReplicasField":{"properties":{"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"InitialReplicasField":{"properties":{"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ActivationReplicasField":{"properties":{"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"ConcurrencyHardLimitField":{"properties":{"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ScaleToZeroRetentionField":{"properties":{"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"ScaleDownDelayField":{"properties":{"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"InitializationTimeoutField":{"properties":{"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}}},"AutoScalingMetricFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingMetricField"},{"$ref":"#/components/schemas/MetricThresholdField"}],"nullable":true,"type":"object"},"AutoScalingMetricField":{"properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"}}},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"MetricThresholdField":{"properties":{"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}}},"ServingConfigurationField":{"properties":{"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}}},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadId":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"DepartmentId":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences":{"post":{"summary":"Create an inference.","operationId":"create_inference1","description":"Create an inference using container related fields.","tags":["Inferences"],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/InferenceCreationRequest"}}}},"responses":{"202":{"description":"Request completed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference data.

> Retrieve inference details using a workload id.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"WorkloadId":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"DepartmentId":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"args":{"$ref":"#/components/schemas/Args"},"category":{"$ref":"#/components/schemas/Category"},"command":{"$ref":"#/components/schemas/Command"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"largeShmRequest":{"$ref":"#/components/schemas/LargeShmRequest"}},"type":"object"},"createHomeDir":{"$ref":"#/components/schemas/CreateHomeDir"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"nodeType":{"$ref":"#/components/schemas/NodeType3"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"},"ports":{"$ref":"#/components/schemas/Ports"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"},"security":{"nullable":true,"properties":{"capabilities":{"$ref":"#/components/schemas/Capabilities"},"hostIpc":{"$ref":"#/components/schemas/HostIpc"},"hostNetwork":{"$ref":"#/components/schemas/HostNetwork"},"readOnlyRootFilesystem":{"$ref":"#/components/schemas/ReadOnlyRootFileSystem"},"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsNonRoot":{"$ref":"#/components/schemas/RunAsNonRoot"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"supplementalGroups":{"$ref":"#/components/schemas/SupplementalGroups"},"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/ServingPort"},"storage":{"nullable":true,"properties":{"configMapVolume":{"$ref":"#/components/schemas/ConfigMapItems"},"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"},"git":{"$ref":"#/components/schemas/GitItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"secretVolume":{"$ref":"#/components/schemas/SecretItems"}},"type":"object"},"tolerations":{"$ref":"#/components/schemas/Tolerations"},"workingDir":{"$ref":"#/components/schemas/WorkingDir"}},"type":"object"},{"$ref":"#/components/schemas/InferenceFields"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"Command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"maxItems":1000,"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"LargeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true},"CreateHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"maxItems":1000,"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"$ref":"#/components/schemas/Url"},"authorizationType":{"$ref":"#/components/schemas/AuthorizationType"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true,"pattern":".*"},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"Url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system.","type":"string","maxLength":2048,"pattern":".*","nullable":true},"AuthorizationType":{"type":"string","enum":["authenticatedUsers","authorizedUsers","authorizedGroups"],"description":"Specifies who can access the connection URL:\n- `authenticatedUsers`: Any authenticated user or service account can access the URL; the authorizedUsers and authorizedGroups fields are ignored.\n- `authorizedUsers`: Only users listed in the authorizedUsers field are allowed to access the URL; the authorizedGroups field is ignored.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field are allowed to access the URL; the authorizedUsers field is ignored.\nIf not specified, authorization is determined by whether authorizedUsers or authorizedGroups is present. If both fields are set, this results in an error. If neither is set, any authenticatedUser can access.\n","nullable":true},"AuthorizedUsers":{"description":"List of users or service accounts that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"AuthorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"},"maxItems":1000}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"},"maxItems":1000}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string","pattern":".*","maxLength":63},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string","maxLength":10000,"pattern":".*"},"maxItems":1000}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"NodeType3":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://docs.run.ai/latest/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true,"pattern":".*"},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"maxItems":1000,"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":63},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"maxItems":1000,"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":2048},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true,"pattern":".*","maxLength":63},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"HostIpc":{"description":"Whether to enable host IPC. Defaults to false.","type":"boolean","nullable":true},"HostNetwork":{"description":"Whether to enable host networking. Default to false.","type":"boolean","nullable":true},"ReadOnlyRootFileSystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"SupplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true,"pattern":".*"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group IDs are automatically taken from the identity provider (IdP) token (available only in SSO-enabled installations). For more information, see [User Identity](https://run-ai-docs.nvidia.com/saas/infrastructure-setup/advanced-setup/container-access/user-identity-in-containers).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsers","authorizedGroups","authorizedUsersOrGroups"],"description":"Specifies who can send inference requests to the serving endpoint:\n\nPossible values:\n- `public`: No authorization is required. (Default)\n- `authenticatedUsers`: Any NVIDIA Run:ai authenticated user and service account can send requests.\n- `authorizedUsers`: Only users listed in the authorizedUsers field can send requests.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field can send requests.\n- `authorizedUsersOrGroups`: Requires either authorizedUsers or authorizedGroups to be provided; if neither is set, or if both are set, a mutual exclusion error is reported. Supported from cluster version 2.19.\n","nullable":true},"ConfigMapItems":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"maxItems":1000,"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"DefaultMode":{"type":"string","description":"File permission mode in octal string format. This value must be a 4-digit octal number, representing the default file mode when mounting a Secret or ConfigMap as a volume.\n","minLength":4,"maxLength":4,"pattern":"0[0-7]{3}","nullable":true},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"maxItems":1000,"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":4096}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"maxItems":1000,"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","maxLength":63,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"maxItems":1000,"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"},{"type":"object","properties":{"secretRef":{"$ref":"#/components/schemas/GitSecretRef"}},"nullable":true}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"maxLength":2048,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"maxLength":4096,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication). (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`. (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`. (deprecated)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitSecretRef":{"properties":{"name":{"description":"Name of the Secret containing the credentials of the repository.","type":"string","minLength":1},"authenticationMethod":{"$ref":"#/components/schemas/GitAuthenticationMethod"},"secretKeyOfUser":{"description":"The key in the Secret that contains the Git username (used for `password` authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key in the Secret that contains the Git password (used for `password` authentication).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitAuthenticationMethod":{"description":"Specifies the authentication method to use when accessing the Git repository. This is required for private repositories - `password` - Authenticate using a username and password. - `ssh-key` - Authenticate using an SSH private key.","type":"string","minLength":1,"enum":["password","ssh-key"]},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"maxItems":1000,"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"maxItems":1000,"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"maxItems":1000,"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"SecretItems":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance"},"nullable":true},"SecretInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"WorkingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"InferenceFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingField"},{"$ref":"#/components/schemas/ServingConfigurationField"}],"nullable":true,"type":"object"},"AutoScalingField":{"properties":{"autoscaling":{"$ref":"#/components/schemas/AutoScaling"}}},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"allOf":[{"$ref":"#/components/schemas/MetricThresholdPercentageField"},{"$ref":"#/components/schemas/InferencesMinReplicasField"},{"$ref":"#/components/schemas/InferencesMaxReplicasField"},{"$ref":"#/components/schemas/InitialReplicasField"},{"$ref":"#/components/schemas/ActivationReplicasField"},{"$ref":"#/components/schemas/ConcurrencyHardLimitField"},{"$ref":"#/components/schemas/ScaleToZeroRetentionField"},{"$ref":"#/components/schemas/ScaleDownDelayField"},{"$ref":"#/components/schemas/InitializationTimeoutField"}],"nullable":true,"type":"object"},"MetricThresholdPercentageField":{"properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true}}},"InferencesMinReplicasField":{"properties":{"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true}}},"InferencesMaxReplicasField":{"properties":{"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"InitialReplicasField":{"properties":{"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ActivationReplicasField":{"properties":{"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"ConcurrencyHardLimitField":{"properties":{"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ScaleToZeroRetentionField":{"properties":{"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"ScaleDownDelayField":{"properties":{"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"InitializationTimeoutField":{"properties":{"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}}},"AutoScalingMetricFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingMetricField"},{"$ref":"#/components/schemas/MetricThresholdField"}],"nullable":true,"type":"object"},"AutoScalingMetricField":{"properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"}}},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"MetricThresholdField":{"properties":{"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}}},"ServingConfigurationField":{"properties":{"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}}},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"get":{"summary":"Get inference data.","operationId":"get_inference","description":"Retrieve inference details using a workload id.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Delete an inference.

> Delete an inference using a workload id.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"responses":{"202Accepted":{"description":"Accepted.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HttpResponse"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"schemas":{"HttpResponse":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"delete":{"summary":"Delete an inference.","operationId":"delete_inference","description":"Delete an inference using a workload id.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"202":{"$ref":"#/components/responses/202Accepted"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Update inference spec.

> Update the specification of an existing inference workload.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"InferenceUpdateRequest":{"allOf":[{"$ref":"#/components/schemas/InferenceUpdateSpec"}]},"InferenceUpdateSpec":{"description":"The specifications of the inference to be updated.","properties":{"spec":{"allOf":[{"nullable":true,"properties":{"args":{"$ref":"#/components/schemas/Args"},"category":{"$ref":"#/components/schemas/Category"},"command":{"$ref":"#/components/schemas/Command"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"largeShmRequest":{"$ref":"#/components/schemas/LargeShmRequest"}},"type":"object"},"createHomeDir":{"$ref":"#/components/schemas/CreateHomeDir"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"nodeType":{"$ref":"#/components/schemas/NodeType3"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"workingDir":{"$ref":"#/components/schemas/WorkingDir"}},"type":"object"},{"$ref":"#/components/schemas/InferenceUpdateSpecAutoscaling"},{"$ref":"#/components/schemas/InferenceUpdateSpecServingConfiguration"}]}}},"Args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"Command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"maxItems":1000,"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"LargeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true},"CreateHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"},"maxItems":1000}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"},"maxItems":1000}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string","pattern":".*","maxLength":63},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string","maxLength":10000,"pattern":".*"},"maxItems":1000}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"NodeType3":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://docs.run.ai/latest/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true,"pattern":".*"},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"WorkingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"InferenceUpdateSpecAutoscaling":{"properties":{"autoscaling":{"$ref":"#/components/schemas/AutoScaling"}}},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"allOf":[{"$ref":"#/components/schemas/MetricThresholdPercentageField"},{"$ref":"#/components/schemas/InferencesMinReplicasField"},{"$ref":"#/components/schemas/InferencesMaxReplicasField"},{"$ref":"#/components/schemas/InitialReplicasField"},{"$ref":"#/components/schemas/ActivationReplicasField"},{"$ref":"#/components/schemas/ConcurrencyHardLimitField"},{"$ref":"#/components/schemas/ScaleToZeroRetentionField"},{"$ref":"#/components/schemas/ScaleDownDelayField"},{"$ref":"#/components/schemas/InitializationTimeoutField"}],"nullable":true,"type":"object"},"MetricThresholdPercentageField":{"properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true}}},"InferencesMinReplicasField":{"properties":{"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true}}},"InferencesMaxReplicasField":{"properties":{"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"InitialReplicasField":{"properties":{"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ActivationReplicasField":{"properties":{"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true}}},"ConcurrencyHardLimitField":{"properties":{"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true}}},"ScaleToZeroRetentionField":{"properties":{"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"ScaleDownDelayField":{"properties":{"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true}}},"InitializationTimeoutField":{"properties":{"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}}},"AutoScalingMetricFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingMetricField"},{"$ref":"#/components/schemas/MetricThresholdField"}],"nullable":true,"type":"object"},"AutoScalingMetricField":{"properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"}}},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"MetricThresholdField":{"properties":{"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}}},"InferenceUpdateSpecServingConfiguration":{"properties":{"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}}},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"WorkloadId":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"DepartmentId":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"args":{"$ref":"#/components/schemas/Args"},"category":{"$ref":"#/components/schemas/Category"},"command":{"$ref":"#/components/schemas/Command"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"largeShmRequest":{"$ref":"#/components/schemas/LargeShmRequest"}},"type":"object"},"createHomeDir":{"$ref":"#/components/schemas/CreateHomeDir"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"nodeType":{"$ref":"#/components/schemas/NodeType3"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"},"ports":{"$ref":"#/components/schemas/Ports"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"},"security":{"nullable":true,"properties":{"capabilities":{"$ref":"#/components/schemas/Capabilities"},"hostIpc":{"$ref":"#/components/schemas/HostIpc"},"hostNetwork":{"$ref":"#/components/schemas/HostNetwork"},"readOnlyRootFilesystem":{"$ref":"#/components/schemas/ReadOnlyRootFileSystem"},"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsNonRoot":{"$ref":"#/components/schemas/RunAsNonRoot"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"supplementalGroups":{"$ref":"#/components/schemas/SupplementalGroups"},"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/ServingPort"},"storage":{"nullable":true,"properties":{"configMapVolume":{"$ref":"#/components/schemas/ConfigMapItems"},"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"},"git":{"$ref":"#/components/schemas/GitItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"secretVolume":{"$ref":"#/components/schemas/SecretItems"}},"type":"object"},"tolerations":{"$ref":"#/components/schemas/Tolerations"},"workingDir":{"$ref":"#/components/schemas/WorkingDir"}},"type":"object"},{"$ref":"#/components/schemas/InferenceFields"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"maxItems":1000,"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"$ref":"#/components/schemas/Url"},"authorizationType":{"$ref":"#/components/schemas/AuthorizationType"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true,"pattern":".*"},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"Url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system.","type":"string","maxLength":2048,"pattern":".*","nullable":true},"AuthorizationType":{"type":"string","enum":["authenticatedUsers","authorizedUsers","authorizedGroups"],"description":"Specifies who can access the connection URL:\n- `authenticatedUsers`: Any authenticated user or service account can access the URL; the authorizedUsers and authorizedGroups fields are ignored.\n- `authorizedUsers`: Only users listed in the authorizedUsers field are allowed to access the URL; the authorizedGroups field is ignored.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field are allowed to access the URL; the authorizedUsers field is ignored.\nIf not specified, authorization is determined by whether authorizedUsers or authorizedGroups is present. If both fields are set, this results in an error. If neither is set, any authenticatedUser can access.\n","nullable":true},"AuthorizedUsers":{"description":"List of users or service accounts that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"AuthorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string","maxLength":253,"pattern":".*"},"maxItems":1000,"nullable":true},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"maxItems":1000,"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":63},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true,"pattern":".*","maxLength":253},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"maxItems":1000,"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":2048},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":63},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true,"pattern":".*","maxLength":63},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"HostIpc":{"description":"Whether to enable host IPC. Defaults to false.","type":"boolean","nullable":true},"HostNetwork":{"description":"Whether to enable host networking. Default to false.","type":"boolean","nullable":true},"ReadOnlyRootFileSystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"SupplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true,"pattern":".*"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group IDs are automatically taken from the identity provider (IdP) token (available only in SSO-enabled installations). For more information, see [User Identity](https://run-ai-docs.nvidia.com/saas/infrastructure-setup/advanced-setup/container-access/user-identity-in-containers).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"$ref":"#/components/schemas/AuthorizedUsers"},"authorizedGroups":{"$ref":"#/components/schemas/AuthorizedGroups"},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsers","authorizedGroups","authorizedUsersOrGroups"],"description":"Specifies who can send inference requests to the serving endpoint:\n\nPossible values:\n- `public`: No authorization is required. (Default)\n- `authenticatedUsers`: Any NVIDIA Run:ai authenticated user and service account can send requests.\n- `authorizedUsers`: Only users listed in the authorizedUsers field can send requests.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field can send requests.\n- `authorizedUsersOrGroups`: Requires either authorizedUsers or authorizedGroups to be provided; if neither is set, or if both are set, a mutual exclusion error is reported. Supported from cluster version 2.19.\n","nullable":true},"ConfigMapItems":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"maxItems":1000,"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"DefaultMode":{"type":"string","description":"File permission mode in octal string format. This value must be a 4-digit octal number, representing the default file mode when mounting a Secret or ConfigMap as a volume.\n","minLength":4,"maxLength":4,"pattern":"0[0-7]{3}","nullable":true},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"maxItems":1000,"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true,"pattern":".*","maxLength":4096}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"maxItems":1000,"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","maxLength":63,"pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"maxItems":1000,"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"},{"type":"object","properties":{"secretRef":{"$ref":"#/components/schemas/GitSecretRef"}},"nullable":true}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"maxLength":2048,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"maxLength":63,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"maxLength":4096,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication). (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`. (deprecated)","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`. (deprecated)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitSecretRef":{"properties":{"name":{"description":"Name of the Secret containing the credentials of the repository.","type":"string","minLength":1},"authenticationMethod":{"$ref":"#/components/schemas/GitAuthenticationMethod"},"secretKeyOfUser":{"description":"The key in the Secret that contains the Git username (used for `password` authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key in the Secret that contains the Git password (used for `password` authentication).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitAuthenticationMethod":{"description":"Specifies the authentication method to use when accessing the Git repository. This is required for private repositories - `password` - Authenticate using a username and password. - `ssh-key` - Authenticate using an SSH private key.","type":"string","minLength":1,"enum":["password","ssh-key"]},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"maxItems":1000,"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"maxItems":1000,"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"maxItems":1000,"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"maxLength":4096,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"SecretItems":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance"},"nullable":true},"SecretInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"defaultMode":{"$ref":"#/components/schemas/DefaultMode"}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"InferenceFields":{"allOf":[{"$ref":"#/components/schemas/AutoScalingField"},{"$ref":"#/components/schemas/ServingConfigurationField"}],"nullable":true,"type":"object"},"AutoScalingField":{"properties":{"autoscaling":{"$ref":"#/components/schemas/AutoScaling"}}},"ServingConfigurationField":{"properties":{"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"patch":{"summary":"Update inference spec.","operationId":"update_inference_spec","description":"Update the specification of an existing inference workload.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/InferenceUpdateRequest"}}}},"responses":{"202":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference metrics data.

> Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"InferenceWorkloadMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specify which data to request.","explode":false,"schema":{"type":"array","minItems":1,"maxItems":20,"items":{"$ref":"#/components/schemas/InferenceWorkloadMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"InferenceWorkloadMetricType":{"type":"string","description":"Specify which data to request.","enum":["THROUGHPUT","LATENCY"]},"MetricsResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}/metrics":{"get":{"summary":"Get inference metrics data.","description":"Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.","operationId":"get_inference_workload_metrics","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/InferenceWorkloadMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference pod's metrics data.

> Retrieve inference metrics pod's data by workload and pod id. Supported from control-plane version 2.18 or later.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"latest"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"PodId":{"name":"podId","in":"path","description":"The requested pod id.","schema":{"type":"string","format":"uuid"},"required":true},"InferencePodMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specifies metrics data to request. Inference metrics are only available for inference workloads.","explode":false,"schema":{"type":"array","minItems":1,"maxItems":20,"items":{"$ref":"#/components/schemas/InferencePodMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"InferencePodMetricType":{"type":"string","description":"Specifies what data to request.","enum":["THROUGHPUT","LATENCY"]},"MetricsResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}/pods/{podId}/metrics":{"get":{"summary":"Get inference pod's metrics data.","description":"Retrieve inference metrics pod's data by workload and pod id. Supported from control-plane version 2.18 or later.","operationId":"get_inference_workload_pod_metrics","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/PodId"},{"$ref":"#/components/parameters/InferencePodMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter, and the optional `goal` query parameter:

```
GET https://run-ai-docs.nvidia.com/api/workloads/inferences.md?ask=<question>&goal=<endgoal>
```

`ask` is the immediate question: it should be specific, self-contained, and written in natural language.
`goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.