# Inferences

Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes.

## Create an inference.

> Create an inference using container related fields.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"schemas":{"InferenceCreationRequest":{"allOf":[{"$ref":"#/components/schemas/WorkloadCreationMeta1"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadCreationMeta1":{"required":["name","projectId","clusterId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"useGivenNameAsPrefix":{"description":"When true, the requested name will be treated as a prefix. The final name of the workload will be composed of the name followed by a random set of characters.","type":"boolean","default":false},"projectId":{"$ref":"#/components/schemas/ProjectId2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1},"ProjectId2":{"description":"The id of the project.","type":"string"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"},{"$ref":"#/components/schemas/InferenceFlatFields"},{"$ref":"#/components/schemas/CommonItemizedFields"},{"$ref":"#/components/schemas/ConnectivityFields"},{"$ref":"#/components/schemas/Compute"},{"$ref":"#/components/schemas/CommonStorage"},{"$ref":"#/components/schemas/CommonSecurity"},{"$ref":"#/components/schemas/InferenceFields"}]},"CommonFlatFields":{"allOf":[{"$ref":"#/components/schemas/ContainerOverridable"},{"$ref":"#/components/schemas/ContainerNonOverridable"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPod"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPG"}],"nullable":true,"type":"object"},"ContainerOverridable":{"description":"Container overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"nullable":true},"args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ContainerNonOverridable":{"description":"Container non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"nullable":true},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"workingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"nullable":true},"createHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"probes":{"$ref":"#/components/schemas/Probes"}},"nullable":true,"type":"object"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"ResourcesFlatFieldsPerPod":{"description":"Resource parameters.","properties":{"nodeType":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"}}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"}}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string"},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string"}}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"ResourcesFlatFieldsPerPG":{"description":"Resource parameters.","properties":{"category":{"$ref":"#/components/schemas/Category"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"}},"nullable":true,"type":"object"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload.  Valid values are: very-low, low, medium-low, medium, medium-high, high, very-high.  You can use this parameter to adjust the workload's scheduling behavior.  Each workload type has a default priority.  To view the default priority for each workload type, use the GET /workload-types endpoint.","type":"string","nullable":true},"NodeRelatedFlatFields":{"description":"Node related parameters.","properties":{"nodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string"},"nullable":true}},"nullable":true,"type":"object"},"InferenceFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"}],"nullable":true,"type":"object"},"CommonItemizedFields":{"allOf":[{"$ref":"#/components/schemas/EnvironmentVariablesField"},{"$ref":"#/components/schemas/AdvancedItemizedFields"}],"nullable":true,"type":"object"},"EnvironmentVariablesField":{"properties":{"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"}},"nullable":true,"type":"object"},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"nullable":true},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, credential, configMap and podFieldRef)","type":"string","nullable":true},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"AdvancedItemizedFields":{"allOf":[{"$ref":"#/components/schemas/AnnotationsField"},{"$ref":"#/components/schemas/LabelsField"},{"$ref":"#/components/schemas/ImagePullSecretsField"},{"$ref":"#/components/schemas/TolerationsField"}],"nullable":true,"type":"object"},"AnnotationsField":{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"}},"nullable":true,"type":"object"},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the annotation.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"LabelsField":{"properties":{"labels":{"$ref":"#/components/schemas/Labels"}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the label.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"ImagePullSecretsField":{"properties":{"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"}},"nullable":true,"type":"object"},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials."},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"TolerationsField":{"properties":{"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"nullable":true},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","nullable":true},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","nullable":true},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"ConnectivityFields":{"properties":{"ports":{"$ref":"#/components/schemas/Ports"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"}},"nullable":true,"type":"object"},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system..","type":"string","nullable":true},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Compute":{"properties":{"compute":{"$ref":"#/components/schemas/ComputeFields"}},"nullable":true,"type":"object"},"ComputeFields":{"allOf":[{"$ref":"#/components/schemas/ComputeFlatFields"},{"$ref":"#/components/schemas/ComputeItemizedFields"}],"nullable":true,"type":"object"},"ComputeFlatFields":{"properties":{"gpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory, gpuPortion or migProfile [deprecated].","type":"integer","format":"int32","nullable":true,"minimum":0},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"gpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"gpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"gpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"gpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"migProfile":{"$ref":"#/components/schemas/MigProfile","depracted":true},"cpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"cpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"cpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"cpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"largeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion, memory or  mig profile [deprecated]. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion`, `memory` or `migProfile` [deprecated].","type":"string","minLength":1,"enum":["portion","memory","migProfile"],"nullable":true},"MigProfile":{"description":"Required only if `gpuRequestType` is `migProfile`. This states the memory profile to be used for the workload running NVIDIA Multi-Instance GPU (MIG) technology.","type":"string","deprecated":true,"minLength":1,"enum":["1g.5gb","1g.10gb","2g.10gb","2g.20gb","3g.20gb","3g.40gb","4g.20gb","4g.40gb","7g.40gb","7g.80gb"],"nullable":true},"ComputeItemizedFields":{"properties":{"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"}},"nullable":true,"type":"object"},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"nullable":true},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"CommonStorage":{"properties":{"storage":{"$ref":"#/components/schemas/CommonStorageFields"}},"nullable":true,"type":"object"},"CommonStorageFields":{"properties":{"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"git":{"$ref":"#/components/schemas/GitItems"},"configMapVolume":{"$ref":"#/components/schemas/ConfigMapField"},"secretVolume":{"$ref":"#/components/schemas/SecretItems1"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"}},"nullable":true,"type":"object"},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true}},"nullable":true,"type":"object"},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"nullable":true},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"PvcAccessModes":{"description":"Requested access mode(s) for the newly created PVC.","properties":{"readWriteOnce":{"description":"Requesting claim that can be mounted in read/write mode to exactly one host. This is the default access mode.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Requesting claim that can be mounted in read-only mode to many hosts.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Requesting claim that can be mounted in read/write mode to many hosts.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"The volume mode required by the claim. Choose Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"}},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string"}}},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`.","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ConfigMapField":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretItems1":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance2"},"nullable":true},"SecretInstance2":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret5"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret5":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"nullable":true},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"CommonSecurity":{"properties":{"security":{"$ref":"#/components/schemas/CommonSecurityFlatFields"}},"nullable":true,"type":"object"},"CommonSecurityFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonSecurityNonOverridable"},{"$ref":"#/components/schemas/CommonSecurityOverridable"}],"nullable":true,"type":"object"},"CommonSecurityNonOverridable":{"description":"Security non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"},"capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"runAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"readOnlyRootFilesystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group ids are determined according to the identity provider (idp) access token. This option is intended for internal use of the environment UI form. For more information, see [User Identity](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/runai-setup/config/non-root-containers/).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"CommonSecurityOverridable":{"description":"Security overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"runAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"runAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"supplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true}},"nullable":true,"type":"object"},"InferenceFields":{"properties":{"servingPort":{"$ref":"#/components/schemas/ServingPort"},"autoscaling":{"$ref":"#/components/schemas/AutoScaling"},"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}},"nullable":true,"type":"object"},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsersOrGroups"],"description":"The authorization type for serving port URL access. Defaults to public, which means no authorization is required. If set to authenticatedUsers, only authenticated Run:ai users are allowed to access the URL. If set to authorizedUsersOrGroups, only users or groups specified in authorizedUsers or authorizedGroups are allowed to access the URL. Supported from cluster version 2.19.","nullable":true},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"description":"Auto scaling common fields","properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true},"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true},"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true},"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true},"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}},"nullable":true,"type":"object"},"AutoScalingMetricFields":{"description":"Auto scaling metric fields","properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"},"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}},"nullable":true,"type":"object"},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta1"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta1":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId2"},"projectId":{"$ref":"#/components/schemas/ProjectId2"},"departmentId":{"$ref":"#/components/schemas/DepartmentId2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadId2":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"DepartmentId2":{"description":"The id of the department.","type":"string","minLength":1},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences":{"post":{"summary":"Create an inference.","operationId":"create_inference1","description":"Create an inference using container related fields.","tags":["Inferences"],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/InferenceCreationRequest"}}}},"responses":{"202":{"description":"Request completed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference data.

> Retrieve inference details using a workload id.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta1"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta1":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId2"},"projectId":{"$ref":"#/components/schemas/ProjectId2"},"departmentId":{"$ref":"#/components/schemas/DepartmentId2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1},"WorkloadId2":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"ProjectId2":{"description":"The id of the project.","type":"string"},"DepartmentId2":{"description":"The id of the department.","type":"string","minLength":1},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"},{"$ref":"#/components/schemas/InferenceFlatFields"},{"$ref":"#/components/schemas/CommonItemizedFields"},{"$ref":"#/components/schemas/ConnectivityFields"},{"$ref":"#/components/schemas/Compute"},{"$ref":"#/components/schemas/CommonStorage"},{"$ref":"#/components/schemas/CommonSecurity"},{"$ref":"#/components/schemas/InferenceFields"}]},"CommonFlatFields":{"allOf":[{"$ref":"#/components/schemas/ContainerOverridable"},{"$ref":"#/components/schemas/ContainerNonOverridable"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPod"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPG"}],"nullable":true,"type":"object"},"ContainerOverridable":{"description":"Container overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"nullable":true},"args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ContainerNonOverridable":{"description":"Container non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"nullable":true},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"workingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"nullable":true},"createHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"probes":{"$ref":"#/components/schemas/Probes"}},"nullable":true,"type":"object"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"ResourcesFlatFieldsPerPod":{"description":"Resource parameters.","properties":{"nodeType":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"}}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"}}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string"},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string"}}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"ResourcesFlatFieldsPerPG":{"description":"Resource parameters.","properties":{"category":{"$ref":"#/components/schemas/Category"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"}},"nullable":true,"type":"object"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload.  Valid values are: very-low, low, medium-low, medium, medium-high, high, very-high.  You can use this parameter to adjust the workload's scheduling behavior.  Each workload type has a default priority.  To view the default priority for each workload type, use the GET /workload-types endpoint.","type":"string","nullable":true},"NodeRelatedFlatFields":{"description":"Node related parameters.","properties":{"nodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string"},"nullable":true}},"nullable":true,"type":"object"},"InferenceFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"}],"nullable":true,"type":"object"},"CommonItemizedFields":{"allOf":[{"$ref":"#/components/schemas/EnvironmentVariablesField"},{"$ref":"#/components/schemas/AdvancedItemizedFields"}],"nullable":true,"type":"object"},"EnvironmentVariablesField":{"properties":{"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"}},"nullable":true,"type":"object"},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"nullable":true},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, credential, configMap and podFieldRef)","type":"string","nullable":true},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"AdvancedItemizedFields":{"allOf":[{"$ref":"#/components/schemas/AnnotationsField"},{"$ref":"#/components/schemas/LabelsField"},{"$ref":"#/components/schemas/ImagePullSecretsField"},{"$ref":"#/components/schemas/TolerationsField"}],"nullable":true,"type":"object"},"AnnotationsField":{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"}},"nullable":true,"type":"object"},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the annotation.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"LabelsField":{"properties":{"labels":{"$ref":"#/components/schemas/Labels"}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the label.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"ImagePullSecretsField":{"properties":{"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"}},"nullable":true,"type":"object"},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials."},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"TolerationsField":{"properties":{"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"nullable":true},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","nullable":true},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","nullable":true},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"ConnectivityFields":{"properties":{"ports":{"$ref":"#/components/schemas/Ports"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"}},"nullable":true,"type":"object"},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system..","type":"string","nullable":true},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Compute":{"properties":{"compute":{"$ref":"#/components/schemas/ComputeFields"}},"nullable":true,"type":"object"},"ComputeFields":{"allOf":[{"$ref":"#/components/schemas/ComputeFlatFields"},{"$ref":"#/components/schemas/ComputeItemizedFields"}],"nullable":true,"type":"object"},"ComputeFlatFields":{"properties":{"gpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory, gpuPortion or migProfile [deprecated].","type":"integer","format":"int32","nullable":true,"minimum":0},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"gpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"gpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"gpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"gpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"migProfile":{"$ref":"#/components/schemas/MigProfile","depracted":true},"cpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"cpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"cpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"cpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"largeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion, memory or  mig profile [deprecated]. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion`, `memory` or `migProfile` [deprecated].","type":"string","minLength":1,"enum":["portion","memory","migProfile"],"nullable":true},"MigProfile":{"description":"Required only if `gpuRequestType` is `migProfile`. This states the memory profile to be used for the workload running NVIDIA Multi-Instance GPU (MIG) technology.","type":"string","deprecated":true,"minLength":1,"enum":["1g.5gb","1g.10gb","2g.10gb","2g.20gb","3g.20gb","3g.40gb","4g.20gb","4g.40gb","7g.40gb","7g.80gb"],"nullable":true},"ComputeItemizedFields":{"properties":{"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"}},"nullable":true,"type":"object"},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"nullable":true},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"CommonStorage":{"properties":{"storage":{"$ref":"#/components/schemas/CommonStorageFields"}},"nullable":true,"type":"object"},"CommonStorageFields":{"properties":{"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"git":{"$ref":"#/components/schemas/GitItems"},"configMapVolume":{"$ref":"#/components/schemas/ConfigMapField"},"secretVolume":{"$ref":"#/components/schemas/SecretItems1"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"}},"nullable":true,"type":"object"},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true}},"nullable":true,"type":"object"},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"nullable":true},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"PvcAccessModes":{"description":"Requested access mode(s) for the newly created PVC.","properties":{"readWriteOnce":{"description":"Requesting claim that can be mounted in read/write mode to exactly one host. This is the default access mode.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Requesting claim that can be mounted in read-only mode to many hosts.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Requesting claim that can be mounted in read/write mode to many hosts.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"The volume mode required by the claim. Choose Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"}},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string"}}},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`.","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ConfigMapField":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretItems1":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance2"},"nullable":true},"SecretInstance2":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret5"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret5":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"nullable":true},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"CommonSecurity":{"properties":{"security":{"$ref":"#/components/schemas/CommonSecurityFlatFields"}},"nullable":true,"type":"object"},"CommonSecurityFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonSecurityNonOverridable"},{"$ref":"#/components/schemas/CommonSecurityOverridable"}],"nullable":true,"type":"object"},"CommonSecurityNonOverridable":{"description":"Security non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"},"capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"runAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"readOnlyRootFilesystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group ids are determined according to the identity provider (idp) access token. This option is intended for internal use of the environment UI form. For more information, see [User Identity](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/runai-setup/config/non-root-containers/).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"CommonSecurityOverridable":{"description":"Security overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"runAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"runAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"supplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true}},"nullable":true,"type":"object"},"InferenceFields":{"properties":{"servingPort":{"$ref":"#/components/schemas/ServingPort"},"autoscaling":{"$ref":"#/components/schemas/AutoScaling"},"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}},"nullable":true,"type":"object"},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsersOrGroups"],"description":"The authorization type for serving port URL access. Defaults to public, which means no authorization is required. If set to authenticatedUsers, only authenticated Run:ai users are allowed to access the URL. If set to authorizedUsersOrGroups, only users or groups specified in authorizedUsers or authorizedGroups are allowed to access the URL. Supported from cluster version 2.19.","nullable":true},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"description":"Auto scaling common fields","properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true},"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true},"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true},"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true},"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}},"nullable":true,"type":"object"},"AutoScalingMetricFields":{"description":"Auto scaling metric fields","properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"},"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}},"nullable":true,"type":"object"},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"get":{"summary":"Get inference data.","operationId":"get_inference","description":"Retrieve inference details using a workload id.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Delete an inference.

> Delete an inference using a workload id.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"responses":{"202Accepted":{"description":"Accepted.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HttpResponse"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"schemas":{"HttpResponse":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"delete":{"summary":"Delete an inference.","operationId":"delete_inference","description":"Delete an inference using a workload id.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"202":{"$ref":"#/components/responses/202Accepted"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Update inference spec. \[Experimental]

> Update the specification of an existing inference workload.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"InferenceUpdateRequest":{"allOf":[{"$ref":"#/components/schemas/InferenceUpdateSpec"}]},"InferenceUpdateSpec":{"description":"The specifications of the inference to be updated.","properties":{"spec":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"},{"$ref":"#/components/schemas/EnvironmentVariablesField"},{"$ref":"#/components/schemas/Compute"},{"$ref":"#/components/schemas/InferenceUpdateSpecAutoscaling"},{"$ref":"#/components/schemas/InferenceUpdateSpecServingConfiguration"}]}}},"CommonFlatFields":{"allOf":[{"$ref":"#/components/schemas/ContainerOverridable"},{"$ref":"#/components/schemas/ContainerNonOverridable"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPod"},{"$ref":"#/components/schemas/ResourcesFlatFieldsPerPG"}],"nullable":true,"type":"object"},"ContainerOverridable":{"description":"Container overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"command":{"description":"A command to the server as the entry point of the container running the workload.","type":"string","minLength":1,"nullable":true},"args":{"description":"Arguments to the command that the container running the workload executes.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ContainerNonOverridable":{"description":"Container non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"nullable":true},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"workingDir":{"description":"Container's working directory. If not specified, the container runtime default will be used. This may be configured in the container image.","type":"string","minLength":1,"nullable":true},"createHomeDir":{"description":"When set to `true`, creates a home directory for the container.","type":"boolean","nullable":true},"probes":{"$ref":"#/components/schemas/Probes"}},"nullable":true,"type":"object"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"ResourcesFlatFieldsPerPod":{"description":"Resource parameters.","properties":{"nodeType":{"description":"Nodes (machines), or a group of nodes on which the workload will run. To use this feature, your Administrator will need to label nodes. For more information, see [Group Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/researcher-setup/limit-to-node-group). When using this flag with with Project-based affinity, it refines the list of allowable node groups set in the Project. For more information, see [Projects](https://docshub.run.ai/guides/platform-management/aiinitiatives/organization/projects).","type":"string","minLength":1,"nullable":true},"nodeAffinityRequired":{"$ref":"#/components/schemas/NodeAffinityRequired"},"podAffinity":{"$ref":"#/components/schemas/PodAffinity"}},"nullable":true,"type":"object"},"NodeAffinityRequired":{"type":"object","description":"If the affinity requirements specified by this field are not met at scheduling time, the pod will not be scheduled onto the node. If the affinity requirements specified by this field cease to be met at some point during pod execution (e.g. due to an update), the system may or may not try to eventually evict the pod from its node.","properties":{"nodeSelectorTerms":{"description":"A list of node selector terms. The terms are ORed.","type":"array","items":{"$ref":"#/components/schemas/NodeSelectorTerm"}}},"nullable":true},"NodeSelectorTerm":{"type":"object","description":"A null or empty node selector term matches no objects. The requirements of them are ANDed.","properties":{"matchExpressions":{"description":"A list of node selector requirements by node's labels.","type":"array","items":{"$ref":"#/components/schemas/MatchExpression"}}},"nullable":true},"MatchExpression":{"type":"object","description":"A selector that contains values, a key, and an operator that relates the key and values.","properties":{"key":{"description":"The label key that the selector applies to (mandatory).","type":"string"},"operator":{"$ref":"#/components/schemas/MatchExpressionOperator"},"values":{"description":"An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer.","type":"array","items":{"type":"string"}}},"required":["key","operator"],"nullable":true},"MatchExpressionOperator":{"description":"Represents a key's relationship to a set of values (mandatory).","type":"string","enum":["In","NotIn","Exists","DoesNotExist","Gt","Lt"]},"PodAffinity":{"description":"Pod affinity scheduling rules (e.g. co-locate this workload in the same node, zone, etc. as some other workloads).","type":"object","properties":{"type":{"$ref":"#/components/schemas/PodAffinityType"},"key":{"description":"The label key to use. (mandatory)","type":"string","nullable":true}},"nullable":true},"PodAffinityType":{"description":"The affinity type, required or preferred. (mandatory)","type":"string","enum":["Required","Preferred"],"nullable":true},"ResourcesFlatFieldsPerPG":{"description":"Resource parameters.","properties":{"category":{"$ref":"#/components/schemas/Category"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"}},"nullable":true,"type":"object"},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload.  Valid values are: very-low, low, medium-low, medium, medium-high, high, very-high.  You can use this parameter to adjust the workload's scheduling behavior.  Each workload type has a default priority.  To view the default priority for each workload type, use the GET /workload-types endpoint.","type":"string","nullable":true},"NodeRelatedFlatFields":{"description":"Node related parameters.","properties":{"nodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string"},"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariablesField":{"properties":{"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"}},"nullable":true,"type":"object"},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"nullable":true},"value":{"description":"The value of the environment variable. (mutually exclusive with secret, credential, configMap and podFieldRef)","type":"string","nullable":true},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key use to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key use to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key use to populate the environment variable","properties":{"path":{"description":"The field path resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Compute":{"properties":{"compute":{"$ref":"#/components/schemas/ComputeFields"}},"nullable":true,"type":"object"},"ComputeFields":{"allOf":[{"$ref":"#/components/schemas/ComputeFlatFields"},{"$ref":"#/components/schemas/ComputeItemizedFields"}],"nullable":true,"type":"object"},"ComputeFlatFields":{"properties":{"gpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory, gpuPortion or migProfile [deprecated].","type":"integer","format":"int32","nullable":true,"minimum":0},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"gpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"gpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The system guarantees The gpuPotionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"gpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"gpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The system guarantees The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"migProfile":{"$ref":"#/components/schemas/MigProfile","depracted":true},"cpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, .etc). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"cpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, .etc). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"cpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, .etc). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"cpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, .etc). The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"largeShmRequest":{"description":"A large /dev/shm device to mount into a container running the created workload. An shm is a shared file system mounted on RAM.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion, memory or  mig profile [deprecated]. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion`, `memory` or `migProfile` [deprecated].","type":"string","minLength":1,"enum":["portion","memory","migProfile"],"nullable":true},"MigProfile":{"description":"Required only if `gpuRequestType` is `migProfile`. This states the memory profile to be used for the workload running NVIDIA Multi-Instance GPU (MIG) technology.","type":"string","deprecated":true,"minLength":1,"enum":["1g.5gb","1g.10gb","2g.10gb","2g.20gb","3g.20gb","3g.40gb","4g.20gb","4g.40gb","7g.40gb","7g.80gb"],"nullable":true},"ComputeItemizedFields":{"properties":{"extendedResources":{"$ref":"#/components/schemas/ExtendedResources"}},"nullable":true,"type":"object"},"ExtendedResources":{"description":"Extended resources and their quantity.","type":"array","items":{"$ref":"#/components/schemas/ExtendedResource"},"nullable":true},"ExtendedResource":{"description":"Quantity of an extended resource.","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"nullable":true},"quantity":{"description":"The requested quantity for the resource.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the extended resource is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"InferenceUpdateSpecAutoscaling":{"properties":{"autoscaling":{"$ref":"#/components/schemas/AutoScaling"}}},"AutoScaling":{"allOf":[{"$ref":"#/components/schemas/AutoScalingCommonFields"},{"$ref":"#/components/schemas/AutoScalingMetricFields"}],"nullable":true,"type":"object"},"AutoScalingCommonFields":{"description":"Auto scaling common fields","properties":{"metricThresholdPercentage":{"description":"The percentage of metric threshold value to use for autoscaling. Defaults to 70. Applicable only with the 'throughput' and 'concurrency' metrics","type":"number","format":"float","minimum":1,"maximum":100,"nullable":true},"minReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1. Use 0 to allow scale-to-zero","type":"integer","format":"int32","minimum":0,"nullable":true},"maxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"initialReplicas":{"description":"The number of replicas to run when initializing the workload for the first time. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"nullable":true},"activationReplicas":{"description":"The number of replicas to run when scaling-up from zero. Defaults to minReplicas, or to 1 if minReplicas is set to 0","type":"integer","format":"int32","minimum":1,"nullable":true},"concurrencyHardLimit":{"description":"The maximum number of requests allowed to flow to a single replica at any time. 0 means no limit","type":"integer","format":"int32","minimum":0,"nullable":true},"scaleToZeroRetentionSeconds":{"description":"The minimum amount of time (in seconds) that the last replica will remain active after a scale-to-zero decision. Defaults to 0. Available only if minReplicas is set to 0","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"scaleDownDelaySeconds":{"description":"The minimum amount of time (in seconds) that a replica will remain active after a scale-down decision","type":"integer","format":"int32","minimum":0,"maximum":3600,"nullable":true},"initializationTimeoutSeconds":{"description":"Use `servingConfiguration.initializationTimeoutSeconds` instead.  If this field is set, it will be ignored and the value under `servingConfiguration` will be used. The maximum amount of time (in seconds) to wait for the container to become ready.","type":"integer","format":"int32","minimum":1,"nullable":true,"deprecated":true}},"nullable":true,"type":"object"},"AutoScalingMetricFields":{"description":"Auto scaling metric fields","properties":{"metric":{"$ref":"#/components/schemas/AutoScalingMetric"},"metricThreshold":{"description":"The threshold to use with the specified metric for autoscaling. Mandatory if metric is specified","type":"integer","format":"int32","nullable":true}},"nullable":true,"type":"object"},"AutoScalingMetric":{"description":"The metric to use for autoscaling. Mandatory if minReplicas < maxReplicas, except for the special case where minReplicas is set to 0 and maxReplicas is set to 1, as in this case autoscaling decisions are made according to network activity rather than metrics. Use one of the built-in metrics of 'throughput', 'concurrency' or 'latency', or any other available custom metric. Only the 'throughput' and 'concurrency' metrics support scale-to-zero","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"InferenceUpdateSpecServingConfiguration":{"properties":{"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}}},"ServingConfiguration":{"description":"The inference workload serving configuration.","properties":{"initializationTimeoutSeconds":{"description":"The maximum time (in seconds) allowed for a workload to initialize and become ready. If the workload does not start within this time, it will be moved to failed state.","type":"integer","format":"int32","minimum":1,"nullable":true},"requestTimeoutSeconds":{"description":"The maximum time (in seconds) allowed to process an end-user request. If no response is returned within this time, the request will be ignored.Supported from Cluster version 2.22","type":"integer","format":"int32","minimum":1,"nullable":true}},"nullable":true,"type":"object"},"Inference1":{"allOf":[{"$ref":"#/components/schemas/WorkloadMeta1"},{"$ref":"#/components/schemas/InferenceSpec"}]},"WorkloadMeta1":{"required":["name","requestedName","workloadId","projectId","clusterId","createdBy","createdAt","desiredPhase"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"requestedName":{"description":"The name as was requested for the workload. If useGivenNameAsPrefix, in the creation request, is false, name and requestedName should be identical. Otherwise, name should be composed of requestedName followed by a suffix of random characters.","type":"string"},"workloadId":{"$ref":"#/components/schemas/WorkloadId2"},"projectId":{"$ref":"#/components/schemas/ProjectId2"},"departmentId":{"$ref":"#/components/schemas/DepartmentId2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"createdBy":{"description":"The user who created the workload","type":"string"},"createdAt":{"description":"The creation time of the workload.","type":"string","format":"date-time"},"deletedAt":{"description":"The deletion time of the workload.","type":"string","nullable":true,"format":"date-time"},"desiredPhase":{"$ref":"#/components/schemas/WorkloadDesiredPhase"},"actualPhase":{"$ref":"#/components/schemas/Phase"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1},"WorkloadId2":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"ProjectId2":{"description":"The id of the project.","type":"string"},"DepartmentId2":{"description":"The id of the department.","type":"string","minLength":1},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadDesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"InferenceSpec":{"description":"The specifications of the inference to be created.","properties":{"spec":{"$ref":"#/components/schemas/InferenceSpecSpec"}}},"InferenceSpecSpec":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"},{"$ref":"#/components/schemas/InferenceFlatFields"},{"$ref":"#/components/schemas/CommonItemizedFields"},{"$ref":"#/components/schemas/ConnectivityFields"},{"$ref":"#/components/schemas/Compute"},{"$ref":"#/components/schemas/CommonStorage"},{"$ref":"#/components/schemas/CommonSecurity"},{"$ref":"#/components/schemas/InferenceFields"}]},"InferenceFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonFlatFields"},{"$ref":"#/components/schemas/NodeRelatedFlatFields"}],"nullable":true,"type":"object"},"CommonItemizedFields":{"allOf":[{"$ref":"#/components/schemas/EnvironmentVariablesField"},{"$ref":"#/components/schemas/AdvancedItemizedFields"}],"nullable":true,"type":"object"},"AdvancedItemizedFields":{"allOf":[{"$ref":"#/components/schemas/AnnotationsField"},{"$ref":"#/components/schemas/LabelsField"},{"$ref":"#/components/schemas/ImagePullSecretsField"},{"$ref":"#/components/schemas/TolerationsField"}],"nullable":true,"type":"object"},"AnnotationsField":{"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"}},"nullable":true,"type":"object"},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the annotation.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"LabelsField":{"properties":{"labels":{"$ref":"#/components/schemas/Labels"}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true},"value":{"description":"The value of the label.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"ImagePullSecretsField":{"properties":{"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"}},"nullable":true,"type":"object"},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials."},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"TolerationsField":{"properties":{"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"nullable":true,"type":"object"},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"nullable":true},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","nullable":true},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Exists, forbidden otherwise.","type":"string","nullable":true},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute. taint.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"ConnectivityFields":{"properties":{"ports":{"$ref":"#/components/schemas/Ports"},"exposedUrls":{"$ref":"#/components/schemas/ExposedUrls"},"relatedUrls":{"$ref":"#/components/schemas/RelatedUrls"}},"nullable":true,"type":"object"},"Ports":{"description":"Set of container ports that the workload exposes.","type":"array","items":{"$ref":"#/components/schemas/Port"},"nullable":true},"Port":{"description":"A port for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"serviceType":{"$ref":"#/components/schemas/PortServiceType"},"external":{"description":"The external port which allows a connection to the container port. If not specified, the port will be auto-generated by the system..","type":"integer","format":"int32","nullable":true},"toolType":{"description":"The tool type that runs on this port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this port.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PortServiceType":{"description":"The service type of the port (mandatory).","type":"string","enum":["LoadBalancer","NodePort","ClusterIP"],"nullable":true},"ExposedUrls":{"description":"Set of container ports that the workload exposes via URLs.","type":"array","items":{"$ref":"#/components/schemas/ExposedUrl"},"nullable":true},"ExposedUrl":{"description":"A URL for accessing the workload.","properties":{"container":{"description":"The port that the container running the workload exposes. (mandatory)","type":"integer","format":"int32","nullable":true},"url":{"description":"The URL for connecting to the container port. If not specified, the URL will be auto-generated by the system..","type":"string","nullable":true},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"toolType":{"description":"The tool type that runs on this container port.","type":"string","nullable":true},"toolName":{"description":"A name describing the tool that runs on this url.","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Use 'true' in case the instance is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"RelatedUrls":{"description":"Set of URLs that are related to the workload.","type":"array","items":{"$ref":"#/components/schemas/RelatedUrl"},"nullable":true},"RelatedUrl":{"description":"A URL that is related to the workload. For example, a URL to an external server providing statistics or logging about the workload.","properties":{"url":{"description":"The URL for connecting an external service related to the workload. (mandatory)","type":"string","nullable":true},"type":{"description":"The type of service that the url provides. For example, wandb (Weights & Biases). (mandatory)","type":"string","nullable":true},"name":{"description":"Unique name to identify the instance. primarily used for policy locked rules.","type":"string","nullable":true},"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"CommonStorage":{"properties":{"storage":{"$ref":"#/components/schemas/CommonStorageFields"}},"nullable":true,"type":"object"},"CommonStorageFields":{"properties":{"dataVolume":{"$ref":"#/components/schemas/DataVolumeItems"},"pvc":{"$ref":"#/components/schemas/PvcItems"},"hostPath":{"$ref":"#/components/schemas/HostPathItems"},"nfs":{"$ref":"#/components/schemas/NfsItems"},"git":{"$ref":"#/components/schemas/GitItems"},"configMapVolume":{"$ref":"#/components/schemas/ConfigMapField"},"secretVolume":{"$ref":"#/components/schemas/SecretItems1"},"emptyDirVolume":{"$ref":"#/components/schemas/EmptyDirItems"}},"nullable":true,"type":"object"},"DataVolumeItems":{"description":"Set of data volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/DataVolumeInstance"},"nullable":true},"DataVolumeInstance":{"allOf":[{"$ref":"#/components/schemas/DataVolume"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"DataVolume":{"properties":{"id":{"description":"The unique identifier of the data volume. (mandatory)","type":"string","format":"uuid","nullable":true},"mountPath":{"description":"The path where the data volume will be mounted. (mandatory)","type":"string","nullable":true}},"nullable":true,"type":"object"},"ExcludeField":{"properties":{"exclude":{"description":"Use 'true' in case the item is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"type":"object","nullable":true},"PvcItems":{"description":"Set of pvc persistent volume claims to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/PvcInstance"},"nullable":true},"PvcInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Pvc"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"StorageInstanceName":{"properties":{"name":{"description":"unique name to identify the instance. primarily used for policy locked rules.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"Pvc":{"allOf":[{"$ref":"#/components/schemas/PvcFieldsUpdatable"},{"$ref":"#/components/schemas/PvcFieldsNonUpdatable"}]},"PvcFieldsUpdatable":{"properties":{"path":{"description":"Local path within the workload to which the PVC bucket will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"PvcFieldsNonUpdatable":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"ephemeral":{"description":"Use `true` to set PVC to ephemeral. If set to `true`, the PVC will be deleted when the workload is stopped. Not supported for inference workloads.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"},"dataSharing":{"description":"use `true` to share the PVC data to all projects under the selected scope.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"nullable":true},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"PvcAccessModes":{"description":"Requested access mode(s) for the newly created PVC.","properties":{"readWriteOnce":{"description":"Requesting claim that can be mounted in read/write mode to exactly one host. This is the default access mode.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Requesting claim that can be mounted in read-only mode to many hosts.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Requesting claim that can be mounted in read/write mode to many hosts.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"The volume mode required by the claim. Choose Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"}},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string"}}},"HostPathItems":{"description":"Set of host paths to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/HostPathInstance"},"nullable":true},"HostPathInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/HostPath"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"HostPath":{"properties":{"path":{"description":"Local path within the controller to which the host volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the volume to be mounted with read-only permissions. Defaults to false.","type":"boolean","default":true,"nullable":true},"mountPath":{"description":"The path that the host volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPropagation":{"$ref":"#/components/schemas/HostPathMountPropagation"}},"nullable":true,"type":"object"},"HostPathMountPropagation":{"description":"Share this volumes mount with other containers. If set to HostToContainer, this volume mount will receive all subsequent mounts that are mounted to this volume or any of its subdirectories. In case of multiple hostPath entries, this field should have the same value for all of them.","type":"string","enum":["None","HostToContainer"],"nullable":true},"NfsItems":{"description":"Set of nfs volumes to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/NfsInstance"},"nullable":true},"NfsInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Nfs"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Nfs":{"properties":{"path":{"description":"Path that is exported by the NFS server (mandatory). For more information, see [NFS](https://kubernetes.io/docs/concepts/storage/volumes#nfs).","type":"string","minLength":1,"nullable":true},"readOnly":{"description":"Force the NFS export to be mounted with read-only permissions.","type":"boolean","default":true,"nullable":true},"server":{"description":"The hostname or IP address of the NFS server. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"The path that the NFS volume will be mounted to when in use. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitItems":{"description":"Set of git repositories to use in the workload.","type":"array","items":{"$ref":"#/components/schemas/GitInstance"},"nullable":true},"GitInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/GitCommon"},{"$ref":"#/components/schemas/GitPassword"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"GitCommon":{"properties":{"repository":{"description":"URL to a remote Git repository. The content of this repository will be mapped to the container running the workload. (mandatory)","type":"string","minLength":1,"nullable":true},"branch":{"description":"Specific branch to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"revision":{"description":"Specific revision to synchronize the repository from.","type":"string","minLength":1,"nullable":true},"path":{"description":"Local path within the workload to which the Git repository will be mapped (mandatory).","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"GitPassword":{"properties":{"passwordSecret":{"description":"Secret containing the credentials of the repository (needed for non public repository which requires authentication).","type":"string","minLength":1,"nullable":true},"secretKeyOfUser":{"description":"The key to use for loading the user name from the secret. The default is `User`.","type":"string","minLength":1,"nullable":true},"secretKeyOfPassword":{"description":"The key to use for loading the password from the secret. The default is `Password`.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"ConfigMapField":{"description":"Set of config map volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/ConfigMapInstance"},"nullable":true},"ConfigMapInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/ConfigMap"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"ConfigMap":{"properties":{"configMap":{"description":"The name of the ConfigMap resource. (mandatory)","type":"string","minLength":1,"nullable":true},"mountPath":{"description":"Local path within the workload to which the ConfigMap will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true},"subPath":{"description":"Path within the volume from which the container's volume should be mounted.","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretItems1":{"description":"Set of secret volumes to use in the workload","type":"array","items":{"$ref":"#/components/schemas/SecretInstance2"},"nullable":true},"SecretInstance2":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/Secret5"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"Secret5":{"allOf":[{"$ref":"#/components/schemas/SecretFieldsUpdatable"},{"$ref":"#/components/schemas/SecretFieldsNonUpdatable"}]},"SecretFieldsUpdatable":{"properties":{"mountPath":{"description":"Local path within the workload to which the Secret will be mapped to. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"SecretFieldsNonUpdatable":{"properties":{"secret":{"description":"The name of the Secret resource. (mandatory)","type":"string","minLength":1,"nullable":true}},"nullable":true,"type":"object"},"EmptyDirItems":{"description":"A list of emptyDir volumes to mount in the workload.","type":"array","items":{"$ref":"#/components/schemas/EmptyDirInstance"},"nullable":true},"EmptyDirInstance":{"allOf":[{"$ref":"#/components/schemas/StorageInstanceName"},{"$ref":"#/components/schemas/EmptyDir"},{"$ref":"#/components/schemas/ExcludeField"}],"nullable":true,"type":"object"},"EmptyDir":{"properties":{"path":{"description":"Local path within the workload to which the EmptyDir volume will be mapped. (mandatory)","type":"string","minLength":1,"nullable":true},"medium":{"description":"The type of storage medium for the volume. Use \"Memory\" for memory-backed storage, or leave empty for disk-backed storage.","type":"string","minLength":1,"nullable":true},"sizeLimit":{"description":"The total amount of local storage or memory required for the emptyDir volume. Specify using Kubernetes quantity format (e.g., 1G, 500Mi).","type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true}},"nullable":true,"type":"object"},"CommonSecurity":{"properties":{"security":{"$ref":"#/components/schemas/CommonSecurityFlatFields"}},"nullable":true,"type":"object"},"CommonSecurityFlatFields":{"allOf":[{"$ref":"#/components/schemas/CommonSecurityNonOverridable"},{"$ref":"#/components/schemas/CommonSecurityOverridable"}],"nullable":true,"type":"object"},"CommonSecurityNonOverridable":{"description":"Security non overrideable fields. In the context of assets,these are environment asset fields that cannot be overriden in the submit workload request.","properties":{"uidGidSource":{"$ref":"#/components/schemas/UidGidSource"},"capabilities":{"description":"Add POSIX capabilities to running containers. Defaults to the default set of capabilities granted by the container runtime.","type":"array","items":{"$ref":"#/components/schemas/Capability"},"nullable":true},"seccompProfileType":{"$ref":"#/components/schemas/SeccompProfileType"},"runAsNonRoot":{"description":"Force the container to run as a non-root user.","type":"boolean","nullable":true},"readOnlyRootFilesystem":{"description":"If true, mounts the container's root filesystem as read-only.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"UidGidSource":{"description":"Indicate the way to determine the user and group ids of the container. The options are a. `fromTheImage` - user and group ids are determined by the docker image that the container runs. this is the default option. b. `custom` - user and group ids can be specified in the environment asset and/or the workload creation request. c. `idpToken` - user and group ids are determined according to the identity provider (idp) access token. This option is intended for internal use of the environment UI form. For more information, see [User Identity](https://run-ai-docs.nvidia.com/self-hosted/2.22/admin/runai-setup/config/non-root-containers/).","type":"string","enum":["fromTheImage","fromIdpToken","custom"],"nullable":true},"Capability":{"type":"string","enum":["AUDIT_CONTROL","AUDIT_READ","AUDIT_WRITE","BLOCK_SUSPEND","CHOWN","DAC_OVERRIDE","DAC_READ_SEARCH","FOWNER","FSETID","IPC_LOCK","IPC_OWNER","KILL","LEASE","LINUX_IMMUTABLE","MAC_ADMIN","MAC_OVERRIDE","MKNOD","NET_ADMIN","NET_BIND_SERVICE","NET_BROADCAST","NET_RAW","SETGID","SETFCAP","SETPCAP","SETUID","SYS_ADMIN","SYS_BOOT","SYS_CHROOT","SYS_MODULE","SYS_NICE","SYS_PACCT","SYS_PTRACE","SYS_RAWIO","SYS_RESOURCE","SYS_TIME","SYS_TTY_CONFIG","SYSLOG","WAKE_ALARM"]},"SeccompProfileType":{"description":"Indicates which kind of seccomp profile will be applied to the container. The options are a. `RuntimeDefault` - the container runtime default profile should be used. b. `Unconfined` - no profile should be applied. c. `Localhost` is not yet supported by Run:ai.","type":"string","enum":["RuntimeDefault","Unconfined","Localhost"],"nullable":true},"CommonSecurityOverridable":{"description":"Security overrideable fields. In the context of assets,these are environment asset fields that can be overriden in the submit workload request.","properties":{"runAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"runAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"supplementalGroups":{"description":"Comma separated list of groups that the user running the container belongs to, in addition to the group indicated by runAsGid. Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled. Using an empty string implies reverting the supplementary groups of the image.","type":"string","nullable":true}},"nullable":true,"type":"object"},"InferenceFields":{"properties":{"servingPort":{"$ref":"#/components/schemas/ServingPort"},"autoscaling":{"$ref":"#/components/schemas/AutoScaling"},"servingConfiguration":{"$ref":"#/components/schemas/ServingConfiguration"}},"nullable":true,"type":"object"},"ServingPort":{"description":"A port for accessing the inference service","allOf":[{"$ref":"#/components/schemas/ServingPortContainerAndProtocol"},{"$ref":"#/components/schemas/ServingPortAccess"}],"nullable":true,"type":"object"},"ServingPortContainerAndProtocol":{"properties":{"container":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"protocol":{"$ref":"#/components/schemas/ServingPortProtocol"}}},"ServingPortProtocol":{"description":"The protocol used by the port, defaults to http","type":"string","enum":["http","grpc"],"nullable":true},"ServingPortAccess":{"properties":{"authorizationType":{"$ref":"#/components/schemas/ServingPortAccessAuthorizationTypeEnum"},"authorizedUsers":{"description":"List of users that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"authorizedGroups":{"description":"List of groups that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","type":"array","items":{"type":"string"},"nullable":true},"clusterLocalAccessOnly":{"description":"Configure the serving port URL to be available only on the cluster-local network, and not externally. Defaults to false","type":"boolean","nullable":true}}},"ServingPortAccessAuthorizationTypeEnum":{"type":"string","enum":["public","authenticatedUsers","authorizedUsersOrGroups"],"description":"The authorization type for serving port URL access. Defaults to public, which means no authorization is required. If set to authenticatedUsers, only authenticated Run:ai users are allowed to access the URL. If set to authorizedUsersOrGroups, only users or groups specified in authorizedUsers or authorizedGroups are allowed to access the URL. Supported from cluster version 2.19.","nullable":true},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}":{"patch":{"summary":"Update inference spec. [Experimental]","operationId":"update_inference_spec","description":"Update the specification of an existing inference workload.","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/InferenceUpdateRequest"}}}},"responses":{"202":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Inference1"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference metrics data.

> Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"InferenceWorkloadMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specify which data to request.","explode":false,"schema":{"type":"array","items":{"$ref":"#/components/schemas/InferenceWorkloadMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"InferenceWorkloadMetricType":{"type":"string","description":"Specify which data to request.","enum":["THROUGHPUT","LATENCY"]},"MetricsResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}/metrics":{"get":{"summary":"Get inference metrics data.","description":"Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.","operationId":"get_inference_workload_metrics","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/InferenceWorkloadMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get inference pod's metrics data.

> Retrieve inference metrics pod's data by workload and pod id. Supported from control-plane version 2.18 or later.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.22"},"tags":[{"name":"Inferences","description":"Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes."}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"PodId":{"name":"podId","in":"path","description":"The requested pod id.","schema":{"type":"string","format":"uuid"},"required":true},"InferencePodMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specifies metrics data to request. Inference metrics are only available for inference workloads.","explode":false,"schema":{"type":"array","items":{"$ref":"#/components/schemas/InferencePodMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"InferencePodMetricType":{"type":"string","description":"Specifies what data to request.","enum":["THROUGHPUT","LATENCY"]},"MetricsResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/inferences/{workloadId}/pods/{podId}/metrics":{"get":{"summary":"Get inference pod's metrics data.","description":"Retrieve inference metrics pod's data by workload and pod id. Supported from control-plane version 2.18 or later.","operationId":"get_inference_workload_pod_metrics","tags":["Inferences"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/PodId"},{"$ref":"#/components/parameters/InferencePodMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```
