# Default values for hami-vgpu.

nameOverride: ""
fullnameOverride: ""
imagePullSecrets: []
version: "v2.5.0"

# Nvidia GPU Parameters
resourceName: "nvidia.com/gpu"
resourceMem: "nvidia.com/gpumem"
resourceMemPercentage: "nvidia.com/gpumem-percentage"
resourceCores: "nvidia.com/gpucores"
resourcePriority: "nvidia.com/priority"

# MLU Parameters
mluResourceName: "cambricon.com/vmlu"
mluResourceMem: "cambricon.com/mlu.smlu.vmemory"
mluResourceCores: "cambricon.com/mlu.smlu.vcore"

# Hygon DCU Parameters
dcuResourceName: "hygon.com/dcunum"
dcuResourceMem: "hygon.com/dcumem"
dcuResourceCores: "hygon.com/dcucores"

# Iluvatar GPU Parameters
iluvatarResourceName: "iluvatar.ai/vgpu"
iluvatarResourceMem: "iluvatar.ai/vcuda-memory"
iluvatarResourceCore: "iluvatar.ai/vcuda-core"

# Metax SGPU Parameters
metaxResourceName: "metax-tech.com/sgpu"
metaxResourceCore: "metax-tech.com/vcore"
metaxResourceMem: "metax-tech.com/vmemory"

schedulerName: "hami-scheduler"

podSecurityPolicy:
  enabled: false

global:
  gpuHookPath: /usr/local
  labels: {}
  annotations: {}
  managedNodeSelectorEnable: false
  managedNodeSelector:
    usage: "gpu"

scheduler:
  # @param nodeName defines the node name and the nvidia-vgpu-scheduler-scheduler will schedule to the node.
  # if we install the nvidia-vgpu-scheduler-scheduler as default scheduler, we need to remove the k8s default
  # scheduler pod from the cluster first, we must specify node name to skip the schedule workflow.
  nodeName: ""
  #nodeLabelSelector:
  #  "gpu": "on"
  overwriteEnv: "false"
  defaultSchedulerPolicy:
    nodeSchedulerPolicy: binpack
    gpuSchedulerPolicy: spread
  metricsBindAddress: ":9395"
  livenessProbe: false
  leaderElect: true
  # when leaderElect is true, replicas is available, otherwise replicas is 1.
  replicas: 1
  kubeScheduler:
    # @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default.
    enabled: true
    image: registry.k8s.io/kube-scheduler
    imageTag: ""
    imagePullPolicy: IfNotPresent
    resources: {}
    # If you do want to specify resources, uncomment the following lines, adjust them as necessary,
    # and remove the curly braces after 'resources:'.
    # limits:
    #   cpu: 1000m
    #   memory: 1000Mi
    # requests:
    #   cpu: 100m
    #   memory: 100Mi
    extraNewArgs:
      - --config=/config/config.yaml
      - -v=4
    extraArgs:
      - --policy-config-file=/config/config.json
      - -v=4
  extender:
    image: "beclab/hami"
    imagePullPolicy: IfNotPresent
    resources: {}
    # If you do want to specify resources, uncomment the following lines, adjust them as necessary,
    # and remove the curly braces after 'resources:'.
    # limits:
    #   cpu: 1000m
    #   memory: 1000Mi
    # requests:
    #   cpu: 100m
    #   memory: 100Mi
    extraArgs:
      - --debug
      - -v=4
  podAnnotations: {}
  tolerations: []
  #serviceAccountName: "hami-vgpu-scheduler-sa"
  admissionWebhook:
    customURL:
      enabled: false
      # must be an endpoint using https.
      # should generate host certs here
      host: 127.0.0.1  # hostname or ip, can be your node's IP if you want to use https://<nodeIP>:<schedulerPort>/<path>
      port: 31998
      path: /webhook
    whitelistNamespaces:
    # Specify the namespaces that the webhook will not be applied to.
    # - default
    # - kube-system
    # - istio-system
    reinvocationPolicy: Never
    failurePolicy: Ignore
  patch:
    image: jettech/kube-webhook-certgen:v1.5.2
    imageNew: liangjw/kube-webhook-certgen:v1.1.1
    imagePullPolicy: IfNotPresent
    priorityClassName: ""
    podAnnotations: {}
    nodeSelector: {}
    tolerations: []
    runAsUser: 2000
  service:
    type: NodePort  # Default type is NodePort, can be changed to ClusterIP
    httpPort: 443  # HTTP port
    schedulerPort: 31998  # NodePort for HTTP
    monitorPort: 31993  # Monitoring port
    labels: {}
    annotations: {}

devicePlugin:
  image: "beclab/hami"
  monitorimage: "beclab/hami"
  monitorctrPath: /usr/local/vgpu/containers
  imagePullPolicy: IfNotPresent
  deviceSplitCount: 100
  deviceMemoryScaling: 100
  deviceCoreScaling: 100
  runtimeClassName: ""
  migStrategy: "none"
  disablecorelimit: "false"
  passDeviceSpecsEnabled: false
  extraArgs:
    - -v=4

  service:
    type: NodePort  # Default type is NodePort, can be changed to ClusterIP
    httpPort: 31992
    labels: {}
    annotations: {}

  pluginPath: /var/lib/kubelet/device-plugins
  libPath: /usr/local/vgpu

  podAnnotations: {}
  nvidianodeSelector:
    gpu.bytetrade.io/cuda-supported: 'true'
  tolerations: []
  # The updateStrategy for DevicePlugin DaemonSet.
  # If you want to update the DaemonSet by manual, set type as "OnDelete".
  # We recommend use OnDelete update strategy because DevicePlugin pod restart will cause business pod restart, this behavior is destructive.
  # Otherwise, you can use RollingUpdate update strategy to rolling update DevicePlugin pod.
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1

  resources: {}
  # If you do want to specify resources, uncomment the following lines, adjust them as necessary,
  # and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 1000m
  #   memory: 1000Mi
  # requests:
  #   cpu: 100m
  #   memory: 100Mi

vgpuMonitor:
  resources: {}
  # If you do want to specify resources, uncomment the following lines, adjust them as necessary,
  # and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 1000m
  #   memory: 1000Mi
  # requests:
  #   cpu: 100m
  #   memory: 100Mi

devices:
  mthreads:
    enabled: false
    customresources:
      - mthreads.com/vgpu
  nvidia:
    gpuCorePolicy: default
  ascend:
    enabled: false
    image: ""
    imagePullPolicy: IfNotPresent
    extraArgs: []
    nodeSelector:
      ascend: "on"
    tolerations: []
    customresources:
      - huawei.com/Ascend910A
      - huawei.com/Ascend910A-memory
      - huawei.com/Ascend910B2
      - huawei.com/Ascend910B2-memory
      - huawei.com/Ascend910B
      - huawei.com/Ascend910B-memory
      - huawei.com/Ascend910B4
      - huawei.com/Ascend910B4-memory
      - huawei.com/Ascend310P
      - huawei.com/Ascend310P-memory