first commit
This commit is contained in:
96
frameworks/GPU/config/gpu/templates/device-plugin.yaml
Normal file
96
frameworks/GPU/config/gpu/templates/device-plugin.yaml
Normal file
@@ -0,0 +1,96 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: probe.orionx-k8s-device-plugin
|
||||
namespace: {{ .Release.Namespace }}
|
||||
data:
|
||||
probe.sh: |
|
||||
#!/bin/bash
|
||||
# Author: yuanzhilei@virtaitech.com
|
||||
#
|
||||
health=$(pidof k8s-device-plugin)
|
||||
if [[ -z $health ]]; then
|
||||
echo "k8s-device-plugin not exists."
|
||||
exit 1
|
||||
fi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: orionx-k8s-device-plugin
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: orionx-k8s-device-plugin
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: orionx-k8s-device-plugin
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: orionx-k8s-device-plugin
|
||||
orionx: orionx-k8s-device-plugin
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
containers:
|
||||
- image: eball/orionx-k8s-device-plugin:3.1.5-20230707
|
||||
imagePullPolicy: Always
|
||||
name: device-plugin
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /tmp/probe.sh
|
||||
periodSeconds: 10
|
||||
initialDelaySeconds: 60
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
# command: ["bash", "-c", "sleep 1d"]
|
||||
env:
|
||||
- name : ORION_CONTROLLER
|
||||
value : {{ .Values.gpu.server }}
|
||||
- name: ORION_VGPU_RESNAME
|
||||
value: "virtaitech.com/gpu"
|
||||
- name: NVIDIA_GPU_RESNAME
|
||||
value: "nvidia.com/gpu"
|
||||
- name: ORION_GMEM_UNIT
|
||||
value: "1024"
|
||||
- name: ORION_MNT_PATH
|
||||
value: '{"/var/tmp/orion/comm/":"/var/tmp/orion/comm:ro"}'
|
||||
- name: ORION_ENV
|
||||
value: '{"ORION_DRIVER_ONLY":"true","ORION_DEVICE_ENABLE":"1","ORION_EXPORT_CMD":"env | grep ORION; orion-smi -j","ORION_NV_SMI_FORCE_DISPLAY":"1"}'
|
||||
- name: REPORT_ONLY_LOCAL_VGPU
|
||||
value: "false"
|
||||
- name: DP_DISABLE_HEALTHCHECKS
|
||||
value: "all"
|
||||
- name: ORION_HOST_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.hostIP
|
||||
volumeMounts:
|
||||
- name: device-plugin
|
||||
mountPath: /var/lib/kubelet/device-plugins
|
||||
- name: probe
|
||||
mountPath: /tmp/probe.sh
|
||||
subPath: probe.sh
|
||||
- name: localtime
|
||||
mountPath: /etc/localtime
|
||||
volumes:
|
||||
- name: localtime
|
||||
hostPath:
|
||||
path: /etc/localtime
|
||||
- name: device-plugin
|
||||
hostPath:
|
||||
path: /var/lib/kubelet/device-plugins
|
||||
- name: probe
|
||||
configMap:
|
||||
name: probe.orionx-k8s-device-plugin
|
||||
defaultMode: 0755
|
||||
Reference in New Issue
Block a user