# File listing metadata (viewer residue): 117 lines, 3.1 KiB, YAML
# ConfigMap holding the liveness-probe script for the orionx-server pods.
# The single data key, probe.sh, is a bash script that exits non-zero when
# no process named "oriond" can be found with pidof.
apiVersion: v1
kind: ConfigMap
metadata:
  name: probe.orionx-server
  namespace: {{ .Release.Namespace }}
data:
  # Literal block scalar: newlines are preserved so the shebang stays on the
  # first line of the rendered file.
  probe.sh: |
    #!/bin/bash
    # Author: yuanzhilei@virtaitech.com
    #
    # pidof prints nothing when there is no matching process, so an empty
    # result means oriond is not running.
    oriond_pid=$(pidof oriond)
    if [[ -z "${oriond_pid}" ]]; then
      echo "oriond not exists."
      exit 1
    fi
---
# DaemonSet for the orionx-server container.
#
# The pod runs privileged and shares the host's network, IPC and PID
# namespaces. Its liveness probe executes /tmp/probe.sh, which is mounted
# from the "probe.orionx-server" ConfigMap (see the "probe" volume below).
#
# Chart values read by this template:
#   .Values.gpu.server - used for the gpu-server annotation and for the
#                        ORION_CONTROLLER / ORION_SERVICE_REGISTRY_ADDRESS
#                        environment variables.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: orionx-server
  namespace: {{ .Release.Namespace }}
  labels:
    k8s-app: virtaitech
  annotations:
    # Annotation values must be strings; quote so that a numeric- or
    # boolean-looking value is not re-typed by the YAML parser.
    gpu-server: {{ .Values.gpu.server | quote }}
spec:
  selector:
    matchLabels:
      app: orionx-server
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: orionx-server
    spec:
      priorityClassName: system-cluster-critical
      tolerations:
        # NOTE(review): this only tolerates the legacy "master" taint. Newer
        # Kubernetes releases taint control-plane nodes with
        # node-role.kubernetes.io/control-plane instead - confirm whether a
        # matching toleration is wanted for this chart's target clusters.
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      hostNetwork: true
      # Required companion of hostNetwork when cluster DNS should still be used.
      dnsPolicy: ClusterFirstWithHostNet
      hostIPC: true
      hostPID: true
      containers:
        - image: eball/orionx-server:4.3.0-cu9.0-12.1-slim-bytetrade-20231211
          name: orionx-server
          # Debug override (kept disabled): replace the entrypoint with a sleep.
          # command: ["bash", "-c"]
          # args: ["echo 'Sleeping for 3600 seconds ...'; sleep 3600s"]
          imagePullPolicy: Always
          securityContext:
            privileged: true
          env:
            # Env var values must be strings, hence the explicit quoting of
            # templated values.
            - name: ORION_CONTROLLER
              value: {{ .Values.gpu.server | quote }}
            # Bind address is taken from the IP of the node the pod runs on.
            - name: ORION_BIND_ADDR
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            # - name: ORION_BIND_NET
            #   value: "eth0"
            # - name: ORION_BIND_HOSTNAME
            #   value: "true"
            - name: ORION_SERVER_PORT
              value: "9960"
            - name: ORION_VGPU_COUNT
              value: "4"
            - name: ORION_LOG_LEVEL
              value: "INFO"
            - name: ORION_VGPU_EXPORTER_LISTEN_PORT
              value: "9401"
            - name: ORION_VGPU_EXPORTER_LISTEN_IP
              value: "0.0.0.0"
            - name: ORION_SERVICE_REGISTRY_ADDRESS
              value: {{ .Values.gpu.server | quote }}
            # - name: ENABLE_RDMA
            #   value: "true"
            # - name: ENABLE_COMPUTATION_THROTTLE
            #   value: "false"
            # - name: ORION_RDMA_NAME
            #   value: "mlx5_0"
            # - name: ORION_RDMA_PORT
            #   value: "1"
            # - name: ORION_RDMA_GID
            #   value: "3"
            # MY_NODE_NAME must be declared before ORION_SERVER_LABELS:
            # $(VAR) references only expand variables defined earlier in the
            # list.
            - name: MY_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: ORION_SERVER_LABELS
              value: '{"pod_host_node": "$(MY_NODE_NAME)"}'
          livenessProbe:
            exec:
              command:
                - /tmp/probe.sh
            initialDelaySeconds: 50
            periodSeconds: 10
          volumeMounts:
            # subPath mounts just the probe.sh key as a single file.
            - name: probe
              mountPath: /tmp/probe.sh
              subPath: probe.sh
            - name: orioncomm
              mountPath: "/var/tmp/orion/comm/"
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: localtime
          hostPath:
            path: /etc/localtime
        - name: orioncomm
          hostPath:
            path: /var/tmp/orion/comm/
        - name: probe
          configMap:
            name: probe.orionx-server
            # Octal file mode (rwxr-xr-x) so the exec probe can run the
            # script. Must remain an unquoted integer: the API field is an
            # int32, and a quoted string would be rejected.
            defaultMode: 0755