Compare commits
3 Commits
cli/fix/up
...
ci/ignore_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9a6674e51a | ||
|
|
bab074cd37 | ||
|
|
afb7d49455 |
@@ -36,6 +36,10 @@ fi
|
||||
|
||||
|
||||
find $BASE_DIR/../ -type f -name Olares.yaml | while read f; do
|
||||
if [[ "$f" == *"/vendor/"* ]]; then
|
||||
echo "skip vendor file $f"
|
||||
continue
|
||||
fi
|
||||
echo "Processing $f"
|
||||
declare -a bins
|
||||
IFS=
|
||||
|
||||
@@ -187,7 +187,7 @@ func (m *InstallPluginModule) Init() {
|
||||
Prepare: &prepare.PrepareCollection{
|
||||
new(common.OnlyFirstMaster),
|
||||
},
|
||||
Action: new(UpdateNodeLabels),
|
||||
Action: new(UpdateNodeGPUInfo),
|
||||
Parallel: false,
|
||||
Retry: 1,
|
||||
}
|
||||
@@ -223,23 +223,6 @@ func (m *InstallPluginModule) Init() {
|
||||
}
|
||||
}
|
||||
|
||||
// GetCudaVersionModule is a module that embeds common.KubeModule; its Init
// method registers a single local task that runs the GetCudaVersion action.
type GetCudaVersionModule struct {
|
||||
common.KubeModule
|
||||
}
|
||||
|
||||
func (g *GetCudaVersionModule) Init() {
|
||||
g.Name = "GetCudaVersion"
|
||||
|
||||
getCudaVersion := &task.LocalTask{
|
||||
Name: "GetCudaVersion",
|
||||
Action: new(GetCudaVersion),
|
||||
}
|
||||
|
||||
g.Tasks = []task.Interface{
|
||||
getCudaVersion,
|
||||
}
|
||||
}
|
||||
|
||||
// NodeLabelingModule is a module that embeds common.KubeModule; its tasks are
// configured by its Init method.
type NodeLabelingModule struct {
|
||||
common.KubeModule
|
||||
}
|
||||
@@ -253,7 +236,7 @@ func (l *NodeLabelingModule) Init() {
|
||||
new(CudaInstalled),
|
||||
new(CurrentNodeInK8s),
|
||||
},
|
||||
Action: new(UpdateNodeLabels),
|
||||
Action: new(UpdateNodeGPUInfo),
|
||||
Retry: 1,
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,10 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
v1alpha1 "bytetrade.io/web3os/app-service/api/sys.bytetrade.io/v1alpha1"
|
||||
apputils "bytetrade.io/web3os/app-service/pkg/utils"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
|
||||
|
||||
"github.com/beclab/Olares/cli/pkg/clientset"
|
||||
"github.com/beclab/Olares/cli/pkg/common"
|
||||
@@ -26,7 +29,11 @@ import (
|
||||
"github.com/pelletier/go-toml"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
apixclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
kruntime "k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/util/retry"
|
||||
)
|
||||
@@ -323,59 +330,11 @@ func (t *CheckGpuStatus) Execute(runtime connector.Runtime) error {
|
||||
return fmt.Errorf("GPU Container State is Pending")
|
||||
}
|
||||
|
||||
type GetCudaVersion struct {
|
||||
type UpdateNodeGPUInfo struct {
|
||||
common.KubeAction
|
||||
}
|
||||
|
||||
func (g *GetCudaVersion) Execute(runtime connector.Runtime) error {
|
||||
var nvidiaSmiFile string
|
||||
var systemInfo = runtime.GetSystemInfo()
|
||||
|
||||
switch {
|
||||
case systemInfo.IsWsl():
|
||||
nvidiaSmiFile = "/usr/lib/wsl/lib/nvidia-smi"
|
||||
default:
|
||||
nvidiaSmiFile = "/usr/bin/nvidia-smi"
|
||||
}
|
||||
|
||||
if !util.IsExist(nvidiaSmiFile) {
|
||||
logger.Info("nvidia-smi not exists")
|
||||
return nil
|
||||
}
|
||||
|
||||
var cudaVersion string
|
||||
res, err := runtime.GetRunner().Cmd(fmt.Sprintf("%s --version", nvidiaSmiFile), false, true)
|
||||
if err != nil {
|
||||
logger.Errorf("get cuda version error %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
lines := strings.Split(res, "\n")
|
||||
|
||||
if len(lines) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, line := range lines {
|
||||
if strings.Contains(line, "CUDA Version") {
|
||||
parts := strings.Split(line, ":")
|
||||
if len(parts) != 2 {
|
||||
break
|
||||
}
|
||||
cudaVersion = strings.TrimSpace(parts[1])
|
||||
}
|
||||
}
|
||||
if cudaVersion != "" {
|
||||
common.SetSystemEnv("OLARES_SYSTEM_CUDA_VERSION", cudaVersion)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateNodeLabels is an action type that embeds common.KubeAction; its work
// is performed by its Execute method.
type UpdateNodeLabels struct {
|
||||
common.KubeAction
|
||||
}
|
||||
|
||||
func (u *UpdateNodeLabels) Execute(runtime connector.Runtime) error {
|
||||
func (u *UpdateNodeGPUInfo) Execute(runtime connector.Runtime) error {
|
||||
client, err := clientset.NewKubeClient()
|
||||
if err != nil {
|
||||
return errors.Wrap(errors.WithStack(err), "kubeclient create error")
|
||||
@@ -482,6 +441,85 @@ func UpdateNodeGpuLabel(ctx context.Context, client kubernetes.Interface, driver
|
||||
}
|
||||
}
|
||||
|
||||
if cuda != nil && *cuda != "" {
|
||||
if err := updateCudaVersionSystemEnv(ctx, *cuda); err != nil {
|
||||
logger.Errorf("failed to update SystemEnv for CUDA version: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateCudaVersionSystemEnv(ctx context.Context, cudaVersion string) error {
|
||||
envName := "OLARES_SYSTEM_CUDA_VERSION"
|
||||
common.SetSystemEnv(envName, cudaVersion)
|
||||
config, err := ctrl.GetConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get rest config: %w", err)
|
||||
}
|
||||
|
||||
apix, err := apixclientset.NewForConfig(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create crd client: %w", err)
|
||||
}
|
||||
|
||||
_, err = apix.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, "systemenvs.sys.bytetrade.io", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
logger.Debugf("SystemEnv CRD not found, skipping CUDA version update")
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("failed to get SystemEnv CRD: %w", err)
|
||||
}
|
||||
|
||||
scheme := kruntime.NewScheme()
|
||||
if err := v1alpha1.AddToScheme(scheme); err != nil {
|
||||
return fmt.Errorf("failed to add systemenv scheme: %w", err)
|
||||
}
|
||||
|
||||
c, err := ctrlclient.New(config, ctrlclient.Options{Scheme: scheme})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create client: %w", err)
|
||||
}
|
||||
|
||||
resourceName, err := apputils.EnvNameToResourceName(envName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid system env name: %s", envName)
|
||||
}
|
||||
|
||||
var existingSystemEnv v1alpha1.SystemEnv
|
||||
err = c.Get(ctx, types.NamespacedName{Name: resourceName}, &existingSystemEnv)
|
||||
if err == nil {
|
||||
if existingSystemEnv.Default != cudaVersion {
|
||||
existingSystemEnv.Default = cudaVersion
|
||||
if err := c.Update(ctx, &existingSystemEnv); err != nil {
|
||||
return fmt.Errorf("failed to update SystemEnv %s: %w", resourceName, err)
|
||||
}
|
||||
logger.Infof("Updated SystemEnv %s default to %s", resourceName, cudaVersion)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if !apierrors.IsNotFound(err) {
|
||||
return fmt.Errorf("failed to get SystemEnv %s: %w", resourceName, err)
|
||||
}
|
||||
|
||||
systemEnv := &v1alpha1.SystemEnv{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: resourceName,
|
||||
},
|
||||
EnvVarSpec: v1alpha1.EnvVarSpec{
|
||||
EnvName: envName,
|
||||
Default: cudaVersion,
|
||||
},
|
||||
}
|
||||
|
||||
if err := c.Create(ctx, systemEnv); err != nil && !apierrors.IsAlreadyExists(err) {
|
||||
return fmt.Errorf("failed to create SystemEnv %s: %w", resourceName, err)
|
||||
}
|
||||
|
||||
logger.Infof("Created SystemEnv: %s with default %s", envName, cudaVersion)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -58,7 +58,6 @@ func (l *linuxInstallPhaseBuilder) installGpuPlugin() phase {
|
||||
return []module.Module{
|
||||
&gpu.RestartK3sServiceModule{Skip: !(l.runtime.Arg.Kubetype == common.K3s)},
|
||||
&gpu.InstallPluginModule{Skip: skipGpuPlugin},
|
||||
&gpu.GetCudaVersionModule{},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ require (
|
||||
bytetrade.io/web3os/bfl v0.0.0-00010101000000-000000000000
|
||||
github.com/Masterminds/semver/v3 v3.4.0
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2
|
||||
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89
|
||||
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9
|
||||
github.com/containerd/containerd v1.7.28
|
||||
github.com/distribution/distribution/v3 v3.0.0
|
||||
github.com/dustin/go-humanize v1.0.1
|
||||
@@ -39,6 +39,7 @@ require (
|
||||
github.com/libp2p/go-netroute v0.2.2
|
||||
github.com/mackerelio/go-osstat v0.2.5
|
||||
github.com/mdlayher/raw v0.1.0
|
||||
github.com/miekg/dns v1.1.55
|
||||
github.com/muka/network_manager v0.0.0-20200903202308-ae5ede816e07
|
||||
github.com/nxadm/tail v1.4.11
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
|
||||
@@ -56,6 +57,7 @@ require (
|
||||
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b
|
||||
golang.org/x/sys v0.35.0
|
||||
k8s.io/api v0.34.1
|
||||
k8s.io/apiextensions-apiserver v0.34.0
|
||||
k8s.io/apimachinery v0.34.1
|
||||
k8s.io/client-go v12.0.0+incompatible
|
||||
k8s.io/cri-api v0.34.1
|
||||
@@ -129,7 +131,6 @@ require (
|
||||
github.com/mattn/go-runewidth v0.0.16 // indirect
|
||||
github.com/mdlayher/packet v0.0.0-20220221164757-67998ac0ff93 // indirect
|
||||
github.com/mdlayher/socket v0.2.1 // indirect
|
||||
github.com/miekg/dns v1.1.55 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/moby/locker v1.0.1 // indirect
|
||||
github.com/moby/spdystream v0.5.0 // indirect
|
||||
@@ -200,7 +201,6 @@ require (
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
k8s.io/apiextensions-apiserver v0.34.0 // indirect
|
||||
k8s.io/apiserver v0.34.0 // indirect
|
||||
k8s.io/component-base v0.34.1 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
|
||||
|
||||
@@ -24,8 +24,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
|
||||
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89 h1:5s9hXV8K3faToQtE9DbiM7O6jt5kIiEsLAaKn6F0UfA=
|
||||
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89/go.mod h1:iEvZxM6PnFxFRppneTzV3hgr2tIxDnsI3dhp4pi7pFg=
|
||||
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9 h1:YNHfPra2FqsKJ5mAxSWNVIK6VyWygRyZiNwfPqiFxlg=
|
||||
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9/go.mod h1:cYPcuju2yRSp9BQjIN/CC495dDOOvVoL42r/gvFlutk=
|
||||
github.com/beclab/app-service v0.4.37 h1:gt60wQxgPWMc3oN94TNSdiQAvzqTyCv/OUP93jNSQTY=
|
||||
github.com/beclab/app-service v0.4.37/go.mod h1:0vEg3rv/DbR7dYznvTlXNXyYNn+TXNMaxz03GQYRWUQ=
|
||||
github.com/beclab/bfl v0.3.36 h1:PgeSPGc+XoONiwFsKq9xX8rqcL4kVM1G/ut0lYYj/js=
|
||||
|
||||
Reference in New Issue
Block a user