Compare commits
1 Commits
module-l4-
...
feat/cli/a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a4f6045a90 |
21
cli/cmd/ctl/amdgpu/install.go
Normal file
21
cli/cmd/ctl/amdgpu/install.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package amdgpu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/beclab/Olares/cli/pkg/pipelines"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NewCmdAmdGpuInstall() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "install",
|
||||||
|
Short: "Install AMD ROCm stack via amdgpu-install",
|
||||||
|
Run: func(cmd *cobra.Command, args []string) {
|
||||||
|
if err := pipelines.AmdGpuInstall(); err != nil {
|
||||||
|
log.Fatalf("error: %v", err)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
16
cli/cmd/ctl/amdgpu/root.go
Normal file
16
cli/cmd/ctl/amdgpu/root.go
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
package amdgpu
|
||||||
|
|
||||||
|
import "github.com/spf13/cobra"
|
||||||
|
|
||||||
|
func NewCmdAmdGpu() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "amdgpu",
|
||||||
|
Short: "Manage AMD GPU ROCm stack",
|
||||||
|
}
|
||||||
|
cmd.AddCommand(NewCmdAmdGpuInstall())
|
||||||
|
cmd.AddCommand(NewCmdAmdGpuUninstall())
|
||||||
|
cmd.AddCommand(NewCmdAmdGpuStatus())
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
21
cli/cmd/ctl/amdgpu/status.go
Normal file
21
cli/cmd/ctl/amdgpu/status.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package amdgpu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/beclab/Olares/cli/pkg/pipelines"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NewCmdAmdGpuStatus() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "status",
|
||||||
|
Short: "Show AMD GPU driver and ROCm status",
|
||||||
|
Run: func(cmd *cobra.Command, args []string) {
|
||||||
|
if err := pipelines.AmdGpuStatus(); err != nil {
|
||||||
|
log.Fatalf("error: %v", err)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
21
cli/cmd/ctl/amdgpu/uninstall.go
Normal file
21
cli/cmd/ctl/amdgpu/uninstall.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package amdgpu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/beclab/Olares/cli/pkg/pipelines"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NewCmdAmdGpuUninstall() *cobra.Command {
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Use: "uninstall",
|
||||||
|
Short: "Uninstall AMD ROCm stack via amdgpu-install",
|
||||||
|
Run: func(cmd *cobra.Command, args []string) {
|
||||||
|
if err := pipelines.AmdGpuUninstall(); err != nil {
|
||||||
|
log.Fatalf("error: %v", err)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package ctl
|
package ctl
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/beclab/Olares/cli/cmd/ctl/amdgpu"
|
||||||
"github.com/beclab/Olares/cli/cmd/ctl/disk"
|
"github.com/beclab/Olares/cli/cmd/ctl/disk"
|
||||||
"github.com/beclab/Olares/cli/cmd/ctl/gpu"
|
"github.com/beclab/Olares/cli/cmd/ctl/gpu"
|
||||||
"github.com/beclab/Olares/cli/cmd/ctl/node"
|
"github.com/beclab/Olares/cli/cmd/ctl/node"
|
||||||
@@ -33,6 +34,7 @@ func NewDefaultCommand() *cobra.Command {
|
|||||||
cmds.AddCommand(os.NewOSCommands()...)
|
cmds.AddCommand(os.NewOSCommands()...)
|
||||||
cmds.AddCommand(node.NewNodeCommand())
|
cmds.AddCommand(node.NewNodeCommand())
|
||||||
cmds.AddCommand(gpu.NewCmdGpu())
|
cmds.AddCommand(gpu.NewCmdGpu())
|
||||||
|
cmds.AddCommand(amdgpu.NewCmdAmdGpu())
|
||||||
cmds.AddCommand(user.NewUserCommand())
|
cmds.AddCommand(user.NewUserCommand())
|
||||||
cmds.AddCommand(disk.NewDiskCommand())
|
cmds.AddCommand(disk.NewDiskCommand())
|
||||||
|
|
||||||
|
|||||||
133
cli/pkg/amdgpu/tasks.go
Normal file
133
cli/pkg/amdgpu/tasks.go
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
package amdgpu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/beclab/Olares/cli/pkg/common"
|
||||||
|
cc "github.com/beclab/Olares/cli/pkg/core/common"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/connector"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/logger"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/task"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/utils"
|
||||||
|
|
||||||
|
"github.com/Masterminds/semver/v3"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// InstallAmdRocmModule installs AMD ROCm stack on supported Ubuntu if AMD GPU is present.
|
||||||
|
type InstallAmdRocmModule struct {
|
||||||
|
common.KubeModule
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *InstallAmdRocmModule) Init() {
|
||||||
|
m.Name = "InstallAMDGPU"
|
||||||
|
|
||||||
|
installAmd := &task.RemoteTask{
|
||||||
|
Name: "InstallAmdRocm",
|
||||||
|
Hosts: m.Runtime.GetHostsByRole(common.Master),
|
||||||
|
Action: &InstallAmdRocm{
|
||||||
|
// no manifest needed
|
||||||
|
},
|
||||||
|
Parallel: false,
|
||||||
|
Retry: 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
m.Tasks = []task.Interface{
|
||||||
|
installAmd,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// InstallAmdRocm installs ROCm using amdgpu-install on Ubuntu 22.04/24.04 for AMD GPUs.
|
||||||
|
type InstallAmdRocm struct {
|
||||||
|
common.KubeAction
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *InstallAmdRocm) Execute(runtime connector.Runtime) error {
|
||||||
|
si := runtime.GetSystemInfo()
|
||||||
|
if !si.IsLinux() || !si.IsUbuntu() || !(si.IsUbuntuVersionEqual(connector.Ubuntu2204) || si.IsUbuntuVersionEqual(connector.Ubuntu2404)) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
amdGPUExists, err := utils.HasAmdIGPU(runtime)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// skip rocm install
|
||||||
|
if !amdGPUExists {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rocmV, _ := utils.RocmVersion()
|
||||||
|
min := semver.MustParse("7.1.1")
|
||||||
|
if rocmV != nil && rocmV.LessThan(min) {
|
||||||
|
return fmt.Errorf("detected ROCm version %s, which is lower than required %s; please uninstall existing ROCm/AMDGPU components before installation with command: olares-cli amdgpu uninstall", rocmV.Original(), min.Original())
|
||||||
|
}
|
||||||
|
if rocmV != nil && rocmV.GreaterThan(min) {
|
||||||
|
logger.Warnf("Warning: detected ROCm version %s great than maximum tested version %s")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if rocmV != nil && rocmV.Equal(min) {
|
||||||
|
logger.Infof("detected ROCm version %s, skip rocm install...", min.Original())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ensure python3-setuptools and python3-wheel
|
||||||
|
_, _ = runtime.GetRunner().SudoCmd("apt-get update", false, true)
|
||||||
|
checkPkgs := "dpkg -s python3-setuptools python3-wheel >/dev/null 2>&1 || DEBIAN_FRONTEND=noninteractive apt-get install -y python3-setuptools python3-wheel"
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd(checkPkgs, false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to install python3-setuptools and python3-wheel")
|
||||||
|
}
|
||||||
|
// ensure amdgpu-install exists
|
||||||
|
if _, err := exec.LookPath("amdgpu-install"); err != nil {
|
||||||
|
var debURL string
|
||||||
|
if si.IsUbuntuVersionEqual(connector.Ubuntu2404) {
|
||||||
|
debURL = "https://repo.radeon.com/amdgpu-install/7.1.1/ubuntu/noble/amdgpu-install_7.1.1.70101-1_all.deb"
|
||||||
|
} else {
|
||||||
|
debURL = "https://repo.radeon.com/amdgpu-install/7.1.1/ubuntu/jammy/amdgpu-install_7.1.1.70101-1_all.deb"
|
||||||
|
}
|
||||||
|
tmpDeb := path.Join(runtime.GetBaseDir(), cc.PackageCacheDir, "gpu", "amdgpu-install_7.1.1.70101-1_all.deb")
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd(fmt.Sprintf("install -d -m 0755 %s", filepath.Dir(tmpDeb)), false, true); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
cmd := fmt.Sprintf("sh -c 'wget -O %s %s'", tmpDeb, debURL)
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd(cmd, false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to download amdgpu-install deb")
|
||||||
|
}
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd(fmt.Sprintf("DEBIAN_FRONTEND=noninteractive apt-get install -y %s", tmpDeb), false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to install amdgpu-install deb")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// run installer for rocm usecase
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd("amdgpu-install -y --usecase=rocm", false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to install AMD ROCm via amdgpu-install")
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
logger.Warn("Warning: To enable ROCm, please reboot your machine after installation.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type AmdgpuInstallAction struct {
|
||||||
|
common.KubeAction
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AmdgpuInstallAction) Execute(runtime connector.Runtime) error {
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd("amdgpu-install -y --usecase=rocm", false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to install AMD ROCm via amdgpu-install")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type AmdgpuUninstallAction struct {
|
||||||
|
common.KubeAction
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AmdgpuUninstallAction) Execute(runtime connector.Runtime) error {
|
||||||
|
if _, err := runtime.GetRunner().SudoCmd("amdgpu-install --uninstall -y", false, true); err != nil {
|
||||||
|
return errors.Wrap(errors.WithStack(err), "failed to uninstall AMD ROCm via amdgpu-install")
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
logger.Warn("Warning: Please reboot your machine after uninstall to fully remove ROCm components.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -81,6 +81,7 @@ func (m *RunPrechecksModule) Init() {
|
|||||||
new(NvidiaCardArchChecker),
|
new(NvidiaCardArchChecker),
|
||||||
new(NouveauChecker),
|
new(NouveauChecker),
|
||||||
new(CudaChecker),
|
new(CudaChecker),
|
||||||
|
new(RocmChecker),
|
||||||
}
|
}
|
||||||
runPreChecks := &task.LocalTask{
|
runPreChecks := &task.LocalTask{
|
||||||
Name: "RunPrechecks",
|
Name: "RunPrechecks",
|
||||||
|
|||||||
@@ -372,6 +372,48 @@ func (c *CudaChecker) Check(runtime connector.Runtime) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RocmChecker checks AMD ROCm version for AMD GPU on Ubuntu 22.04/24.04 only.
|
||||||
|
type RocmChecker struct{}
|
||||||
|
|
||||||
|
func (r *RocmChecker) Name() string {
|
||||||
|
return "ROCm"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RocmChecker) Check(runtime connector.Runtime) error {
|
||||||
|
if !runtime.GetSystemInfo().IsLinux() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
si := runtime.GetSystemInfo()
|
||||||
|
if !si.IsUbuntu() || !(si.IsUbuntuVersionEqual(connector.Ubuntu2204) || si.IsUbuntuVersionEqual(connector.Ubuntu2404)) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// detect AMD GPU presence
|
||||||
|
amdGPUExists, err := utils.HasAmdIGPU(runtime)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// no AMD GPU found, no need to check rocm
|
||||||
|
if !amdGPUExists {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
curV, err := utils.RocmVersion()
|
||||||
|
if err != nil && !os.IsNotExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
min := semver.MustParse("7.1.1")
|
||||||
|
if curV.LessThan(min) {
|
||||||
|
return fmt.Errorf("detected ROCm version %s, which is lower than required %s; please uninstall existing ROCm/AMDGPU components before installation with command: olares-cli amdgpu uninstall", curV.Original(), min.Original())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// precheck - task
|
// precheck - task
|
||||||
|
|
||||||
|
|||||||
@@ -51,10 +51,12 @@ func (d DebianVersion) String() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
Ubuntu20 UbuntuVersion = "20."
|
Ubuntu20 UbuntuVersion = "20."
|
||||||
Ubuntu22 UbuntuVersion = "22."
|
Ubuntu22 UbuntuVersion = "22."
|
||||||
Ubuntu24 UbuntuVersion = "24."
|
Ubuntu24 UbuntuVersion = "24."
|
||||||
Ubuntu25 UbuntuVersion = "25."
|
Ubuntu25 UbuntuVersion = "25."
|
||||||
|
Ubuntu2204 UbuntuVersion = "22.04"
|
||||||
|
Ubuntu2404 UbuntuVersion = "24.04"
|
||||||
|
|
||||||
Debian9 DebianVersion = "9"
|
Debian9 DebianVersion = "9"
|
||||||
Debian10 DebianVersion = "10"
|
Debian10 DebianVersion = "10"
|
||||||
|
|||||||
@@ -3,8 +3,7 @@ package system
|
|||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/beclab/Olares/cli/pkg/gpu"
|
"github.com/beclab/Olares/cli/pkg/amdgpu"
|
||||||
|
|
||||||
"github.com/beclab/Olares/cli/pkg/bootstrap/os"
|
"github.com/beclab/Olares/cli/pkg/bootstrap/os"
|
||||||
"github.com/beclab/Olares/cli/pkg/bootstrap/patch"
|
"github.com/beclab/Olares/cli/pkg/bootstrap/patch"
|
||||||
"github.com/beclab/Olares/cli/pkg/bootstrap/precheck"
|
"github.com/beclab/Olares/cli/pkg/bootstrap/precheck"
|
||||||
@@ -12,6 +11,7 @@ import (
|
|||||||
"github.com/beclab/Olares/cli/pkg/container"
|
"github.com/beclab/Olares/cli/pkg/container"
|
||||||
"github.com/beclab/Olares/cli/pkg/core/module"
|
"github.com/beclab/Olares/cli/pkg/core/module"
|
||||||
"github.com/beclab/Olares/cli/pkg/daemon"
|
"github.com/beclab/Olares/cli/pkg/daemon"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/gpu"
|
||||||
"github.com/beclab/Olares/cli/pkg/images"
|
"github.com/beclab/Olares/cli/pkg/images"
|
||||||
"github.com/beclab/Olares/cli/pkg/k3s"
|
"github.com/beclab/Olares/cli/pkg/k3s"
|
||||||
"github.com/beclab/Olares/cli/pkg/manifest"
|
"github.com/beclab/Olares/cli/pkg/manifest"
|
||||||
@@ -82,6 +82,7 @@ func (l *linuxPhaseBuilder) build() []module.Module {
|
|||||||
addModule(&terminus.WriteReleaseFileModule{}).
|
addModule(&terminus.WriteReleaseFileModule{}).
|
||||||
addModule(gpuModuleBuilder(func() []module.Module {
|
addModule(gpuModuleBuilder(func() []module.Module {
|
||||||
return []module.Module{
|
return []module.Module{
|
||||||
|
&amdgpu.InstallAmdRocmModule{},
|
||||||
&gpu.InstallDriversModule{
|
&gpu.InstallDriversModule{
|
||||||
ManifestModule: manifest.ManifestModule{
|
ManifestModule: manifest.ManifestModule{
|
||||||
Manifest: l.manifestMap,
|
Manifest: l.manifestMap,
|
||||||
|
|||||||
101
cli/pkg/pipelines/amdgpu.go
Normal file
101
cli/pkg/pipelines/amdgpu.go
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
package pipelines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/beclab/Olares/cli/pkg/amdgpu"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/common"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/action"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/connector"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/logger"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/module"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/pipeline"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/task"
|
||||||
|
)
|
||||||
|
|
||||||
|
type singleTaskModule struct {
|
||||||
|
common.KubeModule
|
||||||
|
name string
|
||||||
|
act action.Action
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *singleTaskModule) Init() {
|
||||||
|
m.Name = m.name
|
||||||
|
m.Tasks = []task.Interface{
|
||||||
|
&task.LocalTask{
|
||||||
|
Name: m.name,
|
||||||
|
Action: m.act,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func AmdGpuInstall() error {
|
||||||
|
arg := common.NewArgument()
|
||||||
|
arg.SetConsoleLog("amdgpuinstall.log", true)
|
||||||
|
runtime, err := common.NewKubeRuntime(common.AllInOne, *arg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
p := &pipeline.Pipeline{
|
||||||
|
Name: "InstallAMDGPUDrivers",
|
||||||
|
Runtime: runtime,
|
||||||
|
Modules: []module.Module{
|
||||||
|
&amdgpu.InstallAmdRocmModule{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return p.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
func AmdGpuUninstall() error {
|
||||||
|
arg := common.NewArgument()
|
||||||
|
arg.SetConsoleLog("amdgpuuninstall.log", true)
|
||||||
|
runtime, err := common.NewKubeRuntime(common.AllInOne, *arg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
p := &pipeline.Pipeline{
|
||||||
|
Name: "UninstallAMDGPUDrivers",
|
||||||
|
Runtime: runtime,
|
||||||
|
Modules: []module.Module{
|
||||||
|
&singleTaskModule{name: "AmdgpuUninstall", act: new(amdgpu.AmdgpuUninstallAction)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return p.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
func AmdGpuStatus() error {
|
||||||
|
arg := common.NewArgument()
|
||||||
|
runtime, err := common.NewKubeRuntime(common.AllInOne, *arg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
runtime.SetRunner(
|
||||||
|
&connector.Runner{
|
||||||
|
Host: &connector.BaseHost{
|
||||||
|
Name: common.LocalHost,
|
||||||
|
Arch: runtime.GetSystemInfo().GetOsArch(),
|
||||||
|
Os: runtime.GetSystemInfo().GetOsType(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
amdModel, _ := runtime.GetRunner().SudoCmd("lspci | grep -iE 'VGA|3D|Display' | grep -iE 'AMD|ATI' | head -1 || true", false, false)
|
||||||
|
drvVer, _ := runtime.GetRunner().SudoCmd("modinfo amdgpu 2>/dev/null | awk -F': ' '/^version:/{print $2}' || true", false, false)
|
||||||
|
rocmVer, _ := runtime.GetRunner().SudoCmd("cat /opt/rocm/.info/version 2>/dev/null || true", false, false)
|
||||||
|
|
||||||
|
if strings.TrimSpace(amdModel) != "" {
|
||||||
|
logger.Infof("AMD GPU: %s", strings.TrimSpace(amdModel))
|
||||||
|
} else {
|
||||||
|
logger.Info("AMD GPU: not detected")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(drvVer) != "" {
|
||||||
|
logger.Infof("AMDGPU driver %s", strings.TrimSpace(drvVer))
|
||||||
|
} else {
|
||||||
|
logger.Info("AMDGPU driver version: unknown")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(rocmVer) != "" {
|
||||||
|
logger.Infof("ROCm version: %s", strings.TrimSpace(rocmVer))
|
||||||
|
} else {
|
||||||
|
logger.Info("ROCm version: not installed")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"github.com/beclab/Olares/cli/pkg/core/connector"
|
"github.com/beclab/Olares/cli/pkg/core/connector"
|
||||||
"github.com/beclab/Olares/cli/pkg/core/logger"
|
"github.com/beclab/Olares/cli/pkg/core/logger"
|
||||||
"github.com/beclab/Olares/cli/pkg/core/task"
|
"github.com/beclab/Olares/cli/pkg/core/task"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type WelcomeMessage struct {
|
type WelcomeMessage struct {
|
||||||
@@ -68,6 +69,15 @@ func (t *WelcomeMessage) Execute(runtime connector.Runtime) error {
|
|||||||
logger.Infof("Username: %s", t.KubeConf.Arg.User.UserName)
|
logger.Infof("Username: %s", t.KubeConf.Arg.User.UserName)
|
||||||
logger.Infof("Password: %s", t.KubeConf.Arg.User.Password)
|
logger.Infof("Password: %s", t.KubeConf.Arg.User.Password)
|
||||||
fmt.Printf("\n------------------------------------------------\n\n\n\n\n")
|
fmt.Printf("\n------------------------------------------------\n\n\n\n\n")
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
// If AMD GPU on Ubuntu 22.04/24.04, print warning about reboot for ROCm
|
||||||
|
if si := runtime.GetSystemInfo(); si.IsUbuntu() && (si.IsUbuntuVersionEqual(connector.Ubuntu2204) || si.IsUbuntuVersionEqual(connector.Ubuntu2404)) {
|
||||||
|
if hasAmd, _ := utils.HasAmdIGPU(runtime); hasAmd {
|
||||||
|
logger.Warnf("\x1b[31mWarning: To enable ROCm, please reboot your machine after activation.\x1b[0m")
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
67
cli/pkg/utils/amdgpu.go
Normal file
67
cli/pkg/utils/amdgpu.go
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/Masterminds/semver/v3"
|
||||||
|
"github.com/beclab/Olares/cli/pkg/core/connector"
|
||||||
|
)
|
||||||
|
|
||||||
|
func HasAmdIGPU(execRuntime connector.Runtime) (bool, error) {
|
||||||
|
// Detect by CPU model names that bundle AMD AI NPU/graphics
|
||||||
|
targets := []string{
|
||||||
|
"AMD Ryzen AI Max+ 395",
|
||||||
|
"AMD Ryzen AI Max 390",
|
||||||
|
"AMD Ryzen AI Max 385",
|
||||||
|
"AMD Ryzen AI 9 HX 375",
|
||||||
|
"AMD Ryzen AI 9 HX 370",
|
||||||
|
"AMD Ryzen AI 9 365",
|
||||||
|
}
|
||||||
|
// try lscpu first: extract 'Model name' field
|
||||||
|
out, err := execRuntime.GetRunner().SudoCmd("lscpu 2>/dev/null | awk -F': *' '/^Model name/{print $2; exit}' || true", false, false)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
if out != "" {
|
||||||
|
lo := strings.ToLower(strings.TrimSpace(out))
|
||||||
|
for _, t := range targets {
|
||||||
|
if strings.Contains(lo, strings.ToLower(t)) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// fallback to /proc/cpuinfo
|
||||||
|
out, err = execRuntime.GetRunner().SudoCmd("awk -F': *' '/^model name/{print $2; exit}' /proc/cpuinfo 2>/dev/null || true", false, false)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
if out != "" {
|
||||||
|
lo := strings.ToLower(strings.TrimSpace(out))
|
||||||
|
for _, t := range targets {
|
||||||
|
if strings.Contains(lo, strings.ToLower(t)) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func RocmVersion() (*semver.Version, error) {
|
||||||
|
const rocmVersionFile = "/opt/rocm/.info/version"
|
||||||
|
data, err := os.ReadFile(rocmVersionFile)
|
||||||
|
if err != nil {
|
||||||
|
// no ROCm installed, nothing to check
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
curStr := strings.TrimSpace(string(data))
|
||||||
|
cur, err := semver.NewVersion(curStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid rocm version: %s", curStr)
|
||||||
|
}
|
||||||
|
return cur, nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user