Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/mapt/cmd/aws/hosts/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func getRHELAICreate() *cobra.Command {
Prefix: "main",
Version: viper.GetString(params.RhelAIVersion),
Accelerator: viper.GetString(params.RhelAIAccelerator),
CustomAMI: viper.GetString(params.RhelAIAMICustom),
CustomImage: viper.GetString(params.RhelAICustomImage),
ComputeRequest: params.ComputeRequestArgs(),
Spot: params.SpotArgs(),
Timeout: viper.GetString(params.Timeout),
Expand All @@ -69,7 +69,7 @@ func getRHELAICreate() *cobra.Command {
flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc)
flagSet.StringP(params.RhelAIVersion, "", params.RhelAIVersionDefault, params.RhelAIVersionDesc)
flagSet.StringP(params.RhelAIAccelerator, "", params.RhelAIAccelearatorDefault, params.RhelAIAccelearatorDesc)
flagSet.StringP(params.RhelAIAMICustom, "", "", params.RhelAIAMICustomDesc)
flagSet.StringP(params.RhelAICustomImage, "", "", params.RhelAICustomImageDesc)
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
params.AddComputeRequestFlags(flagSet)
params.AddSpotFlags(flagSet)
Expand Down
4 changes: 2 additions & 2 deletions cmd/mapt/cmd/azure/hosts/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func getRHELAICreate() *cobra.Command {
Prefix: "main",
Version: viper.GetString(params.RhelAIVersion),
Accelerator: viper.GetString(params.RhelAIAccelerator),
CustomAMI: viper.GetString(params.RhelAIAMICustom),
CustomImage: viper.GetString(params.RhelAICustomImage),
ComputeRequest: params.ComputeRequestArgs(),
Spot: params.SpotArgs(),
Timeout: viper.GetString(params.Timeout),
Expand All @@ -69,7 +69,7 @@ func getRHELAICreate() *cobra.Command {
flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc)
flagSet.StringP(params.RhelAIVersion, "", params.RhelAIVersionDefault, params.RhelAIVersionDesc)
flagSet.StringP(params.RhelAIAccelerator, "", params.RhelAIAccelearatorDefault, params.RhelAIAccelearatorDesc)
flagSet.StringP(params.RhelAIAMICustom, "", "", params.RhelAIAMICustomDesc)
flagSet.StringP(params.RhelAICustomImage, "", "", params.RhelAICustomImageDesc)
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
params.AddComputeRequestFlags(flagSet)
params.AddSpotFlags(flagSet)
Expand Down
24 changes: 12 additions & 12 deletions cmd/mapt/cmd/params/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,17 @@ const (
cirrusPWLabels string = "it-cirrus-pw-labels"
cirrusPWLabelsDesc string = "additional labels to use on the persistent worker (--it-cirrus-pw-labels key1=value1,key2=value2)"

glRunnerToken string = "glrunner-token"
glRunnerTokenDesc string = "GitLab Personal Access Token with api scope"
glRunnerProjectID string = "glrunner-project-id"
glRunnerToken string = "glrunner-token"
glRunnerTokenDesc string = "GitLab Personal Access Token with api scope"
glRunnerProjectID string = "glrunner-project-id"
glRunnerProjectIDDesc string = "GitLab project ID for project runner registration"
glRunnerGroupID string = "glrunner-group-id"
glRunnerGroupIDDesc string = "GitLab group ID for group runner registration (alternative to --glrunner-project-id)"
glRunnerURL string = "glrunner-url"
glRunnerURLDesc string = "GitLab instance URL (e.g., https://gitlab.com, https://gitlab.example.com)"
glRunnerURLDefault string = "https://gitlab.com"
glRunnerTags string = "glrunner-tags"
glRunnerTagsDesc string = "List of tags separated by comma to be added to the self-hosted runner"
glRunnerGroupID string = "glrunner-group-id"
glRunnerGroupIDDesc string = "GitLab group ID for group runner registration (alternative to --glrunner-project-id)"
glRunnerURL string = "glrunner-url"
glRunnerURLDesc string = "GitLab instance URL (e.g., https://gitlab.com, https://gitlab.example.com)"
glRunnerURLDefault string = "https://gitlab.com"
glRunnerTags string = "glrunner-tags"
glRunnerTagsDesc string = "List of tags separated by comma to be added to the self-hosted runner"

//RHEL
SubsUsername string = "rh-subscription-username"
Expand All @@ -106,8 +106,8 @@ const (
RhelAIAccelerator string = "accelerator"
RhelAIAccelearatorDesc string = "accelerator type. Valid types: cuda and rocm"
RhelAIAccelearatorDefault string = "cuda"
RhelAIAMICustom string = "custom-ami"
RhelAIAMICustomDesc string = "custom AMI to spin RHEL AI OS"
RhelAICustomImage string = "custom-image"
RhelAICustomImageDesc string = "custom image name to spin RHEL AI OS (AMI name for AWS, image name for Azure)"

// Serverless
Timeout string = "timeout"
Expand Down
4 changes: 2 additions & 2 deletions pkg/provider/aws/action/rhel-ai/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
}
// Compose request
amiName := amiName(&args.Accelerator, &args.Version)
if len(args.CustomAMI) != 0 {
amiName = fmt.Sprintf("%s*", args.CustomAMI)
if len(args.CustomImage) != 0 {
amiName = fmt.Sprintf("%s*", args.CustomImage)
}
prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main")
r := rhelAIRequest{
Expand Down
17 changes: 12 additions & 5 deletions pkg/provider/azure/action/rhel-ai/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,32 @@ const (
username = "azureuser"
)

func imageId(accelerator, version string) string {
iName := fmt.Sprintf(imageNameRegex, accelerator, version)
gName := strings.ReplaceAll(iName, "-", "_")
func imageIdFromName(imageName string) string {
gName := strings.ReplaceAll(imageName, "-", "_")
return fmt.Sprintf(imageIdRegex,
imageOwnerSubscriptionId,
gName,
iName)
imageName)
}

func imageId(accelerator, version string) string {
return imageIdFromName(fmt.Sprintf(imageNameRegex, accelerator, version))
}

func Create(mCtxArgs *maptContext.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
logging.Debug("Creating RHEL Server")
sharedImageID := imageId(args.Accelerator, args.Version)
if args.CustomImage != "" {
sharedImageID = imageIdFromName(args.CustomImage)
}
azureLinuxRequest :=
&azureLinux.LinuxArgs{
Prefix: args.Prefix,
// Location: args.Location,
ComputeRequest: args.ComputeRequest,
Spot: args.Spot,
ImageRef: &data.ImageReference{
SharedImageID: imageId(args.Accelerator, args.Version),
SharedImageID: sharedImageID,
},
Username: username,
ReadinessCommand: command.CommandPing}
Expand Down
2 changes: 1 addition & 1 deletion pkg/target/host/rhelai/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ type RHELAIArgs struct {
Prefix string
Accelerator string
Version string
CustomAMI string
CustomImage string
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, as this is an API, we should decide on a single arg name that works for both the AMI in AWS and the image in Azure. You can use CustomImageName or CustomVersion, but only one of them, which will then be translated to the right name for each provider.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated but not tested yet

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested with both AWS and Azure

Arch string
ComputeRequest *cr.ComputeRequestArgs
Spot *spotTypes.SpotArgs
Expand Down
8 changes: 4 additions & 4 deletions tkn/infra-aws-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ spec:
- name: cpus
description: Number of CPUs for the cloud instance (default 8)
default: "8"
- name: custom-ami
description: Custom AMI to use for the cloud instance
- name: custom-image
description: Custom image name to use for the cloud instance
default: ""
- name: gpu-manufacturer
description: Manufacturer company name for GPU. (i.e. NVIDIA)
Expand Down Expand Up @@ -251,8 +251,8 @@ spec:
if [[ "$(params.nested-virt)" == "true" ]]; then
cmd+="--nested-virt "
fi
if [[ "$(params.custom-ami)" != "" ]]; then
cmd+="--custom-ami '$(params.custom-ami)' "
if [[ "$(params.custom-image)" != "" ]]; then
cmd+="--custom-image '$(params.custom-image)' "
else
cmd+="--version '$(params.version)' "
fi
Expand Down
26 changes: 19 additions & 7 deletions tkn/infra-azure-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,15 @@ spec:
or `$(context.taskRun.uid)` depending on the value of `ownerKind`.

# VM type params
- name: accelerator
description: accelerator for RHEL AI OS rocm or cuda (default rocm)
default: "rocm"
- name: compute-sizes
description: Comma separated list of sizes for the machines to be requested. If set this takes precedence over compute by args
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
- name: custom-image
description: Custom image name to use for the cloud instance
default: ""
- name: spot
description: Check best spot option to spin the machine and will create resources on that region.
default: "true"
Expand All @@ -101,10 +107,7 @@ spec:
- name: version
description: Version of RHEL AI OS (default 3.2.0)
default: "3.2.0"
- name: accelerator
description: accelerator for RHEL AI OS rocm or cuda (default rocm)
default: "rocm"


# Metadata params
- name: tags
description: tags for the resources created on the providers
Expand Down Expand Up @@ -211,9 +214,18 @@ spec:

if [[ "$(params.operation)" == "create" ]]; then
cmd+="--conn-details-output /opt/host-info "
cmd+="--compute-sizes '$(params.compute-sizes)' "
cmd+="--version '$(params.version)' "
cmd+="--accelerator '$(params.accelerator)' "

if [[ "$(params.compute-sizes)" != "" ]]; then
cmd+="--compute-sizes '$(params.compute-sizes)' "
else
cmd+="--accelerator '$(params.accelerator)' "
fi
if [[ "$(params.custom-image)" != "" ]]; then
cmd+="--custom-image '$(params.custom-image)' "
else
cmd+="--version '$(params.version)' "
fi

if [[ "$(params.spot)" == "true" ]]; then
cmd+="--spot "
cmd+="--spot-increase-rate '$(params.spot-increase-rate)' "
Expand Down
2 changes: 1 addition & 1 deletion tkn/template/infra-aws-ocp-snc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ spec:
- name: profile
description: Comma-separated list of profiles to install on the cluster (e.g. virtualization, serverless-serving, serverless-eventing, serverless, servicemesh). When virtualization is selected, a bare metal instance is used.
default: "''"

# Metadata params
- name: tags
description: tags for the resources created on the providers
Expand Down
8 changes: 4 additions & 4 deletions tkn/template/infra-aws-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ spec:
- name: cpus
description: Number of CPUs for the cloud instance (default 8)
default: "8"
- name: custom-ami
description: Custom AMI to use for the cloud instance
- name: custom-image
description: Custom image name to use for the cloud instance
default: ""
- name: gpu-manufacturer
description: Manufacturer company name for GPU. (i.e. NVIDIA)
Expand Down Expand Up @@ -251,8 +251,8 @@ spec:
if [[ "$(params.nested-virt)" == "true" ]]; then
cmd+="--nested-virt "
fi
if [[ "$(params.custom-ami)" != "" ]]; then
cmd+="--custom-ami '$(params.custom-ami)' "
if [[ "$(params.custom-image)" != "" ]]; then
cmd+="--custom-image '$(params.custom-image)' "
else
cmd+="--version '$(params.version)' "
fi
Expand Down
26 changes: 19 additions & 7 deletions tkn/template/infra-azure-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,15 @@ spec:
or `$(context.taskRun.uid)` depending on the value of `ownerKind`.

# VM type params
- name: accelerator
description: accelerator for RHEL AI OS rocm or cuda (default rocm)
default: "rocm"
- name: compute-sizes
description: Comma separated list of sizes for the machines to be requested. If set this takes precedence over compute by args
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
- name: custom-image
description: Custom image name to use for the cloud instance
default: ""
- name: spot
description: Check best spot option to spin the machine and will create resources on that region.
default: "true"
Expand All @@ -101,10 +107,7 @@ spec:
- name: version
description: Version of RHEL AI OS (default 3.2.0)
default: "3.2.0"
- name: accelerator
description: accelerator for RHEL AI OS rocm or cuda (default rocm)
default: "rocm"


# Metadata params
- name: tags
description: tags for the resources created on the providers
Expand Down Expand Up @@ -211,9 +214,18 @@ spec:

if [[ "$(params.operation)" == "create" ]]; then
cmd+="--conn-details-output /opt/host-info "
cmd+="--compute-sizes '$(params.compute-sizes)' "
cmd+="--version '$(params.version)' "
cmd+="--accelerator '$(params.accelerator)' "

if [[ "$(params.compute-sizes)" != "" ]]; then
cmd+="--compute-sizes '$(params.compute-sizes)' "
else
cmd+="--accelerator '$(params.accelerator)' "
fi
if [[ "$(params.custom-image)" != "" ]]; then
cmd+="--custom-image '$(params.custom-image)' "
else
cmd+="--version '$(params.version)' "
fi

if [[ "$(params.spot)" == "true" ]]; then
cmd+="--spot "
cmd+="--spot-increase-rate '$(params.spot-increase-rate)' "
Expand Down