From 11adcb942ced8716c5457d46bf5dbba314bf87d4 Mon Sep 17 00:00:00 2001
From: Rodolfo Pacheco
Date: Thu, 5 Nov 2020 12:45:53 -0500
Subject: [PATCH] WIP: Properly select the vBMHs that match the scheduling
 criteria. Extrapolate is not working yet.

---
 Makefile                                      |   2 +-
 config/kustomization.yaml                     |  11 ++
 config/manager/manager.yaml                   |   5 +-
 config/rbac/auth_proxy_role_binding.yaml      |   2 +-
 config/rbac/kustomization.yaml                |   3 +
 config/rbac/leader_election_role_binding.yaml |   2 +-
 config/rbac/role_binding.yaml                 |   2 +-
 config/rbac/sipcluster_scheduler_binding.yaml |  26 +++++
 config/rbac/sipcluster_scheduler_role.yaml    |  47 ++++++++
 config/webhook/kustomization.yaml             |   2 +-
 main.go                                       |   6 ++
 pkg/controllers/sipcluster_controller.go      |  15 ++-
 pkg/services/{authhost.go => authpod.go}      |   0
 pkg/services/{jumphost.go => jumppod.go}      |   0
 pkg/vbmh/machines.go                          | 100 +++++++++++++-----
 15 files changed, 188 insertions(+), 35 deletions(-)
 create mode 100644 config/kustomization.yaml
 create mode 100644 config/rbac/sipcluster_scheduler_binding.yaml
 create mode 100644 config/rbac/sipcluster_scheduler_role.yaml
 rename pkg/services/{authhost.go => authpod.go} (100%)
 rename pkg/services/{jumphost.go => jumppod.go} (100%)

diff --git a/Makefile b/Makefile
index bded4b0..66fa1f8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # Image URL to use all building/pushing image targets
 #IMG ?= controller:latest
-IMG ?= quay.io/jezogwza/airship:sip.v1
+IMG ?= quay.io/jezogwza/airship:sip.v2
 
 # Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
 CRD_OPTIONS ?= "crd:trivialVersions=true"
diff --git a/config/kustomization.yaml b/config/kustomization.yaml
new file mode 100644
index 0000000..fe5fb50
--- /dev/null
+++ b/config/kustomization.yaml
@@ -0,0 +1,11 @@
+#
+# Adds namespace to all resources.
+namespace: sip-system
+
+resources:
+  - manager
+  - certmanager
+  - crd
+  - prometheus
+  - rbac
+  - webhook
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index b6c85a5..f269681 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -3,7 +3,7 @@ kind: Namespace
 metadata:
   labels:
     control-plane: controller-manager
-  name: system
+  name: sip-system
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -27,7 +27,8 @@ spec:
         - /manager
         args:
         - --enable-leader-election
-        image: controller:latest
+        image: quay.io/jezogwza/airship:sip.v1
+        imagePullPolicy: Always
         name: manager
         resources:
           limits:
diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/auth_proxy_role_binding.yaml
index 48ed1e4..b0bb406 100644
--- a/config/rbac/auth_proxy_role_binding.yaml
+++ b/config/rbac/auth_proxy_role_binding.yaml
@@ -9,4 +9,4 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: default
-  namespace: system
+  namespace: sip-cluster-system
diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml
index 66c2833..42fd758 100644
--- a/config/rbac/kustomization.yaml
+++ b/config/rbac/kustomization.yaml
@@ -1,8 +1,11 @@
 resources:
 - role.yaml
 - role_binding.yaml
+- sipcluster_scheduler_role.yaml
+- sipcluster_scheduler_binding.yaml
 - leader_election_role.yaml
 - leader_election_role_binding.yaml
+
 # Comment the following 4 lines if you want to disable
 # the auth proxy (https://github.com/brancz/kube-rbac-proxy)
 # which protects your /metrics endpoint.
diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml
index eed1690..ec3f6f7 100644
--- a/config/rbac/leader_election_role_binding.yaml
+++ b/config/rbac/leader_election_role_binding.yaml
@@ -9,4 +9,4 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: default
-  namespace: system
+  namespace: sip-cluster-system
diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml
index 8f26587..831bae5 100644
--- a/config/rbac/role_binding.yaml
+++ b/config/rbac/role_binding.yaml
@@ -9,4 +9,4 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: default
-  namespace: system
+  namespace: sip-cluster-system
diff --git a/config/rbac/sipcluster_scheduler_binding.yaml b/config/rbac/sipcluster_scheduler_binding.yaml
new file mode 100644
index 0000000..22d239c
--- /dev/null
+++ b/config/rbac/sipcluster_scheduler_binding.yaml
@@ -0,0 +1,26 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: scheduler-rolebinding
+  namespace: metal3
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: scheduler-role
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: sip-cluster-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cluster-scheduler-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-scheduler-role
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: sip-cluster-system
diff --git a/config/rbac/sipcluster_scheduler_role.yaml b/config/rbac/sipcluster_scheduler_role.yaml
new file mode 100644
index 0000000..515beeb
--- /dev/null
+++ b/config/rbac/sipcluster_scheduler_role.yaml
@@ -0,0 +1,47 @@
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: scheduler-role
+  namespace: metal3
+rules:
+- apiGroups:
+  - metal3.io
+  resources:
+  - baremetalhosts
+  verbs:
+  - get
+  - list
+  - patch
+  - watch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - get
+  - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cluster-scheduler-role
+rules:
+- apiGroups:
+  - metal3.io
+  resources:
+  - baremetalhosts
+  verbs:
+  - get
+  - list
+  - patch
+  - watch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - get
+  - list
diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml
index 9cf2613..f6b559e 100644
--- a/config/webhook/kustomization.yaml
+++ b/config/webhook/kustomization.yaml
@@ -1,5 +1,5 @@
 resources:
-- manifests.yaml
+  #- manifests.yaml
 - service.yaml
 
 configurations:
diff --git a/main.go b/main.go
index c6d7214..0d61fcc 100644
--- a/main.go
+++ b/main.go
@@ -28,7 +28,10 @@ import (
 
 	airshipv1 "sipcluster/pkg/api/v1"
 	"sipcluster/pkg/controllers"
+
 	// +kubebuilder:scaffold:imports
+
+	metal3 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
 )
 
 var (
@@ -41,6 +44,9 @@ func init() {
 
 	_ = airshipv1.AddToScheme(scheme)
 	// +kubebuilder:scaffold:scheme
+
+	// Register the Metal3 API types (BareMetalHost CRD) with the scheme
+	_ = metal3.AddToScheme(scheme)
 }
 
 func main() {
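Note on the main.go change above: a controller-runtime client can only encode and decode types that are registered in the runtime scheme, so without the metal3.AddToScheme call the List on a BareMetalHostList in pkg/vbmh would fail with a "no kind is registered" error. A minimal sketch of the pattern (standalone illustration, not part of this patch):

    package main

    import (
    	metal3 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
    	"k8s.io/apimachinery/pkg/runtime"
    	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
    )

    // buildScheme mirrors what init() does in main.go: the scheme maps Go types
    // (e.g. BareMetalHostList) to GroupVersionKinds so the controller-runtime
    // client can serialize them when talking to the API server.
    func buildScheme() *runtime.Scheme {
    	scheme := runtime.NewScheme()
    	_ = clientgoscheme.AddToScheme(scheme) // core Kubernetes types
    	_ = metal3.AddToScheme(scheme)         // metal3.io/v1alpha1 BareMetalHost types
    	return scheme
    }
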
diff --git a/pkg/controllers/sipcluster_controller.go b/pkg/controllers/sipcluster_controller.go
index 8eb6779..c0e5f82 100644
--- a/pkg/controllers/sipcluster_controller.go
+++ b/pkg/controllers/sipcluster_controller.go
@@ -18,6 +18,7 @@ package controllers
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/go-logr/logr"
 	// "github.com/prometheus/common/log"
@@ -39,6 +40,9 @@ type SIPClusterReconciler struct {
 
 // +kubebuilder:rbac:groups=airship.airshipit.org,resources=sipclusters,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=airship.airshipit.org,resources=sipclusters/status,verbs=get;update;patch
+
+// +kubebuilder:rbac:groups="metal3.io",resources=baremetalhosts,verbs=get;update;patch;list
 
 func (r *SIPClusterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	ctx := context.Background()
@@ -54,8 +58,9 @@ func (r *SIPClusterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
 		return ctrl.Result{}, nil
 	}
 	// machines
-	err, machines := r.gatherVM(sip)
+	err, machines := r.gatherVBMH(sip)
 	if err != nil {
+		log.Error(err, "unable to gather vBMHs")
 		return ctrl.Result{}, err
 	}
 
@@ -97,19 +102,21 @@ func (r *SIPClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 */
 
 // machines
-func (r *SIPClusterReconciler) gatherVM(sip airshipv1.SIPCluster) (error, *airshipvms.MachineList) {
+func (r *SIPClusterReconciler) gatherVBMH(sip airshipv1.SIPCluster) (error, *airshipvms.MachineList) {
 	// 1- Let me retrieve all BMHs that are unlabeled or already labeled with the target Tenant/CNF
 	// 2- Let me now select the ones that meet the scheduling criteria
 	// If I schedule successfully, continue
 	// If the schedule is not complete, throw an error
 	machines := &airshipvms.MachineList{}
+	fmt.Printf("gatherVBMH.Schedule sip:%v machines:%v\n", sip, machines)
 	err := machines.Schedule(sip.Spec.Nodes, r.Client)
 	if err != nil {
 		return err, machines
 	}
 
-	// we extra the information in a generic way
-	// So that LB and Jump Host all leverage the same
+	// we extract the information in a generic way
+	// so that the LB, Jump and Auth pods all leverage the same
+	fmt.Printf("gatherVBMH.Extrapolate sip:%v machines:%v\n", sip, machines)
 	err = machines.Extrapolate(sip, r.Client)
 	if err != nil {
 		return err, machines
diff --git a/pkg/services/authhost.go b/pkg/services/authpod.go
similarity index 100%
rename from pkg/services/authhost.go
rename to pkg/services/authpod.go
diff --git a/pkg/services/jumphost.go b/pkg/services/jumppod.go
similarity index 100%
rename from pkg/services/jumphost.go
rename to pkg/services/jumppod.go
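The reconcile path added above boils down to two calls on MachineList: Schedule (pick vBMHs that satisfy the per-role counts and constraints) and Extrapolate (pull network data for the picks; per the commit subject this half is still broken). A condensed sketch of the flow, using only functions that appear in this patch:

    // Condensed from gatherVBMH above; error handling shortened.
    machines := &airshipvms.MachineList{}
    if err := machines.Schedule(sip.Spec.Nodes, r.Client); err != nil {
    	return err, machines // no candidates, or constraints cannot be met
    }
    if err := machines.Extrapolate(sip, r.Client); err != nil {
    	return err, machines // WIP: currently failing
    }
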
diff --git a/pkg/vbmh/machines.go b/pkg/vbmh/machines.go
index f5d5c5a..e41c6c3 100644
--- a/pkg/vbmh/machines.go
+++ b/pkg/vbmh/machines.go
@@ -25,11 +25,12 @@ import (
 	metal3 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
 	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	airshipv1 "sipcluster/pkg/api/v1"
 	//rbacv1 "k8s.io/api/rbac/v1"
 	//"k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 // ScheduledState
@@ -51,21 +52,29 @@ const (
 )
 
 const (
-	BaseAirshipSelector = "airshipit.org"
-	SipScheduled        = BaseAirshipSelector + "/sip-scheduled in (True, true)"
-	SipNotScheduled     = BaseAirshipSelector + "/sip-scheduled in (False, false)"
+	BaseAirshipSelector  = "sip.airshipit.org"
+	SipScheduleLabelName = "sip-scheduled"
+	SipScheduleLabel     = BaseAirshipSelector + "/" + SipScheduleLabelName
+
+	SipScheduled    = SipScheduleLabel + "=true"
+	SipNotScheduled = SipScheduleLabel + "=false"
 
 	// This is a placeholder. Need to synchronize the constants below with ViNO.
 	// Probably pull these, or equivalent values, from a ViNO pkg.
 	RackLabel   = BaseAirshipSelector + "/rack"
-	ServerLabel = BaseAirshipSelector + "/rack"
+	ServerLabel = BaseAirshipSelector + "/server"
+
+	// This should be configurable,
+	// but probably not in the CR. TBD
+	// TODO
+	VBMH_NAMESPACE = "metal3"
 )
 
 // Machine represents an individual BMH CR, and the appropriate
 // attributes required to manage the SIP Cluster scheduling and
 // processing needs around them
 type Machine struct {
-	Bmh            metal3.BareMetalHost
+	Bmh            *metal3.BareMetalHost
 	ScheduleStatus ScheduledState
 	// scheduleLabels
 	// I expect to build this over time / if not might not be needed
@@ -88,17 +97,18 @@ type MachineList struct {
 }
 
 func (ml *MachineList) Schedule(nodes map[airshipv1.VmRoles]airshipv1.NodeSet, c client.Client) error {
+	// Initialize the target list
 	ml.bmhs = ml.init(nodes)
 
 	// Identify vBMHs that meet the appropriate selection criteria
-	bmList, err := ml.getVBMH(c)
+	bmhList, err := ml.getVBMH(c)
 	if err != nil {
 		return err
 	}
 
 	// Identify and select the vBMHs I will actually use
-	err = ml.identifyNodes(nodes, bmList)
+	err = ml.identifyNodes(nodes, bmhList)
 	if err != nil {
 		return err
 	}
@@ -106,7 +116,7 @@ func (ml *MachineList) Schedule(nodes map[airshipv1.VmRoles]airshipv1.NodeSet, c
 	// If I get here the MachineList should have a selected set of Machines
 	// They are in the ScheduleStatus of ToBeScheduled as well as the Role
 	//
-
+	fmt.Printf("Schedule ml.bmhs size:%d\n", len(ml.bmhs))
 	return nil
 }
 
@@ -115,12 +125,16 @@ func (ml *MachineList) init(nodes map[airshipv1.VmRoles]airshipv1.NodeSet) []*Ma
 	for _, nodeCfg := range nodes {
 		mlSize = mlSize + nodeCfg.Count.Active + nodeCfg.Count.Standby
 	}
-	return make([]*Machine, mlSize)
+	//fmt.Printf("Schedule.init mlSize:%d\n", mlSize)
+
+	return make([]*Machine, 0)
 }
 
 func (ml *MachineList) getVBMH(c client.Client) (*metal3.BareMetalHostList, error) {
-	bmList := &metal3.BareMetalHostList{}
+
+	bmhList := &metal3.BareMetalHostList{}
+
 	// I am thinking we can add a Label for unscheduled.
 	// SIP Cluster can change it to scheduled.
 	// We can then simply use this to select UNSCHEDULED
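The comments above describe the intended label contract: every candidate vBMH carries sip.airshipit.org/sip-scheduled=false until SIP claims it, after which the value flips to true and later Schedule calls stop selecting it. A hypothetical helper (not in this patch; imports as in machines.go) showing the claiming side of that contract:

    // claimHost is illustrative only: flip the schedule label on a host that
    // was returned by the MatchingLabels(sip-scheduled=false) query below, so
    // subsequent getVBMH calls no longer see it as a candidate.
    func claimHost(ctx context.Context, c client.Client, bmh *metal3.BareMetalHost) error {
    	if bmh.Labels == nil {
    		bmh.Labels = map[string]string{}
    	}
    	bmh.Labels[SipScheduleLabel] = "true"
    	return c.Update(ctx, bmh)
    }
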
@@ -128,24 +142,42 @@ func (ml *MachineList) getVBMH(c client.Client) (*metal3.BareMetalHostList, erro
 	   This possibly will not be needed if I figure out how to provide a != label selector.
 	   Then we can use DOESN'T HAVE A TENANT LABEL
 	*/
-	labelSelector := metav1.LabelSelector{MatchLabels: map[string]string{SipNotScheduled: "False"}}
+	scheduleLabels := map[string]string{SipScheduleLabel: "false"}
+
+	/**
+	DELETE SOON, ..
+	labelSelector := metav1.LabelSelector{MatchLabels: scheduleLabels}
 	bmhSelector, err := metav1.LabelSelectorAsSelector(&labelSelector)
 	if err != nil {
-		return bmList, err
+		return bmhList, err
 	}
+
+	fmt.Printf("Schedule.getVBMH bmhSelector:%v\n", bmhSelector)
+	// TODO Namespace where vBMH needs to be found
+	// Might be in THE SIP CR perhaps
 	bmListOptions := &client.ListOptions{
 		LabelSelector: bmhSelector,
 		Limit:         100,
+		Namespace:     VBMH_NAMESPACE,
 	}
 
-	err = c.List(context.TODO(), bmList, bmListOptions)
+	fmt.Printf("Schedule.getVBMH bmList context.Background bmhList:%v\n bmListOptions:%v scheduleLabels:%v\n", bmhList, bmListOptions, scheduleLabels)
+	err = c.List(context.Background(), bmhList, bmListOptions, client.InNamespace(VBMH_NAMESPACE))
+	*/
+
+	err := c.List(context.Background(), bmhList, client.MatchingLabels(scheduleLabels))
 	if err != nil {
-		return bmList, err
+		fmt.Printf("Schedule.getVBMH bmhList err:%v\n", err)
+		return bmhList, err
 	}
 
-	return bmList, nil
+	fmt.Printf("Schedule.getVBMH bmhList size:%d\n", len(bmhList.Items))
+	if len(bmhList.Items) > 0 {
+		return bmhList, nil
+	}
+	return bmhList, fmt.Errorf("unable to identify vBMHs available for scheduling; selecting %v", scheduleLabels)
 }
 
-func (ml *MachineList) identifyNodes(nodes map[airshipv1.VmRoles]airshipv1.NodeSet, bmList *metal3.BareMetalHostList) error {
+func (ml *MachineList) identifyNodes(nodes map[airshipv1.VmRoles]airshipv1.NodeSet, bmhList *metal3.BareMetalHostList) error {
 	// If using the SIP Scheduled label, we now have a list of vBMHs
 	// that are not scheduled
 	// Next I need to apply the constraints
@@ -154,17 +186,18 @@ func (ml *MachineList) identifyNodes(nodes map[airshipv1.VmRoles]airshipv1.NodeS
 	// Only deals with AntiAffinity at:
 	// - Racks  : Don't select two machines in the same rack
 	// - Server : Don't select two machines in the same server
+	fmt.Printf("Schedule.identifyNodes bmList size:%d\n", len(bmhList.Items))
 	for nodeRole, nodeCfg := range nodes {
 		scheduleSetMap, err := ml.initScheduleMaps(nodeCfg.Scheduling)
 		if err != nil {
 			return err
 		}
-		err = ml.scheduleIt(nodeRole, nodeCfg, bmList, scheduleSetMap)
+		err = ml.scheduleIt(nodeRole, nodeCfg, bmhList, scheduleSetMap)
 		if err != nil {
 			return err
 		}
 	}
-
+	fmt.Printf("Schedule.identifyNodes ml.bmhs size:%d\n", len(ml.bmhs))
 	return nil
 }
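identifyNodes applies a poor-man's anti-affinity: one ScheduleSet per active constraint (rack, server), where a host is only eligible if its label value has not already been consumed by an earlier pick. The set logic in isolation (a simplified, illustrative mirror of ScheduleSet.Exists/Add, not code from this patch):

    // pickSpread keeps at most one pick per rack, mirroring how scheduleIt
    // consults Exists() and then Add()s the rack value of each accepted host.
    func pickSpread(rackOf map[string]string, hosts []string, want int) []string {
    	seen := map[string]bool{}
    	var picked []string
    	for _, h := range hosts {
    		if seen[rackOf[h]] {
    			continue // anti-affinity: this rack already holds a pick
    		}
    		seen[rackOf[h]] = true
    		picked = append(picked, h)
    		if len(picked) == want {
    			break
    		}
    	}
    	return picked
    }
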
@@ -188,39 +221,50 @@ func (ml *MachineList) initScheduleMaps(constraints []airshipv1.SchedulingOption
 		}
 	}
 
+	fmt.Printf("Schedule.initScheduleMaps setMap:%v\n", setMap)
 	if len(setMap) > 0 {
-		return setMap, ErrorConstraintNotFound{}
+		return setMap, nil
 	}
-	return setMap, nil
+	return setMap, ErrorConstraintNotFound{}
 }
 
 func (ml *MachineList) scheduleIt(nodeRole airshipv1.VmRoles, nodeCfg airshipv1.NodeSet, bmList *metal3.BareMetalHostList, scheduleSetMap map[airshipv1.SchedulingOptions]*ScheduleSet) error {
 	validBmh := true
 	nodeTarget := (nodeCfg.Count.Active + nodeCfg.Count.Standby)
+	fmt.Printf("Schedule.scheduleIt nodeRole:%v nodeTarget:%d nodeCfg.VmFlavor:%s ml.bmhs len:%d \n", nodeRole, nodeTarget, nodeCfg.VmFlavor, len(ml.bmhs))
 	for _, bmh := range bmList.Items {
+		fmt.Printf("---------------\n Schedule.scheduleIt bmh.ObjectMeta.Name:%s \n", bmh.ObjectMeta.Name)
 		for _, constraint := range nodeCfg.Scheduling {
 			// Do I care about this constraint
 			if scheduleSetMap[constraint].Active() {
 				// Check if bmh has the label
 				// There is a func (host *BareMetalHost) getLabel(name string) string {
-				// Not sure why its not Public, so sing our won method
+				// Not sure why it's not public, so using our own method
 				cLabelValue, cFlavorMatches := scheduleSetMap[constraint].GetLabels(bmh.Labels, nodeCfg.VmFlavor)
+				// If it doesn't match the flavor it's not valid
+				validBmh = cFlavorMatches
+				// If it does match the flavor
 				if cLabelValue != "" && cFlavorMatches {
-					// If its in th elist , theen this bmh is disqualified. Skip it
+					// If it's already in the list for this constraint, then this bmh is disqualified. Skip it
 					if scheduleSetMap[constraint].Exists(cLabelValue) {
 						validBmh = false
 						break
+					} else {
+						scheduleSetMap[constraint].Add(cLabelValue)
 					}
 				}
+				fmt.Printf("Schedule.scheduleIt cLabelValue:%s, cFlavorMatches:%t scheduleSetMap[%v]:%v\n", cLabelValue, cFlavorMatches, constraint, scheduleSetMap[constraint])
+
 			}
 		}
+		fmt.Printf("Schedule.scheduleIt nodeTarget:%d, validBmh:%t ml.bmhs len:%d\n", nodeTarget, validBmh, len(ml.bmhs))
 		// All the constraints have been checked
 		if validBmh {
 			// Let's add it to the list as a schedulable thing
+			bmh := bmh // per-iteration copy, so &bmh below does not alias the loop variable
 			m := &Machine{
-				Bmh:            bmh,
+				Bmh:            &bmh,
 				ScheduleStatus: ToBeScheduled,
 				VmRole:         nodeRole,
 				Data: &MachineData{
@@ -229,6 +273,7 @@ func (ml *MachineList) scheduleIt(nodeRole airshipv1.VmRoles, nodeCfg airshipv1.
 			}
 			// Probably need to use the nodeRole as a label here
 			ml.bmhs = append(ml.bmhs, m)
+			fmt.Printf("Schedule.scheduleIt ADDED ml.bmhs len:%d machine:%v \n", len(ml.bmhs), m)
 			nodeTarget = nodeTarget - 1
 			if nodeTarget == 0 {
 				break
@@ -239,6 +284,7 @@ func (ml *MachineList) scheduleIt(nodeRole airshipv1.VmRoles, nodeCfg airshipv1.
 		validBmh = true
 	}
 
+	fmt.Printf("Schedule.scheduleIt nodeTarget:%d, ml.bmhs:%d\n", nodeTarget, len(ml.bmhs))
 	if nodeTarget > 0 {
 		return ErrorUnableToFullySchedule{
 			TargetNode: nodeRole,
@@ -252,7 +298,9 @@ func (ml *MachineList) scheduleIt(nodeRole airshipv1.VmRoles, nodeCfg airshipv1.
 // The intention is to extract the IP information from the referenced networkData field for the BareMetalHost
 func (ml *MachineList) Extrapolate(sip airshipv1.SIPCluster, c client.Client) error {
 	// Let's get the data for all selected BMHs.
+	fmt.Printf("Schedule.Extrapolate ml.bmhs:%d\n", len(ml.bmhs))
 	for _, machine := range ml.bmhs {
+		fmt.Printf("Schedule.Extrapolate machine:%v\n", machine)
 		bmh := machine.Bmh
 
 		// Identify Network Data Secret name
@@ -492,7 +540,11 @@ func (ss *ScheduleSet) Exists(value string) bool {
 	return false
 }
 
+func (ss *ScheduleSet) Add(labelValue string) {
+	ss.set[labelValue] = true
+}
 func (ss *ScheduleSet) GetLabels(labels map[string]string, flavorLabel string) (string, bool) {
+	fmt.Printf("Schedule.scheduleIt.GetLabels labels:%v, flavorLabel:%s\n", labels, flavorLabel)
 	if labels == nil {
 		return "", false
 	}
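For the Extrapolate half that is still failing: the comment in machines.go says the intent is to read the networkData secret referenced by each selected BareMetalHost. A hedged sketch of that first step, assuming the Spec.NetworkData *corev1.SecretReference field of the metal3 v1alpha1 API (the helper name is illustrative, not from this patch):

    // networkDataFor resolves the networkData secret referenced by a scheduled
    // host. Assumes bmh.Spec.NetworkData is a *corev1.SecretReference as in the
    // metal3.io/v1alpha1 BareMetalHost API; returns an error when it is unset.
    func networkDataFor(ctx context.Context, c client.Client, bmh *metal3.BareMetalHost) (*corev1.Secret, error) {
    	ref := bmh.Spec.NetworkData
    	if ref == nil {
    		return nil, fmt.Errorf("bmh %s has no networkData secret reference", bmh.Name)
    	}
    	secret := &corev1.Secret{}
    	key := client.ObjectKey{Namespace: ref.Namespace, Name: ref.Name}
    	if err := c.Get(ctx, key, secret); err != nil {
    		return nil, err
    	}
    	return secret, nil
    }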