From d2a4c3d012d7863221ae059cc9cb7035fcdfcfb4 Mon Sep 17 00:00:00 2001 From: Angie Wang Date: Mon, 14 Jan 2019 14:53:10 -0500 Subject: [PATCH 1/4] Helm repository replication This updates helm-upload to stop syncing charts to the standby controller, since charts are now stored on the DRBD filesystem. Story: 2004520 Task: 28343 Depends-On: https://review.openstack.org/#/c/630763/ Change-Id: I12f17fae6124650d878ba7a560f94b7a8ed36e56 Signed-off-by: Angie Wang --- kubernetes/helm/centos/files/helm-upload | 27 ------------------------ 1 file changed, 27 deletions(-) diff --git a/kubernetes/helm/centos/files/helm-upload b/kubernetes/helm/centos/files/helm-upload index a7f8dcde5..824b7c93d 100644 --- a/kubernetes/helm/centos/files/helm-upload +++ b/kubernetes/helm/centos/files/helm-upload @@ -49,31 +49,4 @@ if [ $REINDEX -eq 1 ]; then /usr/sbin/helm repo index $REPO_DIR fi -if [ ! -f "/etc/platform/simplex" ]; then - # We're not a one node system, copy the files to the other - # controller if we can - if [ $HOSTNAME == "controller-0" ]; then - TARGET="controller-1" - else - TARGET="controller-0" - fi - - # We've modified etc/rsyncd.conf to allow access to /www/helm_charts - # To avoid races, copy over the index file last. - rsync -acv --exclude=index.yaml ${REPO_DIR}/ rsync://${TARGET}/helm_charts - if [ $? -ne 0 ]; then - echo Problem syncing helm charts to $TARGET - RETVAL=1 - fi - - rsync -acv ${REPO_DIR}/index.yaml rsync://${TARGET}/helm_charts - if [ $? -ne 0 ]; then - echo Problem syncing helm chart index file to $TARGET - RETVAL=1 - fi -fi - -# We also need to sync the helm charts on node startup -# in case they were added while the node was down. - exit $RETVAL From 6db8e31b21b271f827f3c9cabf0f0558e8ca6b58 Mon Sep 17 00:00:00 2001 From: Ovidiu Poncea Date: Thu, 20 Dec 2018 09:10:00 -0500 Subject: [PATCH 2/4] Add StarlingX specific restart command for Ceph monitors Since we don't use systemd to manage Ceph and we have pmon monitoring we have to make sure that: 1. 
Restarting is properly handled as "systemctl restart" will return error and manifest will fail; 2. Pmon does not check ceph-mon status during restart. Otherwise we risk getting into a race condition between the puppet restart and pmon detecting that ceph is down and trying a restart. Both are resolved when using /etc/init.d/ceph-init-wrapper restart. Change-Id: Ie316bb611a006bbbc92ac22c52c3973cc9f15109 Co-Authored-By: Ovidiu Poncea Implements: containerization-2002844-CEPH-persistent-storage-backend-for-Kubernetes Story: 2002844 Task: 28723 Signed-off-by: Ovidiu Poncea --- .../puppet-ceph-2.2.0/centos/build_srpm.data | 2 +- ...ecific-restart-command-for-Ceph-moni.patch | 32 +++++++++++++++++ .../centos/meta_patches/PATCH_ORDER | 1 + ...ecific-restart-command-for-Ceph-moni.patch | 35 +++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch create mode 100644 config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch diff --git a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/build_srpm.data b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/build_srpm.data index 8429863c3..c66bf348c 100644 --- a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/build_srpm.data +++ b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/build_srpm.data @@ -1 +1 @@ -TIS_PATCH_VER=6 +TIS_PATCH_VER=7 diff --git a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch new file mode 100644 index 000000000..697284f2f --- /dev/null +++ 
b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch @@ -0,0 +1,32 @@ +From b590f06d6f6ce2bd71d4d0389b6d51a78e225c19 Mon Sep 17 00:00:00 2001 +From: Ovidiu Poncea +Date: Thu, 20 Dec 2018 08:07:15 -0500 +Subject: [PATCH] Add-StarlingX-specific-restart-command-for-Ceph-moni patch + +--- + SPECS/puppet-ceph.spec | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/SPECS/puppet-ceph.spec b/SPECS/puppet-ceph.spec +index 0b728a1..e5cc64c 100644 +--- a/SPECS/puppet-ceph.spec ++++ b/SPECS/puppet-ceph.spec +@@ -14,6 +14,7 @@ Patch0003: 0003-Ceph-Jewel-rebase.patch + Patch0004: 0004-US92424-Add-OSD-support-for-persistent-naming.patch + Patch0005: 0005-Remove-puppetlabs-apt-as-ceph-requirement.patch + Patch0006: 0006-ceph-disk-prepare-invalid-data-disk-value.patch ++Patch0007: 0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch + + BuildArch: noarch + +@@ -35,6 +36,7 @@ Community Developed Ceph Module + %patch0004 -p1 + %patch0005 -p1 + %patch0006 -p1 ++%patch0007 -p1 + + find . -type f -name ".*" -exec rm {} + + find . 
-size 0 -exec rm {} + +-- +1.8.3.1 + diff --git a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/PATCH_ORDER b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/PATCH_ORDER index a2c4b1bfa..a4452e9ba 100644 --- a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/PATCH_ORDER +++ b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/meta_patches/PATCH_ORDER @@ -4,3 +4,4 @@ 0004-Add-OSD-support-for-persistent-naming.patch 0005-meta-patch-for-patch5.patch 0006-add-ceph-disk-prepare-invalid-data-disk-value-patch.patch +0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch diff --git a/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch new file mode 100644 index 000000000..1c3926fda --- /dev/null +++ b/config/puppet-modules/openstack/puppet-ceph-2.2.0/centos/patches/0007-Add-StarlingX-specific-restart-command-for-Ceph-moni.patch @@ -0,0 +1,35 @@ +From a364f37cacab78cdaad5ebd23ab24cf400a3fa40 Mon Sep 17 00:00:00 2001 +From: Ovidiu Poncea +Date: Thu, 20 Dec 2018 07:18:55 -0500 +Subject: [PATCH] Add StarlingX specific restart command for Ceph monitors + +Since we don't use systemd to manage Ceph and we have pmon monitoring we +have to make sure that: +1. Restarting is properly handled as "systemctl restart" will return error + and manifest will fail; +2. Pmon does not check ceph-mon status during restart. Otherwise we risk + getting into a race condition between the puppet restart and pmon + detecting that ceph is down and trying a restart. 
+ +Both are resolved when using /etc/init.d/ceph-init-wrapper restart + +Signed-off-by: Ovidiu Poncea +--- + manifests/mon.pp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/manifests/mon.pp b/manifests/mon.pp +index 17cb925..62d5059 100644 +--- a/manifests/mon.pp ++++ b/manifests/mon.pp +@@ -106,6 +106,7 @@ define ceph::mon ( + start => "service ceph start mon.${id}", + stop => "service ceph stop mon.${id}", + status => "service ceph status mon.${id}", ++ restart => "/etc/init.d/ceph-init-wrapper restart mon.${id}", + enable => $mon_enable, + } + } +-- +1.8.3.1 + From 7bb43963d30e77eae84873f497188e4018c21b74 Mon Sep 17 00:00:00 2001 From: Jerry Sun Date: Tue, 15 Jan 2019 09:47:33 -0500 Subject: [PATCH 3/4] Build registry-token-server without dep This change reworks the registry-token-server package spec with go dependencies downloaded at mirror-download time, rather than at build time. The dependencies (at fixed revisions) are extracted into the package's build tree for compilation. 
Story: 2002840 Task: 22783 Depends-On: https://review.openstack.org/#/c/631001/ Change-Id: Ib7d745c6469beacf029195c3e6eaa4935f398483 Signed-off-by: Jerry Sun Signed-off-by: Jason McKenna --- centos_pkg_dirs | 1 + .../centos/build_srpm.data | 12 ++++++++-- .../centos/registry-token-server.spec | 24 +++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/centos_pkg_dirs b/centos_pkg_dirs index 0fb4b3049..7131b8a1a 100644 --- a/centos_pkg_dirs +++ b/centos_pkg_dirs @@ -100,6 +100,7 @@ monitoring/influxdb-extensions kubernetes/kubernetes kubernetes/docker-distribution kubernetes/helm +kubernetes/registry-token-server logging/logmgmt filesystem/filesystem-scripts utilities/branding diff --git a/kubernetes/registry-token-server/centos/build_srpm.data b/kubernetes/registry-token-server/centos/build_srpm.data index dbf687757..7f12975c8 100644 --- a/kubernetes/registry-token-server/centos/build_srpm.data +++ b/kubernetes/registry-token-server/centos/build_srpm.data @@ -1,4 +1,12 @@ TAR_NAME="registry-token-server" SRC_DIR="$PKG_BASE/src" -COPY_LIST="$FILES_BASE/*" -TIS_PATCH_VER=0 +COPY_LIST=" \ + $FILES_BASE/* \ + $STX_BASE/downloads/Sirupsen-logrus-55eb11d21d2a31a3cc93838241d04800f52e823d.tar.gz \ + $STX_BASE/downloads/docker-distribution-48294d928ced5dd9b378f7fd7c6f5da3ff3f2c89.tar.gz \ + $STX_BASE/downloads/docker-libtrust-fa567046d9b14f6aa788882a950d69651d230b21.tar.gz \ + $STX_BASE/downloads/gophercloud-gophercloud-aa00757ee3ab58e53520b6cb910ca0543116400a.tar.gz \ + $STX_BASE/downloads/gorilla-context-08b5f424b9271eedf6f9f0ce86cb9396ed337a42.tar.gz \ + $STX_BASE/downloads/gorilla-mux-456bcfa82d672db7cae587c9b541463f65bc2718.tar.gz \ +" +TIS_PATCH_VER=1 diff --git a/kubernetes/registry-token-server/centos/registry-token-server.spec b/kubernetes/registry-token-server/centos/registry-token-server.spec index b26aa642b..6aa2bf65e 100644 --- a/kubernetes/registry-token-server/centos/registry-token-server.spec +++ 
b/kubernetes/registry-token-server/centos/registry-token-server.spec @@ -11,13 +11,20 @@ Source0: registry-token-server-%{version}.tar.gz Source1: %{name}.service Source2: token_server.conf +# Go dependencies downloaded as tarballs +Source10: Sirupsen-logrus-55eb11d21d2a31a3cc93838241d04800f52e823d.tar.gz +Source11: docker-distribution-48294d928ced5dd9b378f7fd7c6f5da3ff3f2c89.tar.gz +Source12: docker-libtrust-fa567046d9b14f6aa788882a950d69651d230b21.tar.gz +Source13: gophercloud-gophercloud-aa00757ee3ab58e53520b6cb910ca0543116400a.tar.gz +Source14: gorilla-context-08b5f424b9271eedf6f9f0ce86cb9396ed337a42.tar.gz +Source15: gorilla-mux-456bcfa82d672db7cae587c9b541463f65bc2718.tar.gz + BuildRequires: systemd Requires(post): systemd Requires(preun): systemd Requires(postun): systemd BuildRequires: golang >= 1.6 -BuildRequires: golang-dep ExclusiveArch: %{?go_arches:%{go_arches}}%{!?go_arches:%{ix86} x86_64 %{arm}} %description @@ -26,13 +33,26 @@ ExclusiveArch: %{?go_arches:%{go_arches}}%{!?go_arches:%{ix86} x86_64 %{arm}} %prep %setup -q -n registry-token-server-%{version} +# Extract other go dependencies +%setup -T -D -a 10 +%setup -T -D -a 11 +%setup -T -D -a 12 +%setup -T -D -a 13 +%setup -T -D -a 14 +%setup -T -D -a 15 +mkdir -p _build/src/github.com/gorilla/ && mv gorilla-mux _build/src/github.com/gorilla/mux +mkdir -p _build/src/github.com/docker/ && mv docker-distribution _build/src/github.com/docker/distribution +mkdir -p _build/src/github.com/docker/ && mv docker-libtrust _build/src/github.com/docker/libtrust +mkdir -p _build/src/github.com/docker/distribution/ && mv gorilla-context _build/src/github.com/docker/distribution/context +mkdir -p _build/src/github.com/Sirupsen/ && mv Sirupsen-logrus _build/src/github.com/Sirupsen/logrus +mkdir -p _build/src/github.com/gophercloud && mv gophercloud-gophercloud _build/src/github.com/gophercloud/gophercloud + %build mkdir -p ./_build/src/ ln -s $(pwd) ./_build/src/registry-token-server export 
GOPATH=$(pwd)/_build:%{gopath} cd ./_build/src/registry-token-server -dep ensure %gobuild -o bin/registry-token-server registry-token-server %install From abaff6b27525aaa91df53319f84004640f75e6a3 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 18 Jan 2019 16:29:56 -0500 Subject: [PATCH 4/4] Remove alarm query before clear in NTP plugin Issue titled 'NTP 100.14 alarm is not cleared' exposed an issue where the NTP plugin alarm clear operation is circumvented when its precursor fm_api.get_fault call returns None if the fm process is not running. From the caller's point of view, the None return suggests that the alarm to be cleared does not exist, so the code skips the call to clear. This update works around this by simply issuing the clear without the query. Change-Id: Idcc05bb0e7e1aa1082af1e8ecdcb1a5463b19440 Closes-Bug: 1812440 Signed-off-by: Eric MacDonald --- .../centos/build_srpm.data | 2 +- monitoring/collectd-extensions/src/ntpq.py | 44 +++++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/monitoring/collectd-extensions/centos/build_srpm.data b/monitoring/collectd-extensions/centos/build_srpm.data index e5b3c5046..82cafe8bb 100644 --- a/monitoring/collectd-extensions/centos/build_srpm.data +++ b/monitoring/collectd-extensions/centos/build_srpm.data @@ -16,4 +16,4 @@ COPY_LIST="$PKG_BASE/src/LICENSE \ $PKG_BASE/src/example.py \ $PKG_BASE/src/example.conf" -TIS_PATCH_VER=5 +TIS_PATCH_VER=6 diff --git a/monitoring/collectd-extensions/src/ntpq.py b/monitoring/collectd-extensions/src/ntpq.py index 7b6f343db..3f7964656 100755 --- a/monitoring/collectd-extensions/src/ntpq.py +++ b/monitoring/collectd-extensions/src/ntpq.py @@ -222,15 +222,14 @@ def _raise_alarm(ip=None): def _clear_base_alarm(): """ Clear the NTP base alarm """ - if api.get_fault(PLUGIN_ALARMID, obj.base_eid) is not None: - if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False: - collectd.error("%s failed to clear alarm %s:%s" % - (PLUGIN, PLUGIN_ALARMID, 
obj.base_eid)) - return True - else: - collectd.info("%s cleared alarm %s:%s" % - (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) - obj.alarm_raised = False + if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False: + collectd.error("%s failed to clear alarm %s:%s" % + (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) + return True + else: + collectd.info("%s cleared alarm %s:%s" % + (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) + obj.alarm_raised = False return False @@ -263,23 +262,20 @@ def _remove_ip_from_unreachable_list(ip): if ip and ip in obj.unreachable_servers: eid = obj.base_eid + '=' + ip collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid)) + # clear the alarm if its asserted - if api.get_fault(PLUGIN_ALARMID, eid) is not None: - if api.clear_fault(PLUGIN_ALARMID, eid) is True: - collectd.info("%s cleared %s:%s alarm" % - (PLUGIN, PLUGIN_ALARMID, eid)) - obj.unreachable_servers.remove(ip) - else: - # Handle clear failure by not removing the IP from the list. - # It will retry on next audit. - # Error should only occur if FM is not running at the time - # this get or clear is called - collectd.error("%s failed alarm clear %s:%s" % - (PLUGIN, PLUGIN_ALARMID, eid)) - return True - else: + if api.clear_fault(PLUGIN_ALARMID, eid) is True: + collectd.info("%s cleared %s:%s alarm" % + (PLUGIN, PLUGIN_ALARMID, eid)) obj.unreachable_servers.remove(ip) - collectd.info("%s alarm %s not raised" % (PLUGIN, eid)) + else: + # Handle clear failure by not removing the IP from the list. + # It will retry on next audit. + # Error should only occur if FM is not running at the time + # this get or clear is called + collectd.error("%s failed alarm clear %s:%s" % + (PLUGIN, PLUGIN_ALARMID, eid)) + return True return False