196fa877a9
Previously, we have switched to use default exchanges to avoid excessive amounts of exchange not found messages. But it does not actually solve the problem because reply_* queue is already gone and agent will not receive callbacks. after some debugging, I found under some circumstances seems rabbitmq consumer does not receive basic cancel signal when queue is already gone. This might due to rabbitmq try to restart consumer when queue is down (for example when split brain). In such cases, it might be better to fail early. by reading the code, seems like x-cancel-on-ha-failover is not dedicated to mirror queues only, https://github.com/rabbitmq/rabbitmq-server/blob/master/src/rabbit_channel.erl#L1894, https://github.com/rabbitmq/rabbitmq-server/blob/master/src/rabbit_channel.erl#L1926. By failing early, in my own test setup, I could solve a certain case of exchange not found problem. Change-Id: I2ae53340783e4044dab58035bc0992dc08145b53 Related-bug: #1789177
137 lines
5.0 KiB
Python
137 lines
5.0 KiB
Python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
|
|
import os
|
|
import signal
|
|
import time
|
|
|
|
import fixtures
|
|
from pifpaf.drivers import rabbitmq
|
|
|
|
from oslo_messaging.tests.functional import utils
|
|
from oslo_messaging.tests import utils as test_utils
|
|
|
|
|
|
class ConnectedPortMatcher(object):
|
|
def __init__(self, port):
|
|
self.port = port
|
|
|
|
def __eq__(self, data):
|
|
return data.get("port") == self.port
|
|
|
|
def __repr__(self):
|
|
return "<ConnectedPortMatcher port=%d>" % self.port
|
|
|
|
|
|
class RabbitMQFailoverTests(test_utils.BaseTestCase):
|
|
DRIVERS = [
|
|
"rabbit",
|
|
]
|
|
|
|
def test_failover_scenario(self):
|
|
self._test_failover_scenario()
|
|
|
|
def test_failover_scenario_enable_cancel_on_failover(self):
|
|
self._test_failover_scenario(enable_cancel_on_failover=True)
|
|
|
|
def _test_failover_scenario(self, enable_cancel_on_failover=False):
|
|
# NOTE(sileht): run this test only if functional suite run of a driver
|
|
# that use rabbitmq as backend
|
|
self.driver = os.environ.get('TRANSPORT_DRIVER')
|
|
if self.driver not in self.DRIVERS:
|
|
self.skipTest("TRANSPORT_DRIVER is not set to a rabbit driver")
|
|
|
|
# NOTE(sileht): Allow only one response at a time, to
|
|
# have only one tcp connection for reply and ensure it will failover
|
|
# correctly
|
|
self.config(heartbeat_timeout_threshold=1,
|
|
rpc_conn_pool_size=1,
|
|
kombu_reconnect_delay=0,
|
|
rabbit_retry_interval=0,
|
|
rabbit_retry_backoff=0,
|
|
enable_cancel_on_failover=enable_cancel_on_failover,
|
|
group='oslo_messaging_rabbit')
|
|
|
|
self.pifpaf = self.useFixture(rabbitmq.RabbitMQDriver(cluster=True,
|
|
port=5692))
|
|
|
|
self.url = self.pifpaf.env["PIFPAF_URL"]
|
|
self.n1 = self.pifpaf.env["PIFPAF_RABBITMQ_NODENAME1"]
|
|
self.n2 = self.pifpaf.env["PIFPAF_RABBITMQ_NODENAME2"]
|
|
self.n3 = self.pifpaf.env["PIFPAF_RABBITMQ_NODENAME3"]
|
|
|
|
# ensure connections will be establish to the first node
|
|
self.pifpaf.stop_node(self.n2)
|
|
self.pifpaf.stop_node(self.n3)
|
|
|
|
self.servers = self.useFixture(utils.RpcServerGroupFixture(
|
|
self.conf, self.url, endpoint=self, names=["server"]))
|
|
|
|
# Don't randomize rabbit hosts
|
|
self.useFixture(fixtures.MockPatch(
|
|
'oslo_messaging._drivers.impl_rabbit.random',
|
|
side_effect=lambda x: x))
|
|
|
|
# NOTE(sileht): this connects server connections and reply
|
|
# connection to nodename n1
|
|
self.client = self.servers.client(0)
|
|
self.client.ping()
|
|
self._check_ports(self.pifpaf.port)
|
|
|
|
# Switch to node n2
|
|
self.pifpaf.start_node(self.n2)
|
|
self.assertEqual("callback done", self.client.kill_and_process())
|
|
self.assertEqual("callback done", self.client.just_process())
|
|
self._check_ports(self.pifpaf.get_port(self.n2))
|
|
|
|
# Switch to node n3
|
|
self.pifpaf.start_node(self.n3)
|
|
time.sleep(0.1)
|
|
self.pifpaf.kill_node(self.n2, signal=signal.SIGKILL)
|
|
time.sleep(0.1)
|
|
self.assertEqual("callback done", self.client.just_process())
|
|
self._check_ports(self.pifpaf.get_port(self.n3))
|
|
|
|
self.pifpaf.start_node(self.n1)
|
|
time.sleep(0.1)
|
|
self.pifpaf.kill_node(self.n3, signal=signal.SIGKILL)
|
|
time.sleep(0.1)
|
|
self.assertEqual("callback done", self.client.just_process())
|
|
self._check_ports(self.pifpaf.get_port(self.n1))
|
|
|
|
def kill_and_process(self, *args, **kargs):
|
|
self.pifpaf.kill_node(self.n1, signal=signal.SIGKILL)
|
|
time.sleep(0.1)
|
|
return "callback done"
|
|
|
|
def just_process(self, *args, **kargs):
|
|
return "callback done"
|
|
|
|
def _check_ports(self, port):
|
|
rpc_server = self.servers.servers[0].server
|
|
connection_contexts = [
|
|
# rpc server
|
|
rpc_server.listener._poll_style_listener.conn,
|
|
# rpc client
|
|
self.client.client.transport._driver._get_connection(),
|
|
# rpc client replies waiter
|
|
self.client.client.transport._driver._reply_q_conn,
|
|
]
|
|
|
|
ports = [cctxt.connection.channel.connection.sock.getpeername()[1]
|
|
for cctxt in connection_contexts]
|
|
|
|
self.assertEqual([port] * len(ports), ports,
|
|
"expected: %s, rpc-server: %s, rpc-client: %s, "
|
|
"rpc-replies: %s" % tuple([port] + ports))
|