From 5283871af52923985d12f0b9e8fd3a036beba974 Mon Sep 17 00:00:00 2001 From: aditi Date: Thu, 15 Jun 2017 12:44:53 +0000 Subject: [PATCH] Abort operation for live migration This patch adds abort operation for live migration to support abort in cancel action plan. Change-Id: I458e93d9bd09dc4cf80cc941104129fc7600a6b1 Partially-Implements: blueprint cancel-action-plan --- tox.ini | 2 +- watcher/applier/actions/base.py | 8 +++- watcher/applier/actions/migration.py | 25 +++++++++++- watcher/applier/actions/nop.py | 1 + watcher/applier/actions/sleep.py | 1 + watcher/applier/workflow_engine/base.py | 12 +++--- watcher/applier/workflow_engine/default.py | 10 ++++- watcher/common/nova_helper.py | 40 +++++++++++++++++++ .../tests/applier/actions/test_migration.py | 15 +++++++ .../test_default_workflow_engine.py | 1 - watcher/tests/common/test_nova_helper.py | 39 ++++++++++++++++++ 11 files changed, 141 insertions(+), 13 deletions(-) diff --git a/tox.ini b/tox.ini index 907c2a008..12db5c39b 100644 --- a/tox.ini +++ b/tox.ini @@ -47,7 +47,7 @@ commands = [flake8] show-source=True -ignore= H105,E123,E226,N320 +ignore= H105,E123,E226,N320,H202 builtins= _ exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build,*sqlalchemy/alembic/versions/*,demo/,releasenotes diff --git a/watcher/applier/actions/base.py b/watcher/applier/actions/base.py index 5bb330bea..8bb8bfb03 100644 --- a/watcher/applier/actions/base.py +++ b/watcher/applier/actions/base.py @@ -139,4 +139,10 @@ class BaseAction(loadable.Loadable): raise NotImplementedError() def check_abort(self): - return bool(self.__class__.__name__ in self.ABORT_TRUE) + if self.__class__.__name__ is 'Migrate': + if self.migration_type == self.LIVE_MIGRATION: + return True + else: + return False + else: + return bool(self.__class__.__name__ in self.ABORT_TRUE) diff --git a/watcher/applier/actions/migration.py b/watcher/applier/actions/migration.py index 36d467d22..0e10af6b8 100644 --- a/watcher/applier/actions/migration.py +++ b/watcher/applier/actions/migration.py @@ -151,6 +151,18 @@ class Migrate(base.BaseAction): "host.", self.instance_uuid) return result + def _abort_cold_migrate(self, nova): + # TODO(adisky): currently watcher uses its own version of cold migrate + # implement cold migrate using nova dependent on the blueprint + # https://blueprints.launchpad.net/nova/+spec/cold-migration-with-target + # Abort operation for cold migrate is dependent on blueprint + # https://blueprints.launchpad.net/nova/+spec/abort-cold-migration + LOG.warning("Abort operation for cold migration is not implemented") + + def _abort_live_migrate(self, nova, source, destination): + return nova.abort_live_migrate(instance_id=self.instance_uuid, + source=source, destination=destination) + def migrate(self, destination): nova = nova_helper.NovaHelper(osc=self.osc) LOG.debug("Migrate instance %s to %s", self.instance_uuid, @@ -176,8 +188,17 @@ class Migrate(base.BaseAction): return self.migrate(destination=self.source_node) def abort(self): - # TODO(adisky): implement abort for migration - LOG.warning("Abort for migration not implemented") + nova = nova_helper.NovaHelper(osc=self.osc) + instance = nova.find_instance(self.instance_uuid) + if instance: + if self.migration_type == self.COLD_MIGRATION: + return self._abort_cold_migrate(nova) + elif self.migration_type == self.LIVE_MIGRATION: + return self._abort_live_migrate( + nova, source=self.source_node, + destination=self.destination_node) + else: + raise exception.InstanceNotFound(name=self.instance_uuid) def pre_condition(self): # TODO(jed): check if the instance exists / check if the instance is on diff --git a/watcher/applier/actions/nop.py b/watcher/applier/actions/nop.py index 2b3c86971..1372fa9d2 100644 --- a/watcher/applier/actions/nop.py +++ b/watcher/applier/actions/nop.py @@ -83,3 +83,4 @@ class Nop(base.BaseAction): def abort(self): LOG.debug("Abort action NOP") + return True diff --git a/watcher/applier/actions/sleep.py b/watcher/applier/actions/sleep.py index dc2ed3d49..62d702fe8 100644 --- a/watcher/applier/actions/sleep.py +++ b/watcher/applier/actions/sleep.py @@ -73,3 +73,4 @@ class Sleep(base.BaseAction): def abort(self): LOG.debug("Abort action sleep") + return True diff --git a/watcher/applier/workflow_engine/base.py b/watcher/applier/workflow_engine/base.py index f83afcd7c..3e0c60ff4 100644 --- a/watcher/applier/workflow_engine/base.py +++ b/watcher/applier/workflow_engine/base.py @@ -88,10 +88,6 @@ class BaseWorkFlowEngine(loadable.Loadable): def notify(self, action, state): db_action = objects.Action.get_by_uuid(self.context, action.uuid, eager=True) - if (db_action.state in [objects.action.State.CANCELLING, - objects.action.State.CANCELLED] and - state == objects.action.State.SUCCEEDED): - return db_action.state = state db_action.save() @@ -204,7 +200,7 @@ class BaseTaskFlowActionContainer(flow_task.Task): objects.action.State.FAILED] or action_plan_object.state in CANCEL_STATE): break - time.sleep(2) + time.sleep(1) try: # NOTE: kill the action execution thread, if action plan is # cancelled for all other cases wait for the result from action @@ -246,15 +242,19 @@ class BaseTaskFlowActionContainer(flow_task.Task): # if due to some other exception keep the flow intact. if action_plan.state not in CANCEL_STATE: self.do_revert() + return + action_object = objects.Action.get_by_uuid( self.engine.context, self._db_action.uuid, eager=True) if action_object.state == objects.action.State.ONGOING: action_object.state = objects.action.State.CANCELLING action_object.save() self.abort() - if action_object.state == objects.action.State.PENDING: + elif action_object.state == objects.action.State.PENDING: action_object.state = objects.action.State.CANCELLED action_object.save() + else: + pass def abort(self, *args, **kwargs): self.do_abort(*args, **kwargs) diff --git a/watcher/applier/workflow_engine/default.py b/watcher/applier/workflow_engine/default.py index 9431ced85..4080de7bf 100644 --- a/watcher/applier/workflow_engine/default.py +++ b/watcher/applier/workflow_engine/default.py @@ -143,8 +143,14 @@ class TaskFlowActionContainer(base.BaseTaskFlowActionContainer): def do_abort(self, *args, **kwargs): LOG.warning("Aborting action: %s", self.name) try: - self.action.abort() - self.engine.notify(self._db_action, objects.action.State.CANCELLED) + result = self.action.abort() + if result: + # Aborted the action. + self.engine.notify(self._db_action, + objects.action.State.CANCELLED) + else: + self.engine.notify(self._db_action, + objects.action.State.SUCCEEDED) except Exception as e: self.engine.notify(self._db_action, objects.action.State.FAILED) LOG.exception(e) diff --git a/watcher/common/nova_helper.py b/watcher/common/nova_helper.py index 04250a9c2..b62953b73 100644 --- a/watcher/common/nova_helper.py +++ b/watcher/common/nova_helper.py @@ -432,6 +432,43 @@ class NovaHelper(object): return True + def abort_live_migrate(self, instance_id, source, destination, retry=240): + LOG.debug("Aborting live migration of instance %s" % instance_id) + migration = self.get_running_migration(instance_id) + if migration: + migration_id = getattr(migration[0], "id") + try: + self.nova.server_migrations.live_migration_abort( + server=instance_id, migration=migration_id) + except exception as e: + # Note: Does not return from here, as abort request can't be + # accepted but migration still going on. + LOG.exception(e) + else: + LOG.debug( + "No running migrations found for instance %s" % instance_id) + + while retry: + instance = self.nova.servers.get(instance_id) + if (getattr(instance, 'OS-EXT-STS:task_state') is None and + getattr(instance, 'status') in ['ACTIVE', 'ERROR']): + break + time.sleep(2) + retry -= 1 + instance_host = getattr(instance, 'OS-EXT-SRV-ATTR:host') + instance_status = getattr(instance, 'status') + + # Abort live migration successfull, action is cancelled + if instance_host == source and instance_status == 'ACTIVE': + return True + # Nova Unable to abort live migration, action is succeded + elif instance_host == destination and instance_status == 'ACTIVE': + return False + + else: + raise Exception("Live migration execution and abort both failed " + "for the instance %s" % instance_id) + def enable_service_nova_compute(self, hostname): if self.nova.services.enable(host=hostname, binary='nova-compute'). \ @@ -757,3 +794,6 @@ class NovaHelper(object): instance.flavor[attr] = default continue instance.flavor[attr] = getattr(flavor, attr, default) + + def get_running_migration(self, instance_id): + return self.nova.server_migrations.list(server=instance_id) diff --git a/watcher/tests/applier/actions/test_migration.py b/watcher/tests/applier/actions/test_migration.py index 8dda5772e..54b04b21b 100644 --- a/watcher/tests/applier/actions/test_migration.py +++ b/watcher/tests/applier/actions/test_migration.py @@ -238,3 +238,18 @@ class TestMigration(base.TestCase): ] self.m_helper.live_migrate_instance.mock_calls == expected self.assertEqual(2, self.m_helper.live_migrate_instance.call_count) + + def test_abort_live_migrate(self): + migration = mock.MagicMock() + migration.id = "2" + migrations = [migration] + self.m_helper.get_running_migration.return_value = migrations + self.m_helper.find_instance.return_value = self.INSTANCE_UUID + try: + self.action.abort() + except Exception as exc: + self.fail(exc) + + self.m_helper.abort_live_migrate.assert_called_once_with( + instance_id=self.INSTANCE_UUID, source="compute1-hostname", + destination="compute2-hostname") diff --git a/watcher/tests/applier/workflow_engine/test_default_workflow_engine.py b/watcher/tests/applier/workflow_engine/test_default_workflow_engine.py index 5e6b4a4ba..fdf902c0b 100644 --- a/watcher/tests/applier/workflow_engine/test_default_workflow_engine.py +++ b/watcher/tests/applier/workflow_engine/test_default_workflow_engine.py @@ -343,7 +343,6 @@ class TestDefaultWorkFlowEngine(base.DbTestCase): actions.append(action1) actions.append(action2) actions.append(action3) - self.assertRaises(exception.ActionPlanCancelled, self.engine.execute, actions) try: diff --git a/watcher/tests/common/test_nova_helper.py b/watcher/tests/common/test_nova_helper.py index 1c4b8562f..f037c5bcb 100644 --- a/watcher/tests/common/test_nova_helper.py +++ b/watcher/tests/common/test_nova_helper.py @@ -48,6 +48,12 @@ class TestNovaHelper(base.TestCase): return server + @staticmethod + def fake_migration(*args, **kwargs): + migration = mock.MagicMock() + migration.id = args[0] + return migration + @staticmethod def fake_nova_find_list(nova_util, find=None, list=None): nova_util.nova.servers.get.return_value = find @@ -56,6 +62,13 @@ class TestNovaHelper(base.TestCase): else: nova_util.nova.servers.list.return_value = [list] + @staticmethod + def fake_nova_migration_list(nova_util, list=None): + if list is None: + nova_util.nova.server_migrations.list.return_value = [] + else: + nova_util.nova.server_migration.list.return_value = [list] + @mock.patch.object(time, 'sleep', mock.Mock()) def test_stop_instance(self, mock_glance, mock_cinder, mock_neutron, mock_nova): @@ -189,6 +202,32 @@ class TestNovaHelper(base.TestCase): self.destination_node, keep_original_image_name=False) self.assertTrue(is_success) + @mock.patch.object(time, 'sleep', mock.Mock()) + def test_abort_live_migrate_instance(self, mock_glance, mock_cinder, + mock_neutron, mock_nova): + nova_util = nova_helper.NovaHelper() + server = self.fake_server(self.instance_uuid) + setattr(server, 'OS-EXT-SRV-ATTR:host', + self.source_node) + setattr(server, 'OS-EXT-STS:task_state', None) + migration = self.fake_migration(2) + self.fake_nova_migration_list(nova_util, list=migration) + + self.fake_nova_find_list(nova_util, find=server, list=server) + + self.assertTrue(nova_util.abort_live_migrate( + self.instance_uuid, self.source_node, self.destination_node)) + + setattr(server, 'OS-EXT-SRV-ATTR:host', self.destination_node) + + self.assertFalse(nova_util.abort_live_migrate( + self.instance_uuid, self.source_node, self.destination_node)) + + setattr(server, 'status', 'ERROR') + self.assertRaises(Exception, nova_util.abort_live_migrate, + (self.instance_uuid, self.source_node, + self.destination_node)) + @mock.patch.object(time, 'sleep', mock.Mock()) def test_create_image_from_instance(self, mock_glance, mock_cinder, mock_neutron, mock_nova):