[Openstack] Heat patch for Icehouse vs Volume deletion.

David Hill david.hill at ubisoft.com
Sat Aug 30 22:33:24 UTC 2014


Hi guys,

I've written a simple patch that takes care of the volume deletion, and it's ugly.  I'm posting it here so that anybody with better knowledge of the Heat code can base their own patch on it if mine really is that ugly :)

For some reason, an instance will sometimes fail to delete; this takes care of it by retrying until the instance disappears (a rough standalone sketch of the same retry idea follows the patch):
--- instance.py.orig    2014-08-30 22:07:45.259328109 +0000
+++ instance.py 2014-08-30 22:23:03.180527084 +0000
@@ -587,8 +587,13 @@
                     self.resource_id_set(None)
                     break
                 elif server.status == "ERROR":
-                    raise exception.Error(_("Deletion of server %s failed.") %
-                                          server.id)
+                    while server.status == "ERROR":
+                        nova_utils.refresh_server(server)
+                        server.reset_state()
+                        server.delete()
+                        if server.status == "DELETED":
+                            self.resource_id_set(None)
+                            break
             except clients.novaclient.exceptions.NotFound:
                 self.resource_id_set(None)
                 break
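
For anyone who wants to try the idea outside of Heat, here is a rough standalone sketch of the same retry loop using python-novaclient. The `nova` client object, the function name and the polling interval are my own placeholders, not part of the patch:

import time

from novaclient import exceptions as nova_exceptions


def force_delete_server(nova, server_id, poll=5):
    """Keep resetting state and re-issuing the delete until the server is gone."""
    while True:
        try:
            server = nova.servers.get(server_id)
        except nova_exceptions.NotFound:
            return  # the server finally disappeared
        if server.status == 'ERROR':
            # mirror the patch above: reset-state (an admin action)
            # before retrying the delete on an errored server
            server.reset_state()
        server.delete()
        time.sleep(poll)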


This patch is based on the following bug: https://bugs.launchpad.net/heat/+bug/1298350 => https://review.openstack.org/#/c/86638/
That review didn't work for me, but the patch below works flawlessly.



--- volume.py.icehouse               2014-08-30 00:59:49.844290344 +0000
+++ volume.py 2014-08-30 21:58:08.769813146 +0000
@@ -151,11 +151,11 @@
                 if vol.status == 'in-use':
                     logger.warn(_('can not delete volume when in-use'))
                     raise exception.Error(_('Volume in use'))
-
-                vol.delete()
-                while True:
-                    yield
-                    vol.get()
+                if vol.status != 'deleting':
+                    vol.delete()
+                    while True:
+                        yield
+                        vol.get()
             except clients.cinderclient.exceptions.NotFound:
                 self.resource_id_set(None)
@@ -227,69 +227,187 @@
         logger.info(_('%s - complete') % str(self))
-
-class VolumeDetachTask(object):
+class VolumeDeleteTask(object):
     """A task for detaching a volume from a Nova server."""
-    def __init__(self, stack, server_id, attachment_id):
+    def __init__(self, stack, volume_id):
         """
         Initialise with the stack (for obtaining the clients), and the IDs of
         the server and volume.
         """
         self.clients = stack.clients
-        self.server_id = server_id
-        self.attachment_id = attachment_id
+        self.volume_id = volume_id
     def __str__(self):
         """Return a human-readable string description of the task."""
-        return _('Removing attachment %(att)s from Instance %(srv)s') % {
-            'att': self.attachment_id, 'srv': self.server_id}
+        return _('Deleting volume %(vol)s') % {
+            'vol': self.volume_id}
     def __repr__(self):
         """Return a brief string description of the task."""
-        return '%s(%s -/> %s)' % (type(self).__name__,
-                                  self.attachment_id,
-                                  self.server_id)
+        return '%s(%s)' % (type(self).__name__,
+                           self.volume_id)
     def __call__(self):
         """Return a co-routine which runs the task."""
         logger.debug(str(self))
-        server_api = self.clients.nova().volumes
-
-        # get reference to the volume while it is attached
         try:
-            nova_vol = server_api.get_server_volume(self.server_id,
-                                                    self.attachment_id)
-            vol = self.clients.cinder().volumes.get(nova_vol.id)
-        except (clients.cinderclient.exceptions.NotFound,
-                clients.novaclient.exceptions.BadRequest,
-                clients.novaclient.exceptions.NotFound):
+            vol = self.clients.cinder().volumes.get(self.volume_id)
+            if vol.status == 'available':
+                vol.delete()
+                yield
+            while vol.status in ('available', 'deleting'):
+                logger.debug(_('%s - volume is still %s') % (str(self), vol.status))
+                yield
+                vol.get()
+            logger.info(_('%(name)s - status: %(status)s') % {
+                        'name': str(self), 'status': vol.status})
+
+        except clients.cinderclient.exceptions.NotFound:
             logger.warning(_('%s - volume not found') % str(self))
             return
+        except clients.cinderclient.exceptions.BadRequest:
+            msg = _('Failed to delete %(vol)s') % {
+                'vol': self.volume_id}
+            raise exception.Error(msg)
-        # detach the volume using volume_attachment
-        try:
-            server_api.delete_server_volume(self.server_id, self.attachment_id)
-        except (clients.novaclient.exceptions.BadRequest,
-                clients.novaclient.exceptions.NotFound) as e:
-            logger.warning('%(res)s - %(err)s' % {'res': str(self),
-                                                  'err': str(e)})
+        logger.info(_('%s - complete') % str(self))
-        yield
+class VolumeAttachment(resource.Resource):
+    PROPERTIES = (
+        INSTANCE_ID, VOLUME_ID, DEVICE,
+    ) = (
+        'InstanceId', 'VolumeId', 'Device',
+    )
+
+    properties_schema = {
+        INSTANCE_ID: properties.Schema(
+            properties.Schema.STRING,
+            _('The ID of the instance to which the volume attaches.'),
+            required=True,
+            update_allowed=True
+        ),
+        VOLUME_ID: properties.Schema(
+            properties.Schema.STRING,
+            _('The ID of the volume to be attached.'),
+            required=True,
+            update_allowed=True
+        ),
+        DEVICE: properties.Schema(
+            properties.Schema.STRING,
+            _('The device where the volume is exposed on the instance. This '
+              'assignment may not be honored and it is advised that the path '
+              '/dev/disk/by-id/virtio-<VolumeId> be used instead.'),
+            required=True,
+            update_allowed=True,
+            constraints=[
+                constraints.AllowedPattern('/dev/vd[b-z]'),
+            ]
+        ),
+    }
+
+    update_allowed_keys = ('Properties',)
+
+    def handle_create(self):
+        server_id = self.properties[self.INSTANCE_ID]
+        volume_id = self.properties[self.VOLUME_ID]
+        dev = self.properties[self.DEVICE]
+
+        attach_task = VolumeAttachTask(self.stack, server_id, volume_id, dev)
+        attach_runner = scheduler.TaskRunner(attach_task)
+
+        attach_runner.start()
+
+        self.resource_id_set(attach_task.attachment_id)
+
+        return attach_runner
+
+    def check_create_complete(self, attach_runner):
+        return attach_runner.step()
+
+    def handle_delete(self):
+        server_id = self.properties[self.INSTANCE_ID]
+        volume_id = self.properties[self.VOLUME_ID]
+        detach_task = VolumeDetachTask(self.stack, server_id, volume_id)
+        scheduler.TaskRunner(detach_task)()
+        delete_task = VolumeDeleteTask(self.stack, volume_id)
+        scheduler.TaskRunner(delete_task)()
+
+    def handle_update(self, json_snippet, tmpl_diff, prop_diff):
+        checkers = []
+        if prop_diff:
+            volume_id = self.properties.get(self.VOLUME_ID)
+            server_id = self.properties.get(self.INSTANCE_ID)
+            detach_task = VolumeDetachTask(self.stack, server_id, volume_id)
+            checkers.append(scheduler.TaskRunner(detach_task))
+
+            if self.VOLUME_ID in prop_diff:
+                volume_id = prop_diff.get(self.VOLUME_ID)
+            device = self.properties.get(self.DEVICE)
+            if self.DEVICE in prop_diff:
+                device = prop_diff.get(self.DEVICE)
+            if self.INSTANCE_ID in prop_diff:
+                server_id = prop_diff.get(self.INSTANCE_ID)
+            attach_task = VolumeAttachTask(self.stack, server_id,
+                                           volume_id, device)
+
+            checkers.append(scheduler.TaskRunner(attach_task))
+
+        if checkers:
+            checkers[0].start()
+        return checkers
+
+    def check_update_complete(self, checkers):
+        for checker in checkers:
+            if not checker.started():
+                checker.start()
+            if not checker.step():
+                return False
+        self.resource_id_set(checkers[-1]._task.attachment_id)
+        return True
+
+
+class VolumeDetachTask(object):
+    """A task for detaching a volume from a Nova server."""
+
+    def __init__(self, stack, server_id, volume_id):
+        """
+        Initialise with the stack (for obtaining the clients), and the IDs of
+        the server and volume.
+        """
+        self.clients = stack.clients
+        self.server_id = server_id
+        self.volume_id = volume_id
+
+    def __str__(self):
+        """Return a human-readable string description of the task."""
+        return _('Removing volume %(vol)s from Instance %(srv)s') % {
+            'vol': self.volume_id, 'srv': self.server_id}
+
+    def __repr__(self):
+        """Return a brief string description of the task."""
+        return '%s(%s -/> %s)' % (type(self).__name__,
+                                  self.volume_id,
+                                  self.server_id)
+
+    def __call__(self):
+        """Return a co-routine which runs the task."""
+        logger.debug(str(self))
         try:
-            vol.get()
+            vol = self.clients.cinder().volumes.get(self.volume_id)
+            attached_to = [att['server_id'] for att in vol.attachments]
+            if self.server_id not in attached_to:
+                msg = _('Volume %(vol)s is not attached to server %(srv)s') % {
+                    'vol': self.volume_id, 'srv': self.server_id}
+                raise exception.Error(msg)
+
+            vol.detach()
+            yield
             while vol.status in ('in-use', 'detaching'):
                 logger.debug(_('%s - volume still in use') % str(self))
                 yield
-
-                try:
-                    server_api.delete_server_volume(self.server_id,
-                                                    self.attachment_id)
-                except (clients.novaclient.exceptions.BadRequest,
-                        clients.novaclient.exceptions.NotFound):
-                    pass
                 vol.get()
             logger.info(_('%(name)s - status: %(status)s') % {
@@ -299,27 +417,32 @@
         except clients.cinderclient.exceptions.NotFound:
             logger.warning(_('%s - volume not found') % str(self))
+            return
+        except clients.cinderclient.exceptions.BadRequest:
+            msg = _('Failed to detach %(vol)s from server %(srv)s') % {
+                'vol': self.volume_id, 'srv': self.server_id}
+            raise exception.Error(msg)
         # The next check is needed for immediate reattachment when updating:
-        # as the volume info is taken from cinder, but the detach
-        # request is sent to nova, there might be some time
-        # between cinder marking volume as 'available' and
-        # nova removing attachment from it's own objects, so we
+        # there might be some time between cinder marking volume as 'available'
+        # and nova removing attachment from it's own objects, so we
         # check that nova already knows that the volume is detached
-        def server_has_attachment(server_id, attachment_id):
-            try:
-                server_api.get_server_volume(server_id, attachment_id)
-            except clients.novaclient.exceptions.NotFound:
+        server_api = self.clients.nova().volumes
+
+        def server_has_attachment(server_id, volume_id):
+            vol = self.clients.cinder().volumes.get(self.volume_id)
+            attached_to = [att['server_id'] for att in vol.attachments]
+            if self.server_id not in attached_to:
                 return False
-            return True
+            else:
+                return True
-        while server_has_attachment(self.server_id, self.attachment_id):
-            logger.info(_("Server %(srv)s still has attachment %(att)s.") %
-                        {'att': self.attachment_id, 'srv': self.server_id})
+        while server_has_attachment(self.server_id, self.volume_id):
+            logger.info(_("Server %(srv)s still has %(vol)s attached.") %
+                        {'vol': self.volume_id, 'srv': self.server_id})
             yield
-        logger.info(_("Volume %(vol)s is detached from server %(srv)s") %
-                    {'vol': vol.id, 'srv': self.server_id})
+        logger.info(_('%s - complete') % str(self))

 class VolumeAttachment(resource.Resource):
@@ -376,29 +499,25 @@
     def handle_delete(self):
         server_id = self.properties[self.INSTANCE_ID]
-        detach_task = VolumeDetachTask(self.stack, server_id, self.resource_id)
+        volume_id = self.properties[self.VOLUME_ID]
+        detach_task = VolumeDetachTask(self.stack, server_id, volume_id)
         scheduler.TaskRunner(detach_task)()
+        delete_task = VolumeDeleteTask(self.stack, volume_id)
+        scheduler.TaskRunner(delete_task)()
     def handle_update(self, json_snippet, tmpl_diff, prop_diff):
         checkers = []
         if prop_diff:
-            # Even though some combinations of changed properties
-            # could be updated in UpdateReplace manner,
-            # we still first detach the old resource so that
-            # self.resource_id is not replaced prematurely
             volume_id = self.properties.get(self.VOLUME_ID)
+            server_id = self.properties.get(self.INSTANCE_ID)
+            detach_task = VolumeDetachTask(self.stack, server_id, volume_id)
+            checkers.append(scheduler.TaskRunner(detach_task))
+
             if self.VOLUME_ID in prop_diff:
                 volume_id = prop_diff.get(self.VOLUME_ID)
-
             device = self.properties.get(self.DEVICE)
             if self.DEVICE in prop_diff:
                 device = prop_diff.get(self.DEVICE)
-
-            server_id = self.properties.get(self.INSTANCE_ID)
-            detach_task = VolumeDetachTask(self.stack, server_id,
-                                           self.resource_id)
-            checkers.append(scheduler.TaskRunner(detach_task))
-
             if self.INSTANCE_ID in prop_diff:
                 server_id = prop_diff.get(self.INSTANCE_ID)
             attach_task = VolumeAttachTask(self.stack, server_id,
@@ -419,7 +538,6 @@
         self.resource_id_set(checkers[-1]._task.attachment_id)
         return True
-
 class CinderVolume(Volume):
     PROPERTIES = (
