From 17972d5920663fc91cae5cc34b0b4d391eb0a2a8 Mon Sep 17 00:00:00 2001
From: Christian Dreher <c.dreher@kit.edu>
Date: Mon, 20 Nov 2023 18:28:14 +0100
Subject: [PATCH] fix: Improve robustness of aborting skills by error handling
 and retries in case of failure.

Note that this might still lead to full deadlocks in ArmarX GUI, rendering the whole GUI unresponsive for good. This issue must be fixed in the skill framework.
---
 .../SkillManagerMonitorWidgetController.cpp   | 67 +++++++++++++++++--
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/source/RobotAPI/gui-plugins/SkillManagerPlugin/SkillManagerMonitorWidgetController.cpp b/source/RobotAPI/gui-plugins/SkillManagerPlugin/SkillManagerMonitorWidgetController.cpp
index 308c95e8b..bbeaa5a4d 100644
--- a/source/RobotAPI/gui-plugins/SkillManagerPlugin/SkillManagerMonitorWidgetController.cpp
+++ b/source/RobotAPI/gui-plugins/SkillManagerPlugin/SkillManagerMonitorWidgetController.cpp
@@ -570,14 +570,67 @@ namespace armarx
     void
     SkillManagerMonitorWidgetController::stopAllExecutions()
     {
-        auto tree = widget.treeWidgetSkillExecutions;
-        ARMARX_INFO << "The user requested to stop all skill executions from GUI.";
-        for (ssize_t i = 0; i < tree->topLevelItemCount(); ++i)
+        QTreeWidget const* tree = widget.treeWidgetSkillExecutions;
+
+        int const max_retries = 3;
+        int left_retries = max_retries;
+        bool retry = false;
+
+        do
         {
-            auto item = static_cast<SkillExecutionInfoTreeWidgetItem*>(
-                widget.treeWidgetSkillExecutions->topLevelItem(i));
-            memory->abortSkillAsync(item->executionId.toManagerIce());
-        }
+            retry = false;
+
+            for (ssize_t i = 0; i < tree->topLevelItemCount(); ++i)
+            {
+                SkillExecutionInfoTreeWidgetItem const* item =
+                    dynamic_cast<SkillExecutionInfoTreeWidgetItem*>(
+                        widget.treeWidgetSkillExecutions->topLevelItem(i));
+
+                if (not item)
+                {
+                    // At this point, dynamic-casting failed and we don't know anything about the
+                    // type.
+                    ARMARX_ERROR << "Cannot stop unknown skill.";
+                    retry = true;
+                    continue;
+                }
+
+                try
+                {
+                    ARMARX_INFO << "Aborting skill '" << item->executionId.skillId.skillName
+                                << "'...";
+                    memory->abortSkillAsync(item->executionId.toManagerIce());
+                }
+                catch (Ice::Exception const& e)
+                {
+                    retry = true;
+                    ARMARX_ERROR << "Unhandeled Ice exception while aborting skill '"
+                                 << item->executionId.skillId.skillName << "'.";
+                }
+                catch (...)
+                {
+                    retry = true;
+                    ARMARX_ERROR << "Unhandled error while aborting skill '"
+                                 << item->executionId.skillId.skillName << "'.";
+                }
+            }
+
+            if (retry)
+            {
+                left_retries -= 1;
+
+                if (left_retries > 0)
+                {
+                    ARMARX_WARNING << "There where errors aborting skills.  Retrying...";
+                }
+                else
+                {
+                    ARMARX_ERROR << "Couldn't abort all skills after " << max_retries
+                                 << " tries.  Giving up.";
+                    retry = false;
+                }
+            }
+        } while (retry);
     }
 
     void
-- 
GitLab