RobotHealth.cpp
Go to the documentation of this file.
1/*
2 * This file is part of ArmarX.
3 *
4 * ArmarX is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * ArmarX is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *
16 * @package RobotAPI::ArmarXObjects::RobotHealth
17 * @author Simon Ottenhaus ( simon dot ottenhaus at kit dot edu )
18 * @date 2018
19 * @copyright http://www.gnu.org/licenses/gpl-2.0.txt
20 * GNU General Public License
21 */
22
23#include "RobotHealth.h"
24
25#include <algorithm>
26#include <mutex>
27#include <optional>
28
29#include <boost/regex.hpp>
30
31#include <SimoxUtility/algorithm/get_map_keys_values.h>
32#include <SimoxUtility/algorithm/string/string_tools.h>
33
41
42namespace armarx
43{
44 inline const std::string PROPERTY_REQUESTER_ID = "required_default";
45
46 void
48 {
50 monitorUpdateHealthTask =
51 new PeriodicTask<RobotHealth>(this, &RobotHealth::monitorHealthUpdateTaskClb, 10);
52
53 defaultMaximumCycleTimeWarn =
54 armarx::core::time::Duration::MilliSeconds(p.maximumCycleTimeWarnMS);
55 defaultMaximumCycleTimeErr =
56 armarx::core::time::Duration::MilliSeconds(p.maximumCycleTimeErrMS);
57
58 const std::vector<std::string> requiredTags = simox::alg::split(p.requiredTags, ",");
59 // a special requester: cannot be changed during runtime.
60 tagsPerRequester[PROPERTY_REQUESTER_ID] =
61 std::set<std::string>(requiredTags.begin(), requiredTags.end());
62
64 }
65
66 void
68 {
70
71 monitorUpdateHealthTask->start();
72 }
73
74 void
75 RobotHealth::monitorHealthUpdateTaskClb()
76 {
77 const std::scoped_lock lock(updateMutex);
78
81
82 for (auto& e : updateEntries)
83 {
85 const std::scoped_lock elock(e.mutex);
86
87 if (!e.enabled)
88 {
89 continue;
90 }
91
92 if (e.history.empty())
93 {
94 continue;
95 }
96
97 auto deltaToArrival = now - e.history.back().arrivalTime;
98
100
101 if (deltaToArrival > e.maximumCycleTimeErr)
102 {
104
105 if (e.state != HealthError) // not already in error state
106 {
108 ARMARX_ERROR << deactivateSpam(0.1, e.name) << "Component " << e.name
109 << " has died.";
110 e.state = HealthError;
111 }
112 }
113 else if (deltaToArrival > e.maximumCycleTimeWarn)
114 {
116 ARMARX_WARNING << deactivateSpam(0.1, e.name) << "Component " << e.name
117 << " is running too slow.";
118
119 e.state = HealthWarning;
120 }
121 else
122 {
123 // If everything is OK (again), update health
124 e.state = HealthOK;
125 }
126 }
127
128 // update aggregated health
129 RobotHealthState overallHealthState = RobotHealthState::HealthOK;
130
131
132 // get aggregated status
133 for (auto& e : updateEntries)
134 {
136 std::scoped_lock elock(e.mutex);
137
138 // trim history
139 while (e.history.size() > 20)
140 {
141 e.history.pop_front();
142 }
143
144 if (e.required)
145 {
146 if (e.state == HealthWarning && overallHealthState != HealthError)
147 {
148 overallHealthState = RobotHealthState::HealthWarning;
149 }
150 else if (e.state == HealthError)
151 {
152 overallHealthState = RobotHealthState::HealthError;
153 }
154 }
155 else
156 {
157 if (e.state != HealthOK && overallHealthState != HealthError)
158 {
159 overallHealthState = RobotHealthState::HealthWarning;
160 }
161 }
162 }
163
164 if (overallHealthState == HealthError)
165 {
167 ARMARX_INFO << deactivateSpam(3) << "Requesting emergency stop";
168 ARMARX_CHECK_NOT_NULL(p.emergencyStopTopicPrx);
169 p.emergencyStopTopicPrx->reportEmergencyStopState(
170 EmergencyStopState::eEmergencyStopActive);
171 }
173 ARMARX_CHECK_NOT_NULL(p.aggregatedRobotHealthTopicPrx);
174 p.aggregatedRobotHealthTopicPrx->aggregatedHeartbeat(overallHealthState);
175
176 reportDebugObserver();
177 }
178
179 void
181 {
182 //robotUnitPrx = nullptr;
183 monitorUpdateHealthTask->stop();
184 }
185
186 void
190
193 {
196
197
198 defs->topic(p.emergencyStopTopicPrx,
199 "EmergencyStop",
200 "The name of the topic over which changes of the emergencyStopState are sent.");
201 defs->topic<RobotHealthInterfacePrx>(
202 p.robotHealthTopicName, "RobotHealthTopic", "Name of the RobotHealth topic");
203
204 defs->topic(p.aggregatedRobotHealthTopicPrx,
205 "AggregatedRobotHealthTopic",
206 "Name of the AggregatedRobotHealthTopic");
207
208 defs->optional(p.maximumCycleTimeWarnMS,
209 "MaximumCycleTimeWarnMS",
210 "Default value of the maximum cycle time for warnings");
211 defs->optional(p.maximumCycleTimeErrMS,
212 "MaximumCycleTimeErrMS",
213 "Default value of the maximum cycle time for error");
214
215 defs->optional(p.requiredTags, "RequiredTags", "Tags that should be requested.");
216
217 return defs;
218 }
219
220 RobotHealth::UpdateEntry*
221 RobotHealth::findUpdateEntry(const std::string& name)
222 {
224
225 for (auto& updateEntrie : updateEntries)
226 {
227 if (updateEntrie.name == name)
228 {
229 return &updateEntrie;
230 }
231 }
232
233 return nullptr;
234 }
235
236 std::pair<bool, RobotHealth::UpdateEntry&>
237 RobotHealth::findOrCreateUpdateEntry(const std::string& name)
238 {
240 {
241 for (auto& updateEntrie : updateEntries)
242 {
243 if (updateEntrie.name == name)
244 {
245 return {false, updateEntrie};
246 }
247 }
248 }
249
250 ARMARX_INFO << "registering for heartbeat: '" << name << "'";
251
252 updateEntries.emplace_back(name, defaultMaximumCycleTimeWarn, defaultMaximumCycleTimeErr);
253
254 return {true, updateEntries.back()};
255 }
256
257 void
258 RobotHealth::signUp(const RobotHealthHeartbeatArgs& args, const Ice::Current& current)
259 {
261 std::scoped_lock lockU(updateMutex);
262
263 auto e = findOrCreateUpdateEntry(args.identifier);
264
265 {
266 std::scoped_lock lock(e.second.mutex);
267
268 if (e.first)
269 {
270 ARMARX_INFO << "Newly registering component " << args.identifier << " for updates.";
271 }
272 // else no messsage as components may spam sign up for legacy reasons (when there was no sign up)
273
274 armarx::core::time::fromIce(args.maximumCycleTimeWarning,
275 e.second.maximumCycleTimeWarn);
276 armarx::core::time::fromIce(args.maximumCycleTimeError, e.second.maximumCycleTimeErr);
277
278 e.second.tags = args.tags;
279 // e.second.required = args.requiredByDefault; // FIXME
280 e.second.description = args.description;
281 e.second.enabled = true;
282 e.second.state = HealthOK;
283 e.second.history.clear();
284 }
285
286 updateRequiredElements();
287 }
288
289 void
290 RobotHealth::heartbeat(const std::string& identifier,
291 const core::time::dto::DateTime& referenceTime,
292 const Ice::Current& current)
293 {
295 ARMARX_VERBOSE << "Finding update entry";
296
297 const DateTime arrivalTimestamp = Clock::Now();
298
299
300 // We hold a reference to 'o' which is an element in a vector.
301 // If we don't lock until the end of this scope, the vector size might change and 'o' will be invalidated.
302 std::scoped_lock lockU(updateMutex);
303 auto* const entry = findUpdateEntry(identifier);
304
305 if (entry == nullptr)
306 {
307 ARMARX_WARNING << deactivateSpam() << "Attention. Component `" << identifier
308 << "` was not signed up for heartbeat. Ignoring heartbeat for now...";
309 return;
310 }
311
313
314 std::scoped_lock lock(entry->mutex);
315
316 if (not entry->enabled)
317 {
318 ARMARX_WARNING << deactivateSpam() << "Attention. Component `" << identifier
319 << "` was not enabled. The heartbeat is ignored... ";
320 return;
321 }
322
323 ARMARX_VERBOSE << "Extending history";
324 // auto now = armarx::core::time::DateTime::Now();
325
326
328 fromIce(referenceTime, refTime);
329 entry->history.push_back(
330 UpdateEntry::TimeInfo{.referenceTime = refTime, .arrivalTime = arrivalTimestamp});
331 }
332
333 void
334 armarx::RobotHealth::unregister(const std::string& identifier, const Ice::Current&)
335 {
337 std::scoped_lock lock(updateMutex);
338
339 bool found = false;
340 {
341 for (auto& e : updateEntries)
342 {
343 if (e.name == identifier)
344 {
345 std::scoped_lock elock(e.mutex);
346 e.required = false;
347 e.enabled = false;
348 found = true;
349 break;
350 }
351 }
352 // Todo upgrade lock?
353 }
354
355 if (found)
356 {
357 ARMARX_INFO << "Component " << identifier << " disabled from heartbeat";
358 }
359 else
360 {
361 ARMARX_INFO << "Could not unregister component " << identifier << ". Not found.";
362 }
363
364 updateRequiredElements();
365 }
366
367 void
368 armarx::RobotHealth::addRequiredTags(const std::string& requesterIdentifier,
369 const std::vector<std::string>& tags,
370 const Ice::Current& current)
371 {
372 std::scoped_lock lock(updateMutex);
373
374 // add newly requested tags
375 for (const auto& tag : tags)
376 {
377 tagsPerRequester[requesterIdentifier].insert(tag);
378 }
379
380 updateRequiredElements();
381 }
382
383 std::set<std::string>
384 armarx::RobotHealth::requestedTags() const
385 {
386 // obtain a list of all tags that are relevant / requested at the moment
387 std::set<std::string> allRequestedTags;
388 for (const auto& tgs : simox::alg::get_values(tagsPerRequester))
389 {
390 for (const auto& tag : tgs)
391 {
392 allRequestedTags.insert(tag);
393 }
394 }
395 return allRequestedTags;
396 }
397
398 // checks whether any element in 'search' is in 'values'
399 bool
400 containsAnyOf(const std::set<std::string>& values, const std::vector<std::string>& searchKeys)
401 {
402 return std::any_of(searchKeys.begin(),
403 searchKeys.end(),
404 [&values](const auto& key) { return values.count(key) > 0; });
405 }
406
407 void
408 armarx::RobotHealth::updateRequiredElements()
409 {
410 ARMARX_IMPORTANT << "updateRequiredElements";
411 const std::set<std::string> allRequestedTags = requestedTags();
412
413 for (auto& e : updateEntries)
414 {
415 ARMARX_INFO << e.name;
416
417 e.required = false; // if required, will be set in the following
418
419 std::unique_lock lock(e.mutex);
420
421 if (containsAnyOf(allRequestedTags, e.tags))
422 {
423 ARMARX_INFO << e.name << " is required.";
424 e.required = true;
425 if (not e.enabled)
426 {
427 ARMARX_WARNING << "You are requiring the disabled component " << e.name
428 << ". Enabling it...";
429 e.enabled = true;
430 }
431 }
432 }
433 }
434
435 void
436 armarx::RobotHealth::removeRequiredTags(const std::string& requesterIdentifier,
437 const std::vector<std::string>& tags,
438 const Ice::Current& current)
439 {
440 std::scoped_lock lock(updateMutex);
441
442 // update the required tags list / map
443 for (const auto& tag : tags)
444 {
445 tagsPerRequester[requesterIdentifier].erase(tag);
446 }
447
448 updateRequiredElements();
449 }
450
451 void
452 armarx::RobotHealth::resetRequiredTags(const Ice::Current& current)
453 {
454 std::scoped_lock lock(updateMutex);
455
456 // treat the special provider (see onInitComponent()) appropriately
457 ARMARX_CHECK(tagsPerRequester.count(PROPERTY_REQUESTER_ID) > 0);
458 const auto propertyProviderTags = tagsPerRequester.at(PROPERTY_REQUESTER_ID);
459
460 // clear everything except the special provider
461 tagsPerRequester.clear();
462 tagsPerRequester.emplace(PROPERTY_REQUESTER_ID, propertyProviderTags);
463 }
464
465 RobotHealthInfo
466 RobotHealth::getSummary(const Ice::Current&)
467 {
468 std::scoped_lock lock(updateMutex);
469
471 RobotHealthInfo ret;
472
474
477
478 for (const auto& e : updateEntries)
479 {
480 RobotHealthInfoEntry healthEntry;
481
482 for (size_t i = 1; i < e.history.size(); i++)
483 {
484 auto later = e.history[i];
485 auto pre = e.history[i - 1];
486
487 auto delta = later.arrivalTime - pre.arrivalTime;
488
489 if (minDelta > delta)
490 {
491 minDelta = delta;
492 }
493
494 if (maxDelta < delta)
495 {
496 maxDelta = delta;
497 }
498 }
499
500 const Duration timeSinceLastUpdateArrival =
501 e.history.empty() ? Duration() : Clock::Now() - e.history.back().arrivalTime;
502 const Duration timeSinceLastUpdateReference =
503 e.history.empty() ? Duration() : Clock::Now() - e.history.back().referenceTime;
504
505 const DateTime lastReferenceTime = e.history.empty()
507 : e.history.back().referenceTime;
508
509 const Duration timeSyncDelayAndIce =
510 e.history.empty() ? armarx::core::time::Duration()
511 : e.history.back().arrivalTime - e.history.back().referenceTime;
512
513 healthEntry.identifier = e.name;
514 healthEntry.state = e.state;
515 healthEntry.enabled = e.enabled;
516 healthEntry.required = e.required;
517 toIce(healthEntry.minDelta, minDelta);
518 toIce(healthEntry.maxDelta, maxDelta);
519 toIce(healthEntry.lastReferenceTimestamp, lastReferenceTime);
520 toIce(healthEntry.timeSinceLastArrival, timeSinceLastUpdateArrival);
521 toIce(healthEntry.timeSyncDelayAndIce, timeSyncDelayAndIce);
522 toIce(healthEntry.timeSinceLastUpdateReference, timeSinceLastUpdateReference);
523 toIce(healthEntry.maximumCycleTimeWarning, e.maximumCycleTimeWarn);
524 toIce(healthEntry.maximumCycleTimeError, e.maximumCycleTimeErr);
525 healthEntry.tags = e.tags;
526
527 ret.entries[e.name] = healthEntry;
528 }
529
530 const auto tags = requestedTags();
531 ret.activeTags = std::vector<std::string>(tags.begin(), tags.end());
532
533 return ret;
534 }
535
536 std::string
537 RobotHealth::getTopicName(const Ice::Current& current)
538 {
539 return p.robotHealthTopicName;
540 }
541
542 void
543 RobotHealth::reportDebugObserver()
544 {
545 for (const auto& entry : updateEntries)
546 {
547 if (entry.history.size() > 1)
548 {
549 auto later = entry.history[entry.history.size() - 1];
550 auto pre = entry.history[entry.history.size() - 2];
551 const Duration delta = later.arrivalTime - pre.arrivalTime;
552
553 const Duration timeSinceLastArrival =
554 Clock::Now() - entry.history.back().arrivalTime;
555 const Duration timeToLastReference =
556 Clock::Now() - entry.history.back().referenceTime;
557 const Duration timeSyncDelay =
558 entry.history.back().arrivalTime - entry.history.back().referenceTime;
559
560 setDebugObserverDatafield("RobotHealth_" + entry.name + "_lastDelta",
561 delta.toMilliSecondsDouble());
562 setDebugObserverDatafield("RobotHealth_" + entry.name + "_timeSinceLastArrival",
563 timeSinceLastArrival.toMilliSecondsDouble());
564 setDebugObserverDatafield("RobotHealth_" + entry.name + "_timeToLastReference",
565 timeToLastReference.toMilliSecondsDouble());
566 setDebugObserverDatafield("RobotHealth_" + entry.name + "_timeSyncDelayAndIce",
567 timeSyncDelay.toMilliSecondsDouble());
568 setDebugObserverDatafield("RobotHealth_" + entry.name + "_maximumCycleTimeWarn",
569 entry.maximumCycleTimeWarn.toMilliSecondsDouble());
570 setDebugObserverDatafield("RobotHealth_" + entry.name + "_maximumCycleTimeErr",
571 entry.maximumCycleTimeErr.toMilliSecondsDouble());
572 }
573 }
574
576 }
577} // namespace armarx
static DateTime Now()
Current time on the virtual clock.
Definition Clock.cpp:93
Default component property definition container.
Definition Component.h:70
std::string getConfigIdentifier()
Retrieve config identifier for this component as set in constructor.
Definition Component.cpp:90
SpamFilterDataPtr deactivateSpam(float deactivationDurationSec=10.0f, const std::string &identifier="", bool deactivate=true) const
disables the logging for the current line for the given amount of seconds.
Definition Logging.cpp:99
The periodic task executes one thread method repeatedly using the time period specified in the constr...
void onInitComponent() override
void unregister(const std::string &identifier, const Ice::Current &) override
void removeRequiredTags(const std::string &requesterIdentifier, const std::vector< std::string > &tags, const Ice::Current &current) override
void onDisconnectComponent() override
RobotHealthInfo getSummary(const Ice::Current &current) override
armarx::PropertyDefinitionsPtr createPropertyDefinitions() override
void resetRequiredTags(const Ice::Current &current) override
void signUp(const RobotHealthHeartbeatArgs &args, const Ice::Current &current) override
void onConnectComponent() override
void heartbeat(const std::string &identifier, const core::time::dto::DateTime &referenceTime, const Ice::Current &current) override
void addRequiredTags(const std::string &requesterIdentifier, const std::vector< std::string > &tags, const Ice::Current &current) override
std::string getTopicName(const Ice::Current &current) override
void onExitComponent() override
Represents a point in time.
Definition DateTime.h:25
static DateTime Now()
Definition DateTime.cpp:51
static DateTime Invalid()
Definition DateTime.cpp:57
Represents a duration.
Definition Duration.h:17
double toMilliSecondsDouble() const
Returns the amount of milliseconds.
Definition Duration.cpp:66
static Duration MilliSeconds(std::int64_t milliSeconds)
Constructs a duration in milliseconds.
Definition Duration.cpp:48
#define ARMARX_CHECK(expression)
Shortcut for ARMARX_CHECK_EXPRESSION.
#define ARMARX_CHECK_NOT_NULL(ptr)
This macro evaluates whether ptr is not null and if it turns out to be false it will throw an Express...
#define ARMARX_INFO
The normal logging level.
Definition Logging.h:181
#define ARMARX_IMPORTANT
The logging level for always important information, but expected behaviour (in contrast to ARMARX_WAR...
Definition Logging.h:190
#define ARMARX_ERROR
The logging level for unexpected behaviour, that must be fixed.
Definition Logging.h:196
#define ARMARX_WARNING
The logging level for unexpected behaviour, but not a serious problem.
Definition Logging.h:193
#define ARMARX_VERBOSE
The logging level for verbose information.
Definition Logging.h:187
void fromIce(const dto::ClockType::ClockTypeEnum &dto, ClockType &bo)
This file offers overloads of toIce() and fromIce() functions for STL container types.
bool containsAnyOf(const std::set< std::string > &values, const std::vector< std::string > &searchKeys)
const std::string PROPERTY_REQUESTER_ID
IceUtil::Handle< class PropertyDefinitionContainer > PropertyDefinitionsPtr
PropertyDefinitions smart pointer type.
void fromIce(const std::map< IceKeyT, IceValueT > &iceMap, boost::container::flat_map< CppKeyT, CppValueT > &cppMap)
void toIce(std::map< IceKeyT, IceValueT > &iceMap, const boost::container::flat_map< CppKeyT, CppValueT > &cppMap)
#define ARMARX_TRACE
Definition trace.h:77