BusErrorHandler.cpp
Go to the documentation of this file.
1 #include "BusErrorHandler.h"
2 
3 #include <variant>
4 
5 #include <ethercat.h>
6 
8 
11 
13 
14 #include "Bus.h"
15 #include "ErrorReporting.h"
16 #include "EtherCATState.h"
17 #include "SlaveInterface.h"
18 
20 {
// Timeout handed to the work-counter watchdog in the constructor. The Watchdog
// constructor names its argument minDurationUS and expired() compares against
// toMicroSeconds(), so this value is in microseconds (20000 us = 20 ms).
21 static constexpr unsigned int workerCounterWatchdogTimeout = 20000;
22 
// Constructor (the signature line is not part of this listing). Stores the given
// bus, arms the work-counter watchdog with workerCounterWatchdogTimeout and starts
// the background thread that executes slaveReinitializingLoop().
24 bus(bus), workerCounterWatchdog(workerCounterWatchdogTimeout)
25 {
// The run flag is set before the thread is created because the loop polls it.
26 slaveReinitializingThreadRunning.store(true);
27 slaveReinitializingThread = std::thread(&BusErrorHandler::slaveReinitializingLoop, this);
28 }
29 
// Destructor (the signature line is not part of this listing). Clears the run flag
// polled by slaveReinitializingLoop() and then waits for that thread to finish.
31 {
32 slaveReinitializingThreadRunning.store(false);
33 slaveReinitializingThread.join();
34 }
35 
// Bus-level error handling entry point (rtHandleBusErrors according to the file
// index; the signature line is not part of this listing).
36 void
38 {
// The expected work counter must already be positive; in the visible code it is
// only assigned in init().
39 ARMARX_CHECK(expectedWorkCounter > 0);
// All bus-level bookkeeping (lost / rediscovered / not-operational slaves) happens here.
40 rtUpdateSlaveStates();
41 }
42 
// Slave-level error handling (rtHandleSlaveErrors according to the file index; the
// signature line is not part of this listing). Asks every slave whether it has an
// error, lets it try to handle it, reports the ones that stay unhandled and finally
// records in slaveErrorFound whether the reporter collected any errors.
// NOTE(review): several lines of this function are missing from the listing: the
// statement that creates `reporter`, the opening lines of the timing-end macros and
// the opening line of the reporting macro.
43 void
45 {
46 BUS_TIMING_START(bus_rtHandleSlaveErrors_getErrorReporter)
// The last macro argument is the threshold in milliseconds (thresholdMs in the macro
// signature shown in the file index).
48 BUS_TIMING_CEND(bus_rtHandleSlaveErrors_getErrorReporter, bus->iterationCount, 0.3)
49 
50 for (const auto& slave : bus->getSlaves())
51 {
52 // Try to clear the slave's error if it has one; the remaining slaves keep running normally.
53 SLAVE_TIMING_START(bus_rtHandleSlaveErrors_slaveHasError)
54 auto hasError = slave->hasError();
56 slave->getSlaveIdentifier(), bus_rtHandleSlaveErrors_slaveHasError, 0.3)
57 if (hasError)
58 {
59 
60 SLAVE_TIMING_START(bus_rtHandleSlaveErrors_slaveHandleError)
// handleErrors() returning false is treated as "the slave could not handle it".
61 if (!slave->handleErrors())
62 {
64 reporter, slave->getSlaveIdentifier(), "Unhandled error in slave")
65 .deactivateSpam(1);
66 }
68 slave->getSlaveIdentifier(), bus_rtHandleSlaveErrors_slaveHandleError, 0.3)
69 }
70 }
71 
// Overwritten on every call, so the flag reflects only this iteration's reporter.
72 slaveErrorFound = reporter.hasErrors();
73 }
74 
// hasError() according to the file index (signature line not part of this listing).
// Returns true if the bus-level flag or the slave-level flag is currently set.
75 bool
77 {
78 return busErrorFound || slaveErrorFound;
79 }
80 
// isReinitializationActive() according to the file index (signature line not part of
// this listing). Returns true while the allSlavesReinitialized flag is false.
81 bool
83 {
84 return !allSlavesReinitialized.load();
85 }
86 
// isSlaveLostOrDuringReinitialization(SlaveInterface* slave) according to the file
// index (signature line not part of this listing). Returns true for every recorded
// state other than Operational.
// NOTE(review): at() is used for the lookup, so a slave that was never registered in
// init() would presumably raise (std::out_of_range if slaveStates is a standard
// map) - confirm against the header.
87 bool
89 {
90 return slaveStates.at(slave) != SlaveState::Operational;
91 }
92 
// init(int wkc) according to the file index (signature line not part of this
// listing). Stores the work counter that a healthy bus cycle is expected to return
// and registers every slave of the bus as Operational.
93 void
95 {
96 expectedWorkCounter = wkc;
97 
98 for (const auto& s : bus->getSlaves())
99 {
// NOTE(review): insert() is used; if slaveStates is a standard associative container
// an already existing entry is left untouched, so a second init() call would not
// reset a slave's state - confirm this is intended.
100 slaveStates.insert({s.get(), SlaveState::Operational});
101 }
102 }
103 
// Compares the last work counter of the bus with the expected one and updates the
// per-slave bookkeeping in slaveStates.
// - Work counter as expected (or higher): clears busErrorFound, resets the watchdog
//   and remembers the counter.
// - Work counter too low and the watchdog has expired: marks lost slaves, moves
//   rediscovered slaves to Reinitializating, and flags slaves that the state check
//   below selects for reinitialization.
// Nothing is done while the counter is too low but the watchdog has not expired yet.
104 void
105 BusErrorHandler::rtUpdateSlaveStates()
106 {
107 if (bus->lastWorkCounter < expectedWorkCounter)
108 {
109 if (workerCounterWatchdog.expired())
110 {
111 if (bus->lastWorkCounter == EC_NOFRAME && !allSlavesReinitialized.load())
112 {
113 // Panic!! Bus unstable?
114 BUS_WARNING(bus->iterationCount,
115 "The last work counter is EC_NOFRAME not all slaves have been "
116 "reinitialized yet!");
117 }
// Only re-evaluate the bus when the unexpected counter value changed since the last
// evaluation; an unchanged value is skipped.
118 else if (bus->lastWorkCounter != secondLastUnexpectedWorkCounter)
119 {
120 BUS_INFO(bus->iterationCount,
121 "Last Work Counter: %i, Second Last Unexpected Work Counter: %i, "
122 "Expected Work Counter: %i",
123 bus->lastWorkCounter,
124 secondLastUnexpectedWorkCounter,
125 expectedWorkCounter);
126 secondLastUnexpectedWorkCounter = bus->lastWorkCounter;
127 // Slaves were lost, slaves were found again, or slaves are not in op
128 
129 // Check for lost slaves and mark them
130 rtMarkLostSlavesInSOEMStruct();
131 
// The two streams collect the names of newly lost / rediscovered slaves; each is
// only emitted if at least one entry was added.
132 std::stringstream lost_ss;
133 std::stringstream redisc_ss;
134 bool print_lost = false;
135 bool print_redisc = false;
136 lost_ss << "The following slaves have been lost: \n";
137 redisc_ss << "The following slaves have been rediscovered: \n";
138 for (const auto& slave : bus->getSlaves())
139 {
140 if (ec_slave[slave->getSlaveIdentifier().slaveIndex].islost)
141 {
// Report a slave, and raise busErrorFound, only on the transition into Lost.
142 if (slaveStates.at(slave.get()) != SlaveState::Lost)
143 {
144 lost_ss << slave->getSlaveIdentifier().getNameAsCStr() << "("
145 << slave->getSlaveIdentifier().slaveIndex << ")\n";
146 busErrorFound = true;
147 print_lost = true;
148 }
149 
150 slaveStates.at(slave.get()) = SlaveState::Lost;
151 allSlavesReinitialized.store(false);
152 }
153 else
154 {
// A slave that was Lost and is reachable again must be reinitialized.
155 if (slaveStates.at(slave.get()) == SlaveState::Lost)
156 {
157 slaveStates.at(slave.get()) = SlaveState::Reinitializating;
158 print_redisc = true;
159 redisc_ss << slave->getSlaveIdentifier().getNameAsCStr() << "("
160 << slave->getSlaveIdentifier().slaveIndex << ")\n";
161 }
162 }
163 }
164 if (print_lost)
165 {
166 BUS_ERROR(bus->iterationCount, lost_ss.str().c_str());
167 }
168 if (print_redisc)
169 {
170 BUS_INFO(bus->iterationCount, redisc_ss.str().c_str());
171 }
172 
173 // Read bus states and add all slaves that need reinitialization to list
174 if (bus->readStates() != EtherCATState::invalid)
175 {
176 for (const auto& slave : bus->getSlaves())
177 {
// NOTE(review): one line of this condition (the state the slave is compared
// against) is missing from the listing.
178 if (EtherCATState(
179 ec_slave[slave->getSlaveIdentifier().slaveIndex].state) !=
181 ec_slave[slave->getSlaveIdentifier().slaveIndex].islost == 0 &&
182 (slaveStates.at(slave.get()) != SlaveState::Reinitializating))
183 {
// NOTE(review): unlike the lost-slave branch above, this path does not clear
// allSlavesReinitialized; confirm that the reinitialization thread still picks
// this slave up.
184 slaveStates.at(slave.get()) = SlaveState::Reinitializating;
185 
186 BUS_ERROR(bus->iterationCount,
187 "Slave at index %u of type %s needs reinitialization!",
188 slave->getSlaveIdentifier().slaveIndex,
189 slave->getSlaveIdentifier().getNameAsCStr());
190 }
191 }
192 }
193 }
194 }
195 }
196 else
197 {
// Healthy cycle: clear the bus error and restart the watchdog interval.
198 busErrorFound = false;
199 workerCounterWatchdog.reset();
200 secondLastUnexpectedWorkCounter = bus->lastWorkCounter;
201 }
202 
203 return;
204 }
205 
// Updates the islost flag of the entries 1..ec_slavecount in the global SOEM
// ec_slave array. The value returned by ec_BRD is treated as the number of slaves
// that answered. If it is lower than ec_slavecount, the CONFIGURED_STATION_ADDRESS
// register of every slave is read and compared with the configured address to find
// out which slaves are unreachable; otherwise all slaves are marked as not lost.
// Nothing is changed when ec_BRD returns EC_NOFRAME.
206 void
207 BusErrorHandler::rtMarkLostSlavesInSOEMStruct() const
208 {
209 std::uint16_t w = 0;
210 const int slaveCount = ec_BRD(0x0000, ECT_REG_TYPE, sizeof(w), &w, EC_TIMEOUTSAFE);
211 if (slaveCount == EC_NOFRAME)
212 {
213 return;
214 }
215 
216 if (slaveCount < ec_slavecount)
217 {
218 BUS_WARNING(bus->iterationCount,
219 "Number of Slaves (%i) in SOEM struct is less than configured Slaves (%i).",
220 slaveCount,
221 ec_slavecount);
222 // Create RegisterDataList with request to read register CONFIGURED_STATION_ADDRESS
223 std::vector<RegisterDataList> registers;
// Slave indexes start at 1 here, matching the loop in the else branch below.
224 for (auto i = 1; i <= ec_slavecount; i++)
225 {
// NOTE(review): the remaining initializer lines of this RegisterDataList are missing
// from the listing.
226 registers.push_back(
227 RegisterDataList{static_cast<std::uint16_t>(i),
230 }
231 
232 // Read Registers
233 bus->readRegisters(registers);
234 
235 // Iterate over read registers and check which slaves did not answer
236 std::stringstream ss;
237 ss << "Slaves with the following indexes are not reachable: \n";
238 for (const auto& dataList : registers)
239 {
240 auto data =
241 dataList.registerData.at(datatypes::RegisterEnum::CONFIGURED_STATION_ADDRESS);
// A slave counts as reachable only if the register holds a 16 bit unsigned value
// equal to the station address stored in the SOEM struct.
242 if (std::holds_alternative<datatypes::EtherCATDataType::UNSIGNED16>(data) &&
243 ec_slave[dataList.slaveIndex].configadr ==
244 std::get<datatypes::EtherCATDataType::UNSIGNED16>(data))
245 {
246 ec_slave[dataList.slaveIndex].islost = false;
247 }
248 else
249 {
250 ss << dataList.slaveIndex << ", ";
251 ec_slave[dataList.slaveIndex].islost = true;
252 }
253 }
// Rewind by two characters so the next write overwrites the trailing ", ".
// NOTE(review): if no index was appended above, the two characters overwritten belong
// to the header text instead.
254 ss.seekp(ss.str().length() - 2);
255 ss << "\n The total number of registers read is " << registers.size() << "\n";
256 BUS_ERROR(bus->iterationCount, "%s", ss.str().c_str());
257 }
258 else
259 {
260 // All slaves are found
261 for (int i = 1; i <= ec_slavecount; i++)
262 {
263 ec_slave[i].islost = false;
264 }
265 }
266 }
267 
// Performs one reinitialization step for every slave whose recorded state is
// Reinitializating. The step depends on the EtherCAT state currently stored in
// ec_slave for that slave; a slave is set back to Operational only in the op branch,
// when prepareForRun() succeeds. It is called repeatedly from
// slaveReinitializingLoop(), which runs in the thread started by the constructor.
// NOTE(review): slaveStates is also written by rtUpdateSlaveStates(); no locking is
// visible in this file - confirm how the two callers are synchronized.
// NOTE(review): several lines (case labels, one state change, the opening of the
// final log statement) are missing from this listing.
268 void
269 BusErrorHandler::reinitializeSlaves()
270 {
271 // Update ec_slave with the current slave states
272 bus->readStates();
273 
274 for (const auto& [slave, state] : slaveStates)
275 {
276 if (state != SlaveState::Reinitializating)
277 {
278 continue;
279 }
280 
281 std::uint16_t index =
282 static_cast<std::uint16_t>(slave->getSlaveIdentifier().slaveIndex);
283 
284 bool slaveReadyAgain = false;
285 // TIMING_START(handleFound_switchEtherCATState)
286 switch (EtherCATState(ec_slave[index].state))
287 {
// NOTE(review): a line directly before this label is missing from the listing.
289 case EtherCATState::init:
290 // This state gets reached when the corresponding slave did a full power cycle
291 // and needs to be initialized from scratch ...
292 // The first step is to recover the slave to validate that the slave is the same
293 // as we lost at that position in the bus.
294 if (ec_recover_slave(index, 10000))
295 {
296 // Since the slave is in fact the same as before,
297 // we can change its state to preOp
// NOTE(review): the statement performing that state change is missing from the listing.
299 }
300 else
301 {
302 BUS_ERROR(bus->iterationCount,
303 "Could not recover slave: %s",
304 slave->getSlaveIdentifier().getNameAsCStr());
305 }
306 
307 // TIMING_END_COMMENT(handleFound_switchEtherCATState,
308 // "ReinitializeSlaves - EtherCATState: init (" +
309 // slave->getSlaveIdentifier().getName() + ")")
310 break;
311 
// NOTE(review): the case label opening this branch is missing from the listing; the
// commented-out timing text below suggests preOp.
313 // SOEM already provides a function for reconfiguring lost slaves.
314 // That function executes a hook for switching from preOp to safeOp which we have to
315 // set to a lambda which executes our default functions for that transition:
316 ec_slave[index].PO2SOconfig = [](std::uint16_t index) -> int
317 {
318 Bus& bus = Bus::getBus();
319 
320 auto localSlave = bus.getSlaveAtIndex(index);
321 
322 localSlave->doMappings();
323 localSlave->prepareForSafeOp();
324 localSlave->finishPreparingForSafeOp();
325 
326 return 0;
327 };
328 // After executing this function, the corresponding slave will be in safeOp
329 ec_reconfig_slave(index, 10000);
330 
331 // Better reset that hook, just to be sure
332 ec_slave[index].PO2SOconfig = nullptr;
333 // TIMING_END_COMMENT(handleFound_switchEtherCATState,
334 // "ReinitializeSlaves - EtherCATState: preOp (" +
335 // slave->getSlaveIdentifier().getName() + ")")
336 break;
337 
// NOTE(review): the case label opening this branch is missing from the listing; the
// commented-out timing text below suggests safeOp.
339 // Execute hooks for safeOp to op, similar to the normal bus initialization.
340 slave->prepareForOp();
341 slave->finishPreparingForOp();
342 
343 bus->changeStateOfSlave(index, EtherCATState::op, false);
344 
345 // TIMING_END_COMMENT(handleFound_switchEtherCATState,
346 // "ReinitializeSlaves - EtherCATState: safeOp (" +
347 // slave->getSlaveIdentifier().getName() + ")")
348 break;
349 
// NOTE(review): the case label opening this branch is missing from the listing; the
// commented-out timing text below suggests safeOpError.
351 // This state is reached if the slaves have not been updated for a long time
352 // (>50ms) and the sync manager watchdog gets triggered.
353 // The only way to recover from that is to reinit the slave by switching its
354 // state to init
355 ec_recover_slave(index, 10000);
356 
357 bus->changeStateOfSlave(index, EtherCATState::init, false);
358 
359 // TIMING_END_COMMENT(handleFound_switchEtherCATState,
360 // "ReinitializeSlaves - EtherCATState: safeOpError (" +
361 // slave->getSlaveIdentifier().getName() + ")")
362 break;
363 
// The slave is back in op; it counts as recovered only if prepareForRun() succeeds,
// otherwise this branch is tried again on a later call.
364 case EtherCATState::op:
365 if (slave->prepareForRun())
366 {
367 slaveReadyAgain = true;
368 }
369 // TIMING_END_COMMENT(handleFound_switchEtherCATState,
370 // "ReinitializeSlaves - EtherCATState: op (" +
371 // slave->getSlaveIdentifier().getName() + ")")
372 break;
373 
374 default:
375 break;
376 }
377 
378 // Slave was successfully recovered, so it is marked as Operational again
379 if (slaveReadyAgain)
380 {
381 slaveStates.at(slave) = SlaveState::Operational;
382 
// NOTE(review): the opening of this log statement is missing from the listing.
384 << "Slave at index " << slave->getSlaveIdentifier().slaveIndex << " of type "
385 << slave->getSlaveIdentifier().getName()
386 << " has been successfully reinitialized. Current bus iteration number is "
387 << bus->iterationCount;
388 
// Recompute the flag so that the reinitialization loop stops once no slave is
// pending any more.
389 allSlavesReinitialized.store(areAllSlavesReinitialized());
390 }
391 }
392 }
393 
394  void
395  BusErrorHandler::slaveReinitializingLoop()
396  {
397  while (slaveReinitializingThreadRunning.load())
398  {
399  while (!allSlavesReinitialized.load())
400  {
401  reinitializeSlaves();
402  }
403 
404  using namespace std::literals;
405  std::this_thread::sleep_for(100us);
406  }
407  }
408 
409  bool
410  BusErrorHandler::areAllSlavesReinitialized() const
411  {
412  bool r = false;
413  for (const auto& [slave, state] : slaveStates)
414  {
415  r |= (state == SlaveState::Reinitializating);
416  }
417  return !r;
418  }
419 
// Creates a watchdog whose interval starts now.
// minDurationUS: duration in microseconds (expired() compares it against
// toMicroSeconds()) after which expired() starts to return true.
420 BusErrorHandler::Watchdog::Watchdog(std::uint32_t minDurationUS) :
421 minDuration(minDurationUS), lastTime(armarx::rtNow())
422 {
423 }
424 
425  bool
426  BusErrorHandler::Watchdog::expired() const
427  {
428  return (armarx::rtNow() - lastTime).toMicroSeconds() > minDuration;
429  }
430 
// Restarts the watchdog interval: expired() measures from the time stored here.
431 void
432 BusErrorHandler::Watchdog::reset()
433 {
434 lastTime = armarx::rtNow();
435 }
436 
437 } // namespace armarx::control::ethercat
armarx::control::ethercat::BusErrorHandler::rtHandleSlaveErrors
void rtHandleSlaveErrors()
Definition: BusErrorHandler.cpp:44
BusErrorHandler.h
armarx::control::ethercat::SlaveInterface
Brief description of class SlaveInterface.
Definition: SlaveInterface.h:29
armarx::control::ethercat::BusErrorHandler::rtHandleBusErrors
void rtHandleBusErrors()
Definition: BusErrorHandler.cpp:37
ARMARX_IMPORTANT
#define ARMARX_IMPORTANT
Definition: Logging.h:183
BUS_WARNING
#define BUS_WARNING(bin,...)
Definition: ErrorReporting.h:274
index
uint8_t index
Definition: EtherCATFrame.h:59
BUS_ERROR
#define BUS_ERROR(bin,...)
Definition: ErrorReporting.h:277
RtTiming.h
armarx::control::ethercat::reporting::Type::Bus
@ Bus
Bussdfnödf.
EtherCATState.h
armarx::control::ethercat::BusErrorHandler::init
void init(int wkc)
Definition: BusErrorHandler.cpp:94
armarx::control::ethercat::BusIO::changeStateOfSlave
EtherCATState changeStateOfSlave(std::uint16_t slaveIndex, EtherCATState state, bool validate=true)
Definition: BusIO.cpp:403
armarx::control::ethercat::BusErrorHandler::~BusErrorHandler
~BusErrorHandler() override
Definition: BusErrorHandler.cpp:30
BUS_INFO
#define BUS_INFO(bin,...)
Definition: ErrorReporting.h:271
armarx::control::ethercat::EtherCATState::invalid
@ invalid
State is not valid, e.g. if a request for reading the actual bus state failed.
Definition: EtherCATState.h:34
armarx::control::ethercat::BusIO::lastWorkCounter
int lastWorkCounter
Definition: BusIO.h:207
armarx::control::ethercat::BusErrorHandler::hasError
bool hasError() const
Definition: BusErrorHandler.cpp:76
armarx::control::ethercat::datatypes::RegisterEnum::CONFIGURED_STATION_ADDRESS
@ CONFIGURED_STATION_ADDRESS
SLAVE_TIMING_START
#define SLAVE_TIMING_START(name)
Definition: Timing.h:186
armarx::control::ethercat::BusIO::readStates
EtherCATState readStates()
Definition: BusIO.cpp:419
ARMARX_CHECK
#define ARMARX_CHECK(expression)
Shortcut for ARMARX_CHECK_EXPRESSION.
Definition: ExpressionException.h:82
Bus.h
armarx::control::ethercat::EtherCATState::safeOpError
@ safeOpError
Safe-operational state after an error has happened.
Definition: EtherCATState.h:51
armarx::control::ethercat::Bus
Brief description of class Bus.
Definition: Bus.h:55
armarx::control::ethercat::datatypes::RegisterEnumTypeContainer
std::variant< EtherCATDataType::INTEGER8, EtherCATDataType::INTEGER16, EtherCATDataType::INTEGER32, EtherCATDataType::INTEGER64, EtherCATDataType::UNSIGNED8, EtherCATDataType::UNSIGNED16, EtherCATDataType::UNSIGNED32, EtherCATDataType::UNSIGNED64 > RegisterEnumTypeContainer
Definition: SlaveRegisters.h:160
armarx::control::ethercat::reporting::Reporting::getErrorReporting
static Reporting & getErrorReporting()
Definition: ErrorReporting.cpp:82
data
uint8_t data[1]
Definition: EtherCATFrame.h:68
BUS_TIMING_CEND
#define BUS_TIMING_CEND(name, bin, thresholdMs)
Definition: Timing.h:181
armarx::control::ethercat
Definition: Bus.cpp:24
armarx::control::ethercat::EtherCATState::preOp
@ preOp
Pre-operational state.
Definition: EtherCATState.h:40
armarx::control::ethercat::reporting::Reporting::getErrorReporter
Reporter getErrorReporter()
Definition: ErrorReporting.cpp:89
armarx::control::ethercat::Bus::getBus
static Bus & getBus()
This returns the one and only Bus object.
Definition: Bus.cpp:29
armarx::control::ethercat::Bus::getSlaves
std::vector< std::experimental::observer_ptr< SlaveInterface > > getSlaves() const
Returns all identified slaves on the bus.
Definition: Bus.cpp:1375
SlaveInterface.h
Timing.h
ErrorReporting.h
armarx::control::ethercat::EtherCATState::safeOp
@ safeOp
Safe-operational state.
Definition: EtherCATState.h:46
armarx::control::ethercat::BusErrorHandler::isReinitializationActive
bool isReinitializationActive() const
Definition: BusErrorHandler.cpp:82
TimeUtil.h
armarx::control::ethercat::EtherCATState::init
@ init
Initial state after switching on an EtherCAT slave.
Definition: EtherCATState.h:37
SLAVE_ERROR_LOCAL
#define SLAVE_ERROR_LOCAL(reporter, sid,...)
Definition: ErrorReporting.h:315
armarx::control::ethercat::BusErrorHandler::isSlaveLostOrDuringReinitialization
bool isSlaveLostOrDuringReinitialization(SlaveInterface *slave) const
Definition: BusErrorHandler.cpp:88
armarx::control::ethercat::BusErrorHandler::BusErrorHandler
BusErrorHandler(Bus *bus)
Definition: BusErrorHandler.cpp:23
ControlThreadOutputBuffer.h
armarx::control::ethercat::EtherCATState::op
@ op
Operational state.
Definition: EtherCATState.h:49
BUS_TIMING_START
#define BUS_TIMING_START(name)
Definition: Timing.h:163
SLAVE_TIMING_CEND
#define SLAVE_TIMING_CEND(sid, name, thresholdMs)
Definition: Timing.h:203
armarx::ctrlutil::s
double s(double t, double s0, double v0, double a0, double j)
Definition: CtrlUtil.h:33
armarx
This file offers overloads of toIce() and fromIce() functions for STL container types.
Definition: ArmarXTimeserver.cpp:28
armarx::control::ethercat::BusIO::readRegisters
bool readRegisters(std::vector< RegisterDataList > &registerData)
Definition: BusIO.cpp:433
armarx::rtNow
IceUtil::Time rtNow()
Definition: RtTiming.h:40